diff --git a/docs/data-sources/inference_model.md b/docs/data-sources/inference_model.md new file mode 100644 index 0000000000..d9f9049a23 --- /dev/null +++ b/docs/data-sources/inference_model.md @@ -0,0 +1,45 @@ +--- +subcategory: "Inference" +page_title: "Scaleway: scaleway_inference_model" +--- + +# scaleway_inference_model + +The `scaleway_inference_model` data source allows you to retrieve information about an inference model available in the Scaleway Inference API, either by providing the model's `name` or its `model_id`. + +## Example Usage + +### Basic + +```hcl +data "scaleway_inference_model" "my_model" { + name = "meta/llama-3.1-8b-instruct:fp8" +} +``` + +## Argument Reference + +You must provide either name or model_id, but not both. + +- `name` (Optional, Conflicts with model_id) The fully qualified name of the model to look up (e.g., "meta/llama-3.1-8b-instruct:fp8"). The provider will search for a model with an exact name match in the selected region and project. +- `model_id` (Optional, Conflicts with name) The ID of the model to retrieve. Must be a valid UUID with locality (i.e., Scaleway's zoned UUID format). +- `project_id` (Optional) The project ID to use when listing models. If not provided, the provider default project is used. +- `region` (Optional) The region where the model is hosted. If not set, the provider default region is used. + +## Attributes Reference + +In addition to the input arguments above, the following attributes are exported: + +- `id` - The unique identifier of the model. +- `tags` - Tags associated with the model. +- `status` - The current status of the model (e.g., ready, error, etc.). +- `description` - A textual description of the model (if available). +- `has_eula` - Whether the model requires end-user license agreement acceptance before use. +- `parameter_size_bits` - Size, in bits, of the model parameters. +- `size_bytes` - Total size, in bytes, of the model archive. 
+- `nodes_support` - List of supported node types and their quantization options. Each entry contains: + - `node_type_name` - The type of node supported. + - `quantization` - A list of supported quantization options, including: + - `quantization_bits` - Number of bits used for quantization (e.g., 8, 16). + - `allowed` - Whether this quantization is allowed. + - `max_context_size` - Maximum context length supported by this quantization. \ No newline at end of file diff --git a/docs/resources/inference_deployment.md b/docs/resources/inference_deployment.md index b462f1c366..ed46868d97 100644 --- a/docs/resources/inference_deployment.md +++ b/docs/resources/inference_deployment.md @@ -13,10 +13,14 @@ For more information, see the [API documentation](https://www.scaleway.com/en/de ### Basic ```terraform +data "scaleway_inference_model" "my_model" { + name = "meta/llama-3.1-8b-instruct:fp8" +} + resource "scaleway_inference_deployment" "deployment" { name = "tf-inference-deployment" node_type = "L4" - model_name = "meta/llama-3.1-8b-instruct:fp8" + model_id = data.scaleway_inference_model.my_model.id public_endpoint { is_enabled = true } @@ -26,7 +30,7 @@ resource "scaleway_inference_deployment" "deployment" { ## Argument Reference -- `model_name` - (Required) The model name to use for the deployment. Model names can be found in Console or using Scaleway's CLI (`scw inference model list`) +- `model_id` - (Required) The model id used for the deployment. - `node_type` - (Required) The node type to use for the deployment. Node types can be found using Scaleway's CLI (`scw inference node-type list`) - `name` - (Optional) The deployment name. - `accept_eula` - (Optional) Some models (e.g Meta Llama) require end-user license agreements. Set `true` to accept. @@ -48,7 +52,7 @@ resource "scaleway_inference_deployment" "deployment" { In addition to all arguments above, the following attributes are exported: - `id` - The ID of the deployment. 
-- `model_id` - The model id used for the deployment. +- `model_name` - The model name used for the deployment. Model names can be found in Console or using Scaleway's CLI (`scw inference model list`) - `size` - The size of the pool. - `status` - The status of the deployment. - `created_at` - The date and time of the creation of the deployment. diff --git a/docs/resources/inference_model.md b/docs/resources/inference_model.md new file mode 100644 index 0000000000..e8e6b65451 --- /dev/null +++ b/docs/resources/inference_model.md @@ -0,0 +1,76 @@ +--- +subcategory: "Inference" +page_title: "Scaleway: scaleway_inference_model" +--- + +# Resource: scaleway_inference_model + +The scaleway_inference_model resource allows you to upload and manage inference models in the Scaleway Inference ecosystem. Once registered, a model can be used in any scaleway_inference_deployment resource. + +## Example Usage + +### Basic + +```terraform +resource "scaleway_inference_model" "test" { + name = "my-awesome-model" + url = "https://huggingface.co/agentica-org/DeepCoder-14B-Preview" + secret = "my-secret-token" +} +``` + +### Deploy your own model on your managed inference + +```terraform +resource "scaleway_inference_model" "my_model" { + name = "my-awesome-model" + url = "https://huggingface.co/agentica-org/DeepCoder-14B-Preview" + secret = "my-secret-token" +} + +resource "scaleway_inference_deployment" "my_deployment" { + name = "test-inference-deployment-basic" + node_type = "H100" # replace with your node type + model_id = scaleway_inference_model.my_model.id + + public_endpoint { + is_enabled = true + } + + accept_eula = true +} +``` + +## Argument Reference + +- `name` - (Required) The name of the model. This must be unique within the project. +- `url` - (Required) The HTTPS source URL from which the model will be downloaded. This is typically a Hugging Face repository URL (e.g., https://huggingface.co/agentica-org/DeepCoder-14B-Preview). 
The URL must be publicly accessible or require valid credentials via `secret` +- `secret` - (Optional, Sensitive) Authentication token used to pull the model from a private or gated URL (e.g., a Hugging Face access token with read permission). +- `region` - (Defaults to [provider](../index.md#region) `region`) The [region](../guides/regions_and_zones.md#regions) in which the deployment is created. +- `project_id` - (Defaults to [provider](../index.md#project_id) `project_id`) The ID of the project the deployment is associated with. + +## Attributes Reference + +In addition to all arguments above, the following attributes are exported: + +- `id` - The unique identifier of the model. +- `tags` - Tags associated with the model. +- `status` - The current status of the model (e.g., ready, error, etc.). +- `description` - A textual description of the model (if available). +- `has_eula` - Whether the model requires end-user license agreement acceptance before use. +- `parameter_size_bits` - Size, in bits, of the model parameters. +- `size_bytes` - Total size, in bytes, of the model archive. +- `nodes_support` - List of supported node types and their quantization options. Each entry contains: + - `node_type_name` - The type of node supported. + - `quantization` - A list of supported quantization options, including: + - `quantization_bits` - Number of bits used for quantization (e.g., 8, 16). + - `allowed` - Whether this quantization is allowed. + - `max_context_size` - Maximum context length supported by this quantization. 
+ +## Import + +Models can be imported using, `{region}/{id}`, as shown below: + +```bash +terraform import scaleway_inference_model.my_model fr-par/11111111-1111-1111-1111-111111111111 +``` \ No newline at end of file diff --git a/internal/provider/provider.go b/internal/provider/provider.go index 1ad1d29604..764a2462de 100644 --- a/internal/provider/provider.go +++ b/internal/provider/provider.go @@ -167,6 +167,7 @@ func Provider(config *Config) plugin.ProviderFunc { "scaleway_iam_ssh_key": iam.ResourceSSKKey(), "scaleway_iam_user": iam.ResourceUser(), "scaleway_inference_deployment": inference.ResourceDeployment(), + "scaleway_inference_model": inference.ResourceModel(), "scaleway_instance_image": instance.ResourceImage(), "scaleway_instance_ip": instance.ResourceIP(), "scaleway_instance_ip_reverse_dns": instance.ResourceIPReverseDNS(), @@ -273,6 +274,7 @@ func Provider(config *Config) plugin.ProviderFunc { "scaleway_iam_ssh_key": iam.DataSourceSSHKey(), "scaleway_iam_user": iam.DataSourceUser(), "scaleway_iam_api_key": iam.DataSourceAPIKey(), + "scaleway_inference_model": inference.DataSourceModel(), "scaleway_instance_image": instance.DataSourceImage(), "scaleway_instance_ip": instance.DataSourceIP(), "scaleway_instance_placement_group": instance.DataSourcePlacementGroup(), diff --git a/internal/services/inference/deployment.go b/internal/services/inference/deployment.go index fdfa390c77..2413722ed3 100644 --- a/internal/services/inference/deployment.go +++ b/internal/services/inference/deployment.go @@ -6,9 +6,11 @@ import ( "github.com/hashicorp/terraform-plugin-sdk/v2/diag" "github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema" "github.com/hashicorp/terraform-plugin-sdk/v2/helper/validation" - inference "github.com/scaleway/scaleway-sdk-go/api/inference/v1beta1" + "github.com/scaleway/scaleway-sdk-go/api/inference/v1" "github.com/scaleway/scaleway-sdk-go/scw" + "github.com/scaleway/terraform-provider-scaleway/v2/internal/dsf" 
"github.com/scaleway/terraform-provider-scaleway/v2/internal/httperrors" + "github.com/scaleway/terraform-provider-scaleway/v2/internal/locality" "github.com/scaleway/terraform-provider-scaleway/v2/internal/locality/regional" "github.com/scaleway/terraform-provider-scaleway/v2/internal/services/account" "github.com/scaleway/terraform-provider-scaleway/v2/internal/types" @@ -43,17 +45,20 @@ func ResourceDeployment() *schema.Resource { "node_type": { Type: schema.TypeString, Required: true, + ForceNew: true, Description: "The node type to use for the deployment", }, "model_name": { Type: schema.TypeString, - Required: true, + Computed: true, Description: "The model name to use for the deployment", }, "model_id": { - Type: schema.TypeString, - Computed: true, - Description: "The model id used for the deployment", + Type: schema.TypeString, + Required: true, + Description: "The model id used for the deployment", + ForceNew: true, + DiffSuppressFunc: dsf.Locality, }, "accept_eula": { Type: schema.TypeBool, @@ -70,16 +75,21 @@ func ResourceDeployment() *schema.Resource { "min_size": { Type: schema.TypeInt, Optional: true, - Computed: true, Description: "The minimum size of the pool", ValidateFunc: validation.IntAtLeast(1), + Default: 1, }, "max_size": { Type: schema.TypeInt, Optional: true, - Computed: true, Description: "The maximum size of the pool", ValidateFunc: validation.IntAtLeast(1), + Default: 1, + }, + "quantization": { + Type: schema.TypeInt, + Optional: true, + Description: "The number of bits each model parameter should be quantized to", }, "size": { Type: schema.TypeInt, @@ -178,13 +188,13 @@ func ResourceDeploymentCreate(ctx context.Context, d *schema.ResourceData, m int } req := &inference.CreateDeploymentRequest{ - Region: region, - ProjectID: d.Get("project_id").(string), - Name: d.Get("name").(string), - NodeType: d.Get("node_type").(string), - ModelName: d.Get("model_name").(string), - Tags: types.ExpandStrings(d.Get("tags")), - Endpoints: 
buildEndpoints(d), + Region: region, + ProjectID: d.Get("project_id").(string), + Name: d.Get("name").(string), + NodeTypeName: d.Get("node_type").(string), + ModelID: locality.ExpandID(d.Get("model_id").(string)), + Tags: types.ExpandStrings(d.Get("tags")), + Endpoints: buildEndpoints(d), } if isAcceptingEula, ok := d.GetOk("accept_eula"); ok { @@ -199,6 +209,12 @@ func ResourceDeploymentCreate(ctx context.Context, d *schema.ResourceData, m int req.MaxSize = scw.Uint32Ptr(uint32(maxSize.(int))) } + if quantization, ok := d.GetOk("quantization"); ok { + req.Quantization = &inference.DeploymentQuantization{ + Bits: uint32(quantization.(int)), + } + } + deployment, err := api.CreateDeployment(req, scw.WithContext(ctx)) if err != nil { return diag.FromErr(err) @@ -221,8 +237,8 @@ func buildEndpoints(d *schema.ResourceData) []*inference.EndpointSpec { publicEndpointMap := publicEndpoint.([]interface{})[0].(map[string]interface{}) if publicEndpointMap["is_enabled"].(bool) { publicEp := inference.EndpointSpec{ - Public: &inference.EndpointSpecPublic{}, - DisableAuth: publicEndpointMap["disable_auth"].(bool), + PublicNetwork: &inference.EndpointPublicNetworkDetails{}, + DisableAuth: publicEndpointMap["disable_auth"].(bool), } endpoints = append(endpoints, &publicEp) } @@ -232,7 +248,7 @@ func buildEndpoints(d *schema.ResourceData) []*inference.EndpointSpec { privateEndpointMap := privateEndpoint.([]interface{})[0].(map[string]interface{}) if privateID, exists := privateEndpointMap["private_network_id"]; exists { privateEp := inference.EndpointSpec{ - PrivateNetwork: &inference.EndpointSpecPrivateNetwork{ + PrivateNetwork: &inference.EndpointPrivateNetworkDetails{ PrivateNetworkID: regional.ExpandID(privateID.(string)).ID, }, DisableAuth: privateEndpointMap["disable_auth"].(bool), @@ -264,7 +280,7 @@ func ResourceDeploymentRead(ctx context.Context, d *schema.ResourceData, m inter _ = d.Set("name", deployment.Name) _ = d.Set("region", deployment.Region) _ = 
d.Set("project_id", deployment.ProjectID) - _ = d.Set("node_type", deployment.NodeType) + _ = d.Set("node_type", deployment.NodeTypeName) _ = d.Set("model_name", deployment.ModelName) _ = d.Set("min_size", int(deployment.MinSize)) _ = d.Set("max_size", int(deployment.MaxSize)) @@ -290,7 +306,7 @@ func ResourceDeploymentRead(ctx context.Context, d *schema.ResourceData, m inter privateEndpoints = append(privateEndpoints, privateEndpointSpec) } - if endpoint.PublicAccess != nil { + if endpoint.PublicNetwork != nil { publicEndpointSpec := map[string]interface{}{ "id": endpoint.ID, "is_enabled": true, diff --git a/internal/services/inference/deployment_test.go b/internal/services/inference/deployment_test.go index 5d9a40b180..00aba49ba8 100644 --- a/internal/services/inference/deployment_test.go +++ b/internal/services/inference/deployment_test.go @@ -6,12 +6,17 @@ import ( "github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource" "github.com/hashicorp/terraform-plugin-sdk/v2/terraform" - inferenceSDK "github.com/scaleway/scaleway-sdk-go/api/inference/v1beta1" + inferenceSDK "github.com/scaleway/scaleway-sdk-go/api/inference/v1" "github.com/scaleway/terraform-provider-scaleway/v2/internal/acctest" "github.com/scaleway/terraform-provider-scaleway/v2/internal/services/inference" inferencetestfuncs "github.com/scaleway/terraform-provider-scaleway/v2/internal/services/inference/testfuncs" ) +const ( + modelNameMeta = "meta/llama-3.1-8b-instruct:bf16" + nodeType = "L4" +) + func TestAccDeployment_Basic(t *testing.T) { tt := acctest.NewTestTools(t) defer tt.Cleanup() @@ -22,17 +27,22 @@ func TestAccDeployment_Basic(t *testing.T) { CheckDestroy: inferencetestfuncs.IsDeploymentDestroyed(tt), Steps: []resource.TestStep{ { - Config: ` + Config: fmt.Sprintf(` + data "scaleway_inference_model" "my-model" { + name = "%s" + } + resource "scaleway_inference_deployment" "main" { name = "test-inference-deployment-basic" - node_type = "L4" - model_name = 
"meta/llama-3.1-8b-instruct:fp8" + node_type = "%s" + model_id = data.scaleway_inference_model.my-model.id public_endpoint { is_enabled = true } accept_eula = true } - `, + + `, modelNameMeta, nodeType), Check: resource.ComposeTestCheckFunc( testAccCheckDeploymentExists(tt, "scaleway_inference_deployment.main"), resource.TestCheckResourceAttr("scaleway_inference_deployment.main", "name", "test-inference-deployment-basic"), @@ -52,20 +62,25 @@ func TestAccDeployment_Endpoint(t *testing.T) { CheckDestroy: inferencetestfuncs.IsDeploymentDestroyed(tt), Steps: []resource.TestStep{ { - Config: ` + Config: fmt.Sprintf(` + data "scaleway_inference_model" "my-model" { + name = "%s" + } + resource "scaleway_vpc_private_network" "pn01" { name = "private-network-test-inference" } + resource "scaleway_inference_deployment" "main" { name = "test-inference-deployment-endpoint-private" - node_type = "L4" - model_name = "meta/llama-3.1-8b-instruct:fp8" + node_type = "%s" + model_id = data.scaleway_inference_model.my-model.id private_endpoint { private_network_id = "${scaleway_vpc_private_network.pn01.id}" } accept_eula = true } - `, + `, modelNameMeta, nodeType), Check: resource.ComposeTestCheckFunc( testAccCheckDeploymentExists(tt, "scaleway_inference_deployment.main"), resource.TestCheckResourceAttr("scaleway_inference_deployment.main", "name", "test-inference-deployment-endpoint-private"), @@ -74,14 +89,19 @@ func TestAccDeployment_Endpoint(t *testing.T) { ), }, { - Config: ` + Config: fmt.Sprintf(` + data "scaleway_inference_model" "my-model" { + name = "%s" + } + resource "scaleway_vpc_private_network" "pn01" { name = "private-network-test-inference-public" } + resource "scaleway_inference_deployment" "main" { name = "test-inference-deployment-basic-endpoints-private-public" - node_type = "L4" - model_name = "meta/llama-3.1-8b-instruct:fp8" + node_type = "%s" + model_id = data.scaleway_inference_model.my-model.id private_endpoint { private_network_id = 
"${scaleway_vpc_private_network.pn01.id}" } @@ -90,7 +110,7 @@ func TestAccDeployment_Endpoint(t *testing.T) { } accept_eula = true } - `, + `, modelNameMeta, nodeType), Check: resource.ComposeTestCheckFunc( testAccCheckDeploymentExists(tt, "scaleway_inference_deployment.main"), resource.TestCheckResourceAttr("scaleway_inference_deployment.main", "name", "test-inference-deployment-basic-endpoints-private-public"), diff --git a/internal/services/inference/helpers_inference.go b/internal/services/inference/helpers_inference.go index f5201363b5..3db5b11f86 100644 --- a/internal/services/inference/helpers_inference.go +++ b/internal/services/inference/helpers_inference.go @@ -4,7 +4,7 @@ import ( "time" "github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema" - inference "github.com/scaleway/scaleway-sdk-go/api/inference/v1beta1" + "github.com/scaleway/scaleway-sdk-go/api/inference/v1" "github.com/scaleway/scaleway-sdk-go/scw" "github.com/scaleway/terraform-provider-scaleway/v2/internal/locality/regional" "github.com/scaleway/terraform-provider-scaleway/v2/internal/meta" @@ -13,6 +13,8 @@ import ( const ( defaultInferenceDeploymentTimeout = 80 * time.Minute defaultDeploymentRetryInterval = 1 * time.Minute + defaultModelTimeout = 180 * time.Minute + defaultModelRetryInterval = 1 * time.Minute ) // NewAPIWithRegion returns a new inference API and the region for a Create request diff --git a/internal/services/inference/model.go b/internal/services/inference/model.go new file mode 100644 index 0000000000..a0371d660c --- /dev/null +++ b/internal/services/inference/model.go @@ -0,0 +1,232 @@ +package inference + +import ( + "context" + "fmt" + + "github.com/hashicorp/terraform-plugin-sdk/v2/diag" + "github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema" + "github.com/scaleway/scaleway-sdk-go/api/inference/v1" + "github.com/scaleway/scaleway-sdk-go/scw" + "github.com/scaleway/terraform-provider-scaleway/v2/internal/httperrors" + 
"github.com/scaleway/terraform-provider-scaleway/v2/internal/locality/regional" + "github.com/scaleway/terraform-provider-scaleway/v2/internal/services/account" + "github.com/scaleway/terraform-provider-scaleway/v2/internal/types" +) + +func ResourceModel() *schema.Resource { + return &schema.Resource{ + CreateContext: ResourceModelCreate, + ReadContext: ResourceModelRead, + DeleteContext: ResourceModelDelete, + Importer: &schema.ResourceImporter{ + StateContext: schema.ImportStatePassthroughContext, + }, + Timeouts: &schema.ResourceTimeout{ + Default: schema.DefaultTimeout(defaultModelTimeout), + Create: schema.DefaultTimeout(defaultModelTimeout), + Update: schema.DefaultTimeout(defaultModelTimeout), + Delete: schema.DefaultTimeout(defaultModelTimeout), + }, + SchemaVersion: 0, + Schema: map[string]*schema.Schema{ + "name": { + Type: schema.TypeString, + Required: true, + ForceNew: true, + Description: "The name of the model", + }, + "url": { + Type: schema.TypeString, + Required: true, + ForceNew: true, + Description: "The HTTPS URL to the model archive or repository. Typically, this is a Hugging Face repository URL (e.g., " + + "`https://huggingface.co/your-org/your-model`). The URL must be publicly accessible or require a valid secret for authentication.", + }, + "secret": { + Type: schema.TypeString, + Optional: true, + Sensitive: true, + ForceNew: true, + Description: "A token or credential used to authenticate when pulling the model from a private or gated source. 
For example, a Hugging Face access token with read permissions.", + }, + "tags": { + Type: schema.TypeList, + Elem: &schema.Schema{Type: schema.TypeString}, + Computed: true, + Description: "The tags associated with the deployment", + }, + "project_id": account.ProjectIDSchema(), + "status": { + Type: schema.TypeString, + Computed: true, + Description: "The status of the model", + }, + "description": { + Type: schema.TypeString, + Computed: true, + Description: "The description of the model", + }, + "created_at": { + Type: schema.TypeString, + Computed: true, + Description: "The date and time of the creation of the model", + }, + "updated_at": { + Type: schema.TypeString, + Computed: true, + Description: "The date and time of the last update of the model", + }, + "has_eula": { + Type: schema.TypeBool, + Computed: true, + Description: "Defines whether the model has an end user license agreement", + }, + "nodes_support": { + Type: schema.TypeList, + Computed: true, + Description: "Supported node types with quantization options and context lengths.", + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "node_type_name": { + Type: schema.TypeString, + Computed: true, + Description: "Supported node type.", + }, + "quantization": { + Type: schema.TypeList, + Computed: true, + Description: "Supported quantization options.", + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "quantization_bits": { + Type: schema.TypeInt, + Computed: true, + Description: "Number of bits used for quantization.", + }, + "allowed": { + Type: schema.TypeBool, + Computed: true, + Description: "Whether this quantization is allowed for the model.", + }, + "max_context_size": { + Type: schema.TypeInt, + Computed: true, + Description: "Maximum inference context size for this quantization and node type.", + }, + }, + }, + }, + }, + }, + }, + "parameter_size_bits": { + Type: schema.TypeInt, + Computed: true, + Description: "Size, in bits, of the model parameters", + }, + 
"size_bytes": { + Type: schema.TypeInt, + Computed: true, + Description: "Total size, in bytes, of the model files", + }, + "region": regional.Schema(), + }, + } +} + +func ResourceModelCreate(ctx context.Context, d *schema.ResourceData, m interface{}) diag.Diagnostics { + api, region, err := NewAPIWithRegion(d, m) + if err != nil { + return diag.FromErr(err) + } + + modelSource := &inference.ModelSource{ + URL: d.Get("url").(string), + } + + if secret, ok := d.GetOk("secret"); ok { + modelSource.Secret = types.ExpandStringPtr(secret) + } + + reqCreateModel := &inference.CreateModelRequest{ + Region: region, + Name: d.Get("name").(string), + ProjectID: d.Get("project_id").(string), + Source: modelSource, + } + + model, err := api.CreateModel(reqCreateModel) + if err != nil { + return diag.FromErr(err) + } + + d.SetId(regional.NewIDString(region, model.ID)) + + model, err = waitForModel(ctx, api, region, model.ID, d.Timeout(schema.TimeoutCreate)) + if err != nil { + return diag.FromErr(err) + } + + if model.Status == inference.ModelStatusError { + errMsg := *model.ErrorMessage + + return diag.FromErr(fmt.Errorf("model '%s' is in status '%s'", model.ID, errMsg)) + } + + return ResourceModelRead(ctx, d, m) +} + +func ResourceModelRead(ctx context.Context, d *schema.ResourceData, m interface{}) diag.Diagnostics { + api, region, id, err := NewAPIWithRegionAndID(m, d.Id()) + if err != nil { + return diag.FromErr(err) + } + + model, err := waitForModel(ctx, api, region, id, d.Timeout(schema.TimeoutRead)) + if err != nil { + if httperrors.Is404(err) { + d.SetId("") + + return nil + } + + return diag.FromErr(err) + } + + _ = d.Set("parameter_size_bits", int32(model.ParameterSizeBits)) + _ = d.Set("size_bytes", int64(model.SizeBytes)) + _ = d.Set("name", model.Name) + _ = d.Set("status", model.Status.String()) + _ = d.Set("description", model.Description) + _ = d.Set("tags", model.Tags) + _ = d.Set("created_at", types.FlattenTime(model.CreatedAt)) + _ = d.Set("updated_at", 
types.FlattenTime(model.UpdatedAt)) + _ = d.Set("has_eula", model.HasEula) + _ = d.Set("nodes_support", flattenNodeSupport(model.NodesSupport)) + + return nil +} + +func ResourceModelDelete(ctx context.Context, d *schema.ResourceData, m interface{}) diag.Diagnostics { + api, region, id, err := NewAPIWithRegionAndID(m, d.Id()) + if err != nil { + return diag.FromErr(err) + } + + _, err = waitForModel(ctx, api, region, id, d.Timeout(schema.TimeoutDelete)) + if err != nil { + return diag.FromErr(err) + } + + err = api.DeleteModel(&inference.DeleteModelRequest{ + Region: region, + ModelID: id, + }, scw.WithContext(ctx)) + + if err != nil && !httperrors.Is404(err) { + return diag.FromErr(err) + } + + return nil +} diff --git a/internal/services/inference/model_data_source.go b/internal/services/inference/model_data_source.go new file mode 100644 index 0000000000..f77e647f50 --- /dev/null +++ b/internal/services/inference/model_data_source.go @@ -0,0 +1,88 @@ +package inference + +import ( + "context" + "errors" + + "github.com/hashicorp/terraform-plugin-sdk/v2/diag" + "github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema" + "github.com/scaleway/scaleway-sdk-go/api/inference/v1" + "github.com/scaleway/scaleway-sdk-go/scw" + "github.com/scaleway/terraform-provider-scaleway/v2/internal/datasource" + "github.com/scaleway/terraform-provider-scaleway/v2/internal/types" + "github.com/scaleway/terraform-provider-scaleway/v2/internal/verify" +) + +func DataSourceModel() *schema.Resource { + dsSchema := datasource.SchemaFromResourceSchema(ResourceModel().Schema) + + datasource.AddOptionalFieldsToSchema(dsSchema, "url", "name") + dsSchema["name"].ConflictsWith = []string{"model_id"} + dsSchema["model_id"] = &schema.Schema{ + Type: schema.TypeString, + Optional: true, + Description: "The ID of the model", + ValidateDiagFunc: verify.IsUUIDWithLocality(), + ConflictsWith: []string{"name"}, + } + + return &schema.Resource{ + ReadContext: DataSourceModelRead, + Schema: dsSchema, 
+ } +} + +func DataSourceModelRead(ctx context.Context, d *schema.ResourceData, m interface{}) diag.Diagnostics { + api, region, err := NewAPIWithRegion(d, m) + if err != nil { + return diag.FromErr(err) + } + + modelID, ok := d.GetOk("model_id") + pageSize := uint32(1000) + + if !ok { + modelName := d.Get("name").(string) + + modelList, err := api.ListModels(&inference.ListModelsRequest{ + Region: region, + ProjectID: types.ExpandStringPtr(d.Get("project_id")), + PageSize: &pageSize, + }, scw.WithContext(ctx)) + if err != nil { + return diag.FromErr(err) + } + + foundModel, err := datasource.FindExact( + modelList.Models, + func(model *inference.Model) bool { + return model.Name == modelName + }, + modelName, + ) + if err != nil { + return diag.FromErr(err) + } + + modelID = foundModel.ID + } + + regionalID := datasource.NewRegionalID(modelID, region) + d.SetId(regionalID) + + err = d.Set("model_id", regionalID) + if err != nil { + return diag.FromErr(err) + } + + diags := ResourceModelRead(ctx, d, m) + if diags != nil { + return diags + } + + if d.Id() == "" { + return diag.FromErr(errors.New("model_id is empty")) + } + + return nil +} diff --git a/internal/services/inference/model_data_source_test.go b/internal/services/inference/model_data_source_test.go new file mode 100644 index 0000000000..4f908f981d --- /dev/null +++ b/internal/services/inference/model_data_source_test.go @@ -0,0 +1,78 @@ +package inference_test + +import ( + "fmt" + "testing" + + "github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource" + "github.com/scaleway/terraform-provider-scaleway/v2/internal/acctest" + inferencetestfuncs "github.com/scaleway/terraform-provider-scaleway/v2/internal/services/inference/testfuncs" +) + +func TestAccDataSourceModel_Basic(t *testing.T) { + tt := acctest.NewTestTools(t) + defer tt.Cleanup() + + modelName := "mistral/pixtral-12b-2409:bf16" + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { acctest.PreCheck(t) }, + ProviderFactories: 
tt.ProviderFactories, + Steps: []resource.TestStep{ + { + Config: fmt.Sprintf(` + data "scaleway_inference_model" "my-model" { + name = "%s" + } + +`, modelName), + Check: resource.ComposeTestCheckFunc( + testAccCheckModelExists(tt, "data.scaleway_inference_model.my-model"), + resource.TestCheckResourceAttr("data.scaleway_inference_model.my-model", "name", modelName), + ), + }, + }, + }) +} + +func TestAccDataSourceModel_Custom(t *testing.T) { + tt := acctest.NewTestTools(t) + defer tt.Cleanup() + + modelName := "TestAccDataSourceModel_Custom" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { acctest.PreCheck(t) }, + ProviderFactories: tt.ProviderFactories, + CheckDestroy: inferencetestfuncs.IsModelDestroyed(tt), + Steps: []resource.TestStep{ + { + Config: fmt.Sprintf(` + resource "scaleway_inference_model" "test" { + name = "%s" + url = "%s" + } + + `, modelName, modelURLCompatible), + Check: resource.ComposeTestCheckFunc( + testAccCheckModelExists(tt, "scaleway_inference_model.test"), + resource.TestCheckResourceAttr("scaleway_inference_model.test", "name", modelName), + ), + }, + { + Config: fmt.Sprintf(` + resource "scaleway_inference_model" "test" { + name = "%s" + url = "%s" + } + + data "scaleway_inference_model" "my-model" { + name = "%s" + }`, modelName, modelURLCompatible, modelName), + Check: resource.ComposeTestCheckFunc( + testAccCheckModelExists(tt, "data.scaleway_inference_model.my-model"), + resource.TestCheckResourceAttr("data.scaleway_inference_model.my-model", "name", modelName), + ), + }, + }, + }) +} diff --git a/internal/services/inference/model_test.go b/internal/services/inference/model_test.go new file mode 100644 index 0000000000..09f4db5fec --- /dev/null +++ b/internal/services/inference/model_test.go @@ -0,0 +1,101 @@ +package inference_test + +import ( + "fmt" + "testing" + + "github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource" + "github.com/hashicorp/terraform-plugin-sdk/v2/terraform" + inferenceSDK 
"github.com/scaleway/scaleway-sdk-go/api/inference/v1" + "github.com/scaleway/terraform-provider-scaleway/v2/internal/acctest" + "github.com/scaleway/terraform-provider-scaleway/v2/internal/services/inference" + inferencetestfuncs "github.com/scaleway/terraform-provider-scaleway/v2/internal/services/inference/testfuncs" +) + +const ( + modelURLCompatible = "https://huggingface.co/agentica-org/DeepCoder-14B-Preview" + nodeTypeH100 = "H100" +) + +func TestAccModel_Basic(t *testing.T) { + tt := acctest.NewTestTools(t) + defer tt.Cleanup() + + modelName := "TestAccModel_Basic" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { acctest.PreCheck(t) }, + ProviderFactories: tt.ProviderFactories, + CheckDestroy: inferencetestfuncs.IsModelDestroyed(tt), + Steps: []resource.TestStep{ + { + Config: fmt.Sprintf(` + resource "scaleway_inference_model" "test" { + name = "%s" + url = "%s" + }`, modelName, modelURLCompatible), + Check: resource.ComposeTestCheckFunc( + testAccCheckModelExists(tt, "scaleway_inference_model.test"), + resource.TestCheckResourceAttr("scaleway_inference_model.test", "name", modelName), + ), + }, + }, + }) +} + +func TestAccModel_DeployModelOnServer(t *testing.T) { + tt := acctest.NewTestTools(t) + defer tt.Cleanup() + + modelName := "TestAccModel_DeployModelOnServer" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { acctest.PreCheck(t) }, + ProviderFactories: tt.ProviderFactories, + CheckDestroy: inferencetestfuncs.IsModelDestroyed(tt), + Steps: []resource.TestStep{ + { + Config: fmt.Sprintf(` + resource "scaleway_inference_model" "test" { + name = "%s" + url = "%s" + } + resource "scaleway_inference_deployment" "main" { + name = "%s" + node_type = "%s" + model_id = scaleway_inference_model.test.id + public_endpoint { + is_enabled = true + } + accept_eula = true + } + `, modelName, modelURLCompatible, modelName, nodeTypeH100), + Check: resource.ComposeTestCheckFunc( + testAccCheckDeploymentExists(tt, 
"scaleway_inference_deployment.main"), + resource.TestCheckResourceAttr("scaleway_inference_deployment.main", "model_name", modelName), + ), + }, + }, + }) +} + +func testAccCheckModelExists(tt *acctest.TestTools, n string) resource.TestCheckFunc { + return func(state *terraform.State) error { + rs, ok := state.RootModule().Resources[n] + if !ok { + return fmt.Errorf("can't find model resource name: %s", n) + } + + api, region, id, err := inference.NewAPIWithRegionAndID(tt.Meta, rs.Primary.ID) + if err != nil { + return err + } + + _, err = api.GetModel(&inferenceSDK.GetModelRequest{ + Region: region, + ModelID: id, + }) + + return err + } +} diff --git a/internal/services/inference/testdata/data-source-model-basic.cassette.yaml b/internal/services/inference/testdata/data-source-model-basic.cassette.yaml new file mode 100644 index 0000000000..026a64256c --- /dev/null +++ b/internal/services/inference/testdata/data-source-model-basic.cassette.yaml @@ -0,0 +1,542 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 50297 + uncompressed: false + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context 
window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bi
ts":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_b
its":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quant
ization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414
165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":29}' + headers: + Content-Length: + - "50297" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 14 May 2025 16:04:52 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 157a83bd-c2b3-44ba-ad5f-a67fa0ab62cc + status: 200 OK + code: 200 + 
duration: 293.412084ms + - id: 1 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/1999f4f5-f038-4039-94ba-11a851917df5 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1753 + uncompressed: false + body: '{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowe
d":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"}' + headers: + Content-Length: + - "1753" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 14 May 2025 16:04:52 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 7080b849-e80d-4a68-af90-2482d9cd09f7 + status: 200 OK + code: 200 + duration: 149.1175ms + - id: 2 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 50297 + uncompressed: false + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context 
window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bi
ts":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_b
its":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quant
ization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414
165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":29}' + headers: + Content-Length: + - "50297" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 14 May 2025 16:04:53 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 2adc88fc-12b5-494e-9fd3-04a32691cacb + status: 200 OK + code: 200 + 
duration: 216.725541ms + - id: 3 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/1999f4f5-f038-4039-94ba-11a851917df5 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1753 + uncompressed: false + body: '{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowe
d":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"}' + headers: + Content-Length: + - "1753" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 14 May 2025 16:04:53 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 6d9e219a-af3c-450c-adb6-e72749365dfe + status: 200 OK + code: 200 + duration: 41.010625ms + - id: 4 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/1999f4f5-f038-4039-94ba-11a851917df5 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1753 + uncompressed: false + body: '{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"}' + headers: + Content-Length: + - "1753" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 14 May 2025 16:04:53 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - bb92cb4a-3b46-48f7-829c-f5b580b14d0e + status: 200 OK + code: 200 + duration: 47.172ms 
+ - id: 5 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 50297 + uncompressed: false + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true,"max_context
_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name":"TestAccCus
tomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":29}' + headers: + Content-Length: + - "50297" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 14 May 2025 16:04:54 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - d6d40d7e-19bd-4118-8db2-57ac58e137d9 + status: 200 OK + code: 200 + 
duration: 180.770584ms + - id: 6 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/1999f4f5-f038-4039-94ba-11a851917df5 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1753 + uncompressed: false + body: '{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowe
d":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"}' + headers: + Content-Length: + - "1753" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 14 May 2025 16:04:54 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - b52df8d8-346e-46cb-ba3a-59c02e03cfde + status: 200 OK + code: 200 + duration: 31.929375ms + - id: 7 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 50297 + uncompressed: false + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context 
window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bi
ts":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_b
its":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quant
ization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414
165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":29}' + headers: + Content-Length: + - "50297" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 14 May 2025 16:04:54 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 4fc544bf-a552-4b36-a7ef-3bb9fc874e93 + status: 200 OK + code: 200 + 
duration: 187.327125ms + - id: 8 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/1999f4f5-f038-4039-94ba-11a851917df5 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1753 + uncompressed: false + body: '{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowe
d":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"}' + headers: + Content-Length: + - "1753" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 14 May 2025 16:04:54 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - da48a560-594e-47b4-a0f3-1f0e92bf231d + status: 200 OK + code: 200 + duration: 41.073125ms + - id: 9 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 50297 + uncompressed: false + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context 
window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bi
ts":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_b
its":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quant
ization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414
165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":29}' + headers: + Content-Length: + - "50297" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 14 May 2025 16:04:55 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 90940d78-8556-4b3a-9779-15fc4edf8144 + status: 200 OK + code: 200 + 
duration: 201.329542ms + - id: 10 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/1999f4f5-f038-4039-94ba-11a851917df5 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1753 + uncompressed: false + body: '{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allow
ed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"}' + headers: + Content-Length: + - "1753" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 14 May 2025 16:04:55 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - e1e5daa0-92d2-46b6-90a1-12ef87a83d59 + status: 200 OK + code: 200 + duration: 34.163333ms diff --git a/internal/services/inference/testdata/data-source-model-custom.cassette.yaml b/internal/services/inference/testdata/data-source-model-custom.cassette.yaml new file mode 100644 index 0000000000..da4b3ecad4 --- /dev/null +++ b/internal/services/inference/testdata/data-source-model-custom.cassette.yaml @@ -0,0 +1,1326 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 169 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: '{"name":"TestAccDataSourceModel_Custom","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","source":{"url":"https://huggingface.co/agentica-org/DeepCoder-14B-Preview"}}' + form: {} + headers: + Content-Type: + - application/json + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1588 + uncompressed: false + body: 
'{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":0,"status":"preparing","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1588" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:42:37 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 87e39267-c84a-4d4c-94ca-53a386f63737 + status: 200 OK + code: 200 + duration: 
785.254458ms + - id: 1 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/929553e1-1b34-45d7-8a67-67d1e7147ef6 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1588 + uncompressed: false + body: '{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}]
,"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":0,"status":"preparing","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1588" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:42:37 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 6e28e08f-791c-4db8-bc25-f1e74ffcff23 + status: 200 OK + code: 200 + duration: 202.767208ms + - id: 2 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/929553e1-1b34-45d7-8a67-67d1e7147ef6 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1588 + uncompressed: false + body: 
'{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":0,"status":"preparing","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1588" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:43:37 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 44f439e2-45a9-4f56-b45a-7c451efdfc9f + status: 200 OK + code: 200 + duration: 
102.233458ms + - id: 3 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/929553e1-1b34-45d7-8a67-67d1e7147ef6 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1588 + uncompressed: false + body: '{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}]
,"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":0,"status":"preparing","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1588" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:44:37 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 95edd0af-1a59-4f17-96dd-c4eafee56685 + status: 200 OK + code: 200 + duration: 95.121792ms + - id: 4 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/929553e1-1b34-45d7-8a67-67d1e7147ef6 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1588 + uncompressed: false + body: 
'{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":0,"status":"preparing","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1588" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:45:37 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - d10654dc-7e5e-43f5-be9a-667fa09371a4 + status: 200 OK + code: 200 + duration: 183.46525ms 
+ - id: 5 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/929553e1-1b34-45d7-8a67-67d1e7147ef6 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1588 + uncompressed: false + body: '{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_s
ize_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":0,"status":"preparing","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1588" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:46:38 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge03) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - cf448fe6-4f4f-42df-b41c-b29404cd938b + status: 200 OK + code: 200 + duration: 187.352708ms + - id: 6 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/929553e1-1b34-45d7-8a67-67d1e7147ef6 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1594 + uncompressed: false + body: 
'{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725385,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1594" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:47:38 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 609c09ed-8b01-4726-92fa-7516edafe82b + status: 200 OK + code: 200 + duration: 
109.411583ms + - id: 7 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/929553e1-1b34-45d7-8a67-67d1e7147ef6 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1594 + uncompressed: false + body: '{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}]
,"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725385,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1594" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:47:38 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 6251bb89-1047-4fba-9c29-6a3982169e6a + status: 200 OK + code: 200 + duration: 67.107ms + - id: 8 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/929553e1-1b34-45d7-8a67-67d1e7147ef6 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1594 + uncompressed: false + body: 
'{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725385,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1594" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:47:38 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 7c1ee214-4d0a-45c1-9272-6e3847083596 + status: 200 OK + code: 200 + duration: 
47.381625ms + - id: 9 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/929553e1-1b34-45d7-8a67-67d1e7147ef6 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1594 + uncompressed: false + body: '{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],
"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725385,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1594" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:47:39 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - db935394-8cfb-48ef-8a92-204816754689 + status: 200 OK + code: 200 + duration: 93.608667ms + - id: 10 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/929553e1-1b34-45d7-8a67-67d1e7147ef6 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1594 + uncompressed: false + body: 
'{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725385,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1594" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:47:40 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 51ac4370-3e64-4657-b5b9-d977bcac93d1 + status: 200 OK + code: 200 + duration: 
44.921625ms + - id: 11 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 51893 + uncompressed: false + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true
,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name
":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allo
wed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725385,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":30}' + headers: + Content-Length: + - "51893" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:47:40 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 47b91eb7-32cf-4330-8b48-91969f5ec313 + status: 200 OK + code: 200 + 
duration: 195.04675ms + - id: 12 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/929553e1-1b34-45d7-8a67-67d1e7147ef6 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1594 + uncompressed: false + body: '{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bit
s":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725385,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1594" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:47:40 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - a195b5ff-28e6-4e2f-be18-4d98c139bc33 + status: 200 OK + code: 200 + duration: 39.400333ms + - id: 13 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 51893 + uncompressed: false + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context 
window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bi
ts":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_b
its":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id
":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725385,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":30}' + headers: + Content-Length: + - "51893" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:47:40 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 81e6f7ed-4892-4798-9853-26382f970777 + status: 200 OK + code: 200 + 
duration: 214.294875ms + - id: 14 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/929553e1-1b34-45d7-8a67-67d1e7147ef6 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1594 + uncompressed: false + body: '{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bi
ts":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725385,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1594" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:47:40 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 6edfb21d-e561-474b-b5b0-f713d599bb55 + status: 200 OK + code: 200 + duration: 37.8305ms + - id: 15 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/929553e1-1b34-45d7-8a67-67d1e7147ef6 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1594 + uncompressed: false + body: 
'{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725385,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1594" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:47:41 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 7c96d410-ab5d-481d-93b2-0d380694c8cb + status: 200 OK + code: 200 + duration: 
48.426208ms + - id: 16 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 51893 + uncompressed: false + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true
,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name
":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allo
wed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725385,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":30}' + headers: + Content-Length: + - "51893" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:47:41 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 18b9df43-a02d-4f0b-b50b-9b14f59ee01b + status: 200 OK + code: 200 + 
duration: 182.948833ms + - id: 17 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/929553e1-1b34-45d7-8a67-67d1e7147ef6 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1594 + uncompressed: false + body: '{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bi
ts":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725385,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1594" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:47:41 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 045e110a-bd86-4378-b0d8-32961b8624ea + status: 200 OK + code: 200 + duration: 48.260083ms + - id: 18 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/929553e1-1b34-45d7-8a67-67d1e7147ef6 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1594 + uncompressed: false + body: 
'{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725385,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1594" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:47:42 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 16287391-d324-473b-abfc-d7a06638db1b + status: 200 OK + code: 200 + duration: 
64.98525ms + - id: 19 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 51893 + uncompressed: false + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true,
"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name"
:"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allow
ed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725385,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":30}' + headers: + Content-Length: + - "51893" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:47:42 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 58c8f99e-95b0-4598-ae22-0f1a28c34fa2 + status: 200 OK + code: 200 + 
duration: 206.674083ms + - id: 20 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/929553e1-1b34-45d7-8a67-67d1e7147ef6 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1594 + uncompressed: false + body: '{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bi
ts":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725385,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1594" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:47:42 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - ef4b9004-f023-4082-918f-4d40ab102c5d + status: 200 OK + code: 200 + duration: 50.609125ms + - id: 21 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 51893 + uncompressed: false + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context 
window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bi
ts":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_b
its":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id
":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725385,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":30}' + headers: + Content-Length: + - "51893" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:47:43 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 01bbfc81-cd51-4d96-96a8-66345710625e + status: 200 OK + code: 200 + 
duration: 157.656834ms + - id: 22 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/929553e1-1b34-45d7-8a67-67d1e7147ef6 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1594 + uncompressed: false + body: '{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bi
ts":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725385,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1594" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:47:43 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - c50dd06d-b3ae-4107-be31-404e8c19f068 + status: 200 OK + code: 200 + duration: 40.353708ms + - id: 23 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/929553e1-1b34-45d7-8a67-67d1e7147ef6 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1594 + uncompressed: false + body: 
'{"created_at":"2025-05-15T08:42:37.110629Z","description":"","has_eula":false,"id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","name":"TestAccDataSourceModel_Custom","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725385,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1594" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:47:43 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 77ee5ed7-53ff-4c80-9dd8-9805863b8307 + status: 200 OK + code: 200 + duration: 
48.764625ms + - id: 24 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/929553e1-1b34-45d7-8a67-67d1e7147ef6 + method: DELETE + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 0 + uncompressed: false + body: "" + headers: + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:47:43 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 866cc1b1-304f-45f7-92d4-0b0caa5cb1a4 + status: 204 No Content + code: 204 + duration: 157.280542ms + - id: 25 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/929553e1-1b34-45d7-8a67-67d1e7147ef6 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 126 + uncompressed: false + body: '{"message":"resource is not found","resource":"Model","resource_id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","type":"not_found"}' + headers: + Content-Length: + - "126" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - 
application/json + Date: + - Thu, 15 May 2025 08:47:44 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - fe2e437f-6dbe-4977-952c-4431b33aa289 + status: 404 Not Found + code: 404 + duration: 26.940583ms + - id: 26 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/929553e1-1b34-45d7-8a67-67d1e7147ef6 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 126 + uncompressed: false + body: '{"message":"resource is not found","resource":"Model","resource_id":"929553e1-1b34-45d7-8a67-67d1e7147ef6","type":"not_found"}' + headers: + Content-Length: + - "126" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 08:47:44 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 9a8ca6f9-27fd-4f8b-a29c-d0ca2f1838cc + status: 404 Not Found + code: 404 + duration: 24.155459ms diff --git a/internal/services/inference/testdata/deployment-basic.cassette.yaml b/internal/services/inference/testdata/deployment-basic.cassette.yaml index dcc5044625..1790820652 100644 --- a/internal/services/inference/testdata/deployment-basic.cassette.yaml +++ b/internal/services/inference/testdata/deployment-basic.cassette.yaml @@ -6,41 +6,39 @@ interactions: proto: HTTP/1.1 proto_major: 1 proto_minor: 1 - 
content_length: 235 + content_length: 0 transfer_encoding: [] trailer: {} host: api.scaleway.com remote_addr: "" request_uri: "" - body: '{"name":"test-inference-deployment-basic","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","model_name":"meta/llama-3.1-8b-instruct:fp8","accept_eula":true,"node_type":"L4","tags":[],"endpoints":[{"public":{},"disable_auth":false}]}' + body: "" form: {} headers: - Content-Type: - - application/json User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments - method: POST + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 + method: GET response: proto: HTTP/2.0 proto_major: 2 proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 611 + content_length: 50297 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context 
window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bi
ts":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_b
its":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quant
ization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414
165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":29}' headers: Content-Length: - - "611" + - "50297" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:29:02 GMT + - Thu, 15 May 2025 08:52:49 GMT Server: - - Scaleway API Gateway (fr-par-2;edge01) + - Scaleway API Gateway (fr-par-1;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -48,10 +46,10 @@ interactions: X-Frame-Options: - DENY 
X-Request-Id: - - 3c6beb18-11d7-4431-8b99-ccb043f0afde + - c8f640c2-9e3e-40c6-83ca-9be4b7c6f652 status: 200 OK code: 200 - duration: 416.942708ms + duration: 358.939417ms - id: 1 request: proto: HTTP/1.1 @@ -67,8 +65,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/7205dbce-cc80-4b2a-bb7f-3fd3a804afc3 method: GET response: proto: HTTP/2.0 @@ -76,20 +74,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 611 + content_length: 1723 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + body: '{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"}' headers: Content-Length: - - "611" + - "1723" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:29:02 GMT + - Thu, 15 May 2025 08:52:49 GMT Server: - - Scaleway API Gateway (fr-par-2;edge01) + - Scaleway API Gateway (fr-par-1;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -97,10 +95,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: 
- - bc545a27-f2e4-471f-9033-6b22e40273ff + - afe3de9a-a703-4809-ac07-e6b20ce53a46 status: 200 OK code: 200 - duration: 97.465458ms + duration: 147.549708ms - id: 2 request: proto: HTTP/1.1 @@ -116,8 +114,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 method: GET response: proto: HTTP/2.0 @@ -125,20 +123,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 611 + content_length: 50297 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context 
window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bi
ts":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_b
its":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quant
ization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414
165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":29}' headers: Content-Length: - - "611" + - "50297" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:30:02 GMT + - Thu, 15 May 2025 08:52:50 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-1;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -146,10 +144,10 @@ interactions: X-Frame-Options: - DENY 
X-Request-Id: - - 2b9833e9-0c25-45a2-b36c-2d388cc11877 + - c86be0b2-ce70-4425-b603-76ef7a9e6983 status: 200 OK code: 200 - duration: 130.5175ms + duration: 178.915708ms - id: 3 request: proto: HTTP/1.1 @@ -165,8 +163,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/7205dbce-cc80-4b2a-bb7f-3fd3a804afc3 method: GET response: proto: HTTP/2.0 @@ -174,20 +172,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 611 + content_length: 1723 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + body: '{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"}' headers: Content-Length: - - "611" + - "1723" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:31:03 GMT + - Thu, 15 May 2025 08:52:50 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-1;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -195,48 +193,50 @@ interactions: X-Frame-Options: - DENY 
X-Request-Id: - - 0821d0f3-d0fc-4752-997d-dc7698e29681 + - 6f5d8043-2c7c-4c8f-bca2-01fc800349ae status: 200 OK code: 200 - duration: 560.825708ms + duration: 25.847625ms - id: 4 request: proto: HTTP/1.1 proto_major: 1 proto_minor: 1 - content_length: 0 + content_length: 278 transfer_encoding: [] trailer: {} host: api.scaleway.com remote_addr: "" request_uri: "" - body: "" + body: '{"name":"test-inference-deployment-basic","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","accept_eula":true,"node_type_name":"L4","tags":[],"min_size":1,"max_size":1,"endpoints":[{"public_network":{},"disable_auth":false}]}' form: {} headers: + Content-Type: + - application/json User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 - method: GET + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments + method: POST response: proto: HTTP/2.0 proto_major: 2 proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 611 + content_length: 624 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + body: 
'{"created_at":"2025-05-15T08:52:50.831593Z","endpoints":[{"disable_auth":false,"id":"456167a1-2cda-4080-b529-16b7b9bb6e36","public_network":{},"url":"https://4ef1f64f-1d16-472c-a5d9-a40a55424e4e.ifr.fr-par.scaleway.com"}],"id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' headers: Content-Length: - - "611" + - "624" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:32:03 GMT + - Thu, 15 May 2025 08:52:51 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-1;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -244,10 +244,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 95c1d1c9-b3d0-4ebe-81f6-b495de0ab8e9 + - 5f706c2b-1ccd-4003-a725-6520e6ddc95c status: 200 OK code: 200 - duration: 126.262167ms + duration: 201.534583ms - id: 5 request: proto: HTTP/1.1 @@ -263,8 +263,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e method: GET response: proto: HTTP/2.0 @@ -272,20 +272,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 611 + content_length: 624 uncompressed: false - body: 
'{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + body: '{"created_at":"2025-05-15T08:52:50.831593Z","endpoints":[{"disable_auth":false,"id":"456167a1-2cda-4080-b529-16b7b9bb6e36","public_network":{},"url":"https://4ef1f64f-1d16-472c-a5d9-a40a55424e4e.ifr.fr-par.scaleway.com"}],"id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' headers: Content-Length: - - "611" + - "624" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:33:03 GMT + - Thu, 15 May 2025 08:52:51 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-1;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -293,10 +293,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 31e84a22-73b8-4fc5-aff7-79d238d479ab + - cd7e871a-834f-4006-af94-2d2944a7d7bc status: 200 OK code: 200 - duration: 97.842875ms + duration: 58.44875ms - id: 6 request: proto: HTTP/1.1 @@ -312,8 +312,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) 
terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e method: GET response: proto: HTTP/2.0 @@ -321,20 +321,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 611 + content_length: 624 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + body: '{"created_at":"2025-05-15T08:52:50.831593Z","endpoints":[{"disable_auth":false,"id":"456167a1-2cda-4080-b529-16b7b9bb6e36","public_network":{},"url":"https://4ef1f64f-1d16-472c-a5d9-a40a55424e4e.ifr.fr-par.scaleway.com"}],"id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' headers: Content-Length: - - "611" + - "624" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:34:03 GMT + - Thu, 
15 May 2025 08:53:51 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-1;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -342,10 +342,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 437186cd-92ca-432a-99a7-7dbf9b0ec5c3 + - e253be30-6c77-458c-bf85-6880b87c874e status: 200 OK code: 200 - duration: 136.54325ms + duration: 133.146666ms - id: 7 request: proto: HTTP/1.1 @@ -361,8 +361,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e method: GET response: proto: HTTP/2.0 @@ -370,20 +370,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 611 + content_length: 624 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + body: 
'{"created_at":"2025-05-15T08:52:50.831593Z","endpoints":[{"disable_auth":false,"id":"456167a1-2cda-4080-b529-16b7b9bb6e36","public_network":{},"url":"https://4ef1f64f-1d16-472c-a5d9-a40a55424e4e.ifr.fr-par.scaleway.com"}],"id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' headers: Content-Length: - - "611" + - "624" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:35:03 GMT + - Thu, 15 May 2025 08:54:51 GMT Server: - - Scaleway API Gateway (fr-par-2;edge01) + - Scaleway API Gateway (fr-par-1;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -391,10 +391,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 26e73cb3-e0f3-48d0-939e-259bb147eb35 + - 33b1402e-2c0d-4dea-bf8e-26b61b1c8c60 status: 200 OK code: 200 - duration: 142.564417ms + duration: 109.249709ms - id: 8 request: proto: HTTP/1.1 @@ -410,8 +410,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e method: GET response: proto: HTTP/2.0 @@ -419,20 +419,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 611 + content_length: 670 uncompressed: false - body: 
'{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + body: '{"created_at":"2025-05-15T08:52:50.831593Z","endpoints":[{"disable_auth":false,"id":"456167a1-2cda-4080-b529-16b7b9bb6e36","public_network":{},"url":"https://4ef1f64f-1d16-472c-a5d9-a40a55424e4e.ifr.fr-par.scaleway.com"}],"error_message":"","id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-05-15T08:55:56.629724Z"}' headers: Content-Length: - - "611" + - "670" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:36:04 GMT + - Thu, 15 May 2025 08:56:08 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-1;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -440,10 +440,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 176a02e5-d2f9-48e3-9fc5-bfb1a76f6b28 + - 35f20d1d-02ee-4f99-9a9d-406f61bbf52c status: 200 OK code: 200 - duration: 150.126583ms + duration: 254.926917ms - id: 9 request: proto: HTTP/1.1 @@ -459,8 +459,8 @@ interactions: form: {} headers: User-Agent: - - 
scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e method: GET response: proto: HTTP/2.0 @@ -468,20 +468,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 611 + content_length: 670 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + body: '{"created_at":"2025-05-15T08:52:50.831593Z","endpoints":[{"disable_auth":false,"id":"456167a1-2cda-4080-b529-16b7b9bb6e36","public_network":{},"url":"https://4ef1f64f-1d16-472c-a5d9-a40a55424e4e.ifr.fr-par.scaleway.com"}],"error_message":"","id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-05-15T08:55:56.629724Z"}' headers: Content-Length: - - "611" + - "670" Content-Security-Policy: - default-src 'none'; 
frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:37:04 GMT + - Thu, 15 May 2025 08:57:09 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-1;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -489,10 +489,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 6b20f6ef-dfb5-46f1-b632-d74132b5b564 + - ce2f18ab-891b-4350-952e-ff23717a998f status: 200 OK code: 200 - duration: 173.423666ms + duration: 87.476083ms - id: 10 request: proto: HTTP/1.1 @@ -508,8 +508,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e method: GET response: proto: HTTP/2.0 @@ -517,20 +517,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 637 + content_length: 670 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2024-10-24T13:37:28.607817Z"}' + body: 
'{"created_at":"2025-05-15T08:52:50.831593Z","endpoints":[{"disable_auth":false,"id":"456167a1-2cda-4080-b529-16b7b9bb6e36","public_network":{},"url":"https://4ef1f64f-1d16-472c-a5d9-a40a55424e4e.ifr.fr-par.scaleway.com"}],"error_message":"","id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-05-15T08:55:56.629724Z"}' headers: Content-Length: - - "637" + - "670" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:38:04 GMT + - Thu, 15 May 2025 08:58:09 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-1;edge03) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -538,10 +538,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 7a53020a-2d85-4db5-bfbb-9d93f605e9cd + - 19ab7250-5349-4f37-83f9-6f20a91d1db4 status: 200 OK code: 200 - duration: 113.966625ms + duration: 81.852959ms - id: 11 request: proto: HTTP/1.1 @@ -557,8 +557,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e method: GET response: proto: HTTP/2.0 @@ -566,20 +566,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 637 + content_length: 670 
uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2024-10-24T13:37:28.607817Z"}' + body: '{"created_at":"2025-05-15T08:52:50.831593Z","endpoints":[{"disable_auth":false,"id":"456167a1-2cda-4080-b529-16b7b9bb6e36","public_network":{},"url":"https://4ef1f64f-1d16-472c-a5d9-a40a55424e4e.ifr.fr-par.scaleway.com"}],"error_message":"","id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-05-15T08:55:56.629724Z"}' headers: Content-Length: - - "637" + - "670" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:39:04 GMT + - Thu, 15 May 2025 08:59:10 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-1;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -587,10 +587,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 379c066b-d9c2-424a-950c-094380001fde + - bfaafaa8-08bc-4c09-9a7e-c6ac17d39539 status: 200 OK code: 200 - duration: 123.212959ms + duration: 1.100722292s - id: 12 request: proto: HTTP/1.1 @@ -606,8 +606,8 @@ interactions: 
form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e method: GET response: proto: HTTP/2.0 @@ -615,20 +615,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 637 + content_length: 670 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2024-10-24T13:37:28.607817Z"}' + body: '{"created_at":"2025-05-15T08:52:50.831593Z","endpoints":[{"disable_auth":false,"id":"456167a1-2cda-4080-b529-16b7b9bb6e36","public_network":{},"url":"https://4ef1f64f-1d16-472c-a5d9-a40a55424e4e.ifr.fr-par.scaleway.com"}],"error_message":"","id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-05-15T08:55:56.629724Z"}' headers: Content-Length: - - 
"637" + - "670" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:40:04 GMT + - Thu, 15 May 2025 09:00:10 GMT Server: - - Scaleway API Gateway (fr-par-2;edge01) + - Scaleway API Gateway (fr-par-1;edge01) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -636,10 +636,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 553a1c2a-7aae-4853-9222-79ab6a91ee87 + - 6d03b15a-c9b8-4601-8d5d-d02f3e22718a status: 200 OK code: 200 - duration: 121.030542ms + duration: 97.321208ms - id: 13 request: proto: HTTP/1.1 @@ -655,8 +655,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e method: GET response: proto: HTTP/2.0 @@ -664,20 +664,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 637 + content_length: 670 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2024-10-24T13:37:28.607817Z"}' + body: 
'{"created_at":"2025-05-15T08:52:50.831593Z","endpoints":[{"disable_auth":false,"id":"456167a1-2cda-4080-b529-16b7b9bb6e36","public_network":{},"url":"https://4ef1f64f-1d16-472c-a5d9-a40a55424e4e.ifr.fr-par.scaleway.com"}],"error_message":"","id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-05-15T08:55:56.629724Z"}' headers: Content-Length: - - "637" + - "670" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:41:04 GMT + - Thu, 15 May 2025 09:01:10 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-1;edge01) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -685,10 +685,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 22ad4139-8e1f-4be2-a84f-dbb093b46c30 + - 144a0f6e-ae62-4c92-a6ab-f2164665787c status: 200 OK code: 200 - duration: 107.486291ms + duration: 101.9215ms - id: 14 request: proto: HTTP/1.1 @@ -704,8 +704,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e method: GET response: proto: HTTP/2.0 @@ -713,20 +713,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 637 + content_length: 670 
uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2024-10-24T13:37:28.607817Z"}' + body: '{"created_at":"2025-05-15T08:52:50.831593Z","endpoints":[{"disable_auth":false,"id":"456167a1-2cda-4080-b529-16b7b9bb6e36","public_network":{},"url":"https://4ef1f64f-1d16-472c-a5d9-a40a55424e4e.ifr.fr-par.scaleway.com"}],"error_message":"","id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-05-15T08:55:56.629724Z"}' headers: Content-Length: - - "637" + - "670" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:42:04 GMT + - Thu, 15 May 2025 09:02:10 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-1;edge03) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -734,10 +734,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 4649fa1b-d7fb-45d9-aa67-eadeeaca6e77 + - d63ffc83-7a0f-4229-b86a-d866ab5dfa70 status: 200 OK code: 200 - duration: 133.87825ms + duration: 121.971875ms - id: 15 request: proto: HTTP/1.1 @@ -753,8 +753,8 @@ interactions: 
form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e method: GET response: proto: HTTP/2.0 @@ -762,20 +762,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 637 + content_length: 670 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2024-10-24T13:37:28.607817Z"}' + body: '{"created_at":"2025-05-15T08:52:50.831593Z","endpoints":[{"disable_auth":false,"id":"456167a1-2cda-4080-b529-16b7b9bb6e36","public_network":{},"url":"https://4ef1f64f-1d16-472c-a5d9-a40a55424e4e.ifr.fr-par.scaleway.com"}],"error_message":"","id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-05-15T08:55:56.629724Z"}' headers: Content-Length: - - 
"637" + - "670" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:43:05 GMT + - Thu, 15 May 2025 09:03:10 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-1;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -783,10 +783,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 74a6a807-25e8-4484-b70b-73543f52707c + - cb9f39bc-d85e-44d9-b8e2-e79fb86849d9 status: 200 OK code: 200 - duration: 158.331917ms + duration: 96.822583ms - id: 16 request: proto: HTTP/1.1 @@ -802,8 +802,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e method: GET response: proto: HTTP/2.0 @@ -811,20 +811,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 637 + content_length: 670 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2024-10-24T13:37:28.607817Z"}' + body: 
'{"created_at":"2025-05-15T08:52:50.831593Z","endpoints":[{"disable_auth":false,"id":"456167a1-2cda-4080-b529-16b7b9bb6e36","public_network":{},"url":"https://4ef1f64f-1d16-472c-a5d9-a40a55424e4e.ifr.fr-par.scaleway.com"}],"error_message":"","id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-05-15T08:55:56.629724Z"}' headers: Content-Length: - - "637" + - "670" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:44:05 GMT + - Thu, 15 May 2025 09:04:10 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-1;edge03) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -832,10 +832,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 033e7084-1f68-41ac-bf94-5f698a6136d1 + - cf5c38e9-7ab2-4a6c-bbd6-e0c87dca7d9f status: 200 OK code: 200 - duration: 134.344292ms + duration: 75.637708ms - id: 17 request: proto: HTTP/1.1 @@ -851,8 +851,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e method: GET response: proto: HTTP/2.0 @@ -860,20 +860,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 637 + content_length: 670 
uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2024-10-24T13:37:28.607817Z"}' + body: '{"created_at":"2025-05-15T08:52:50.831593Z","endpoints":[{"disable_auth":false,"id":"456167a1-2cda-4080-b529-16b7b9bb6e36","public_network":{},"url":"https://4ef1f64f-1d16-472c-a5d9-a40a55424e4e.ifr.fr-par.scaleway.com"}],"error_message":"","id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-05-15T08:55:56.629724Z"}' headers: Content-Length: - - "637" + - "670" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:45:05 GMT + - Thu, 15 May 2025 09:05:10 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-1;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -881,10 +881,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 9c6e19ff-fd34-4315-ab65-6b46f542d5dd + - 754f3229-b9c6-4eac-bcdf-6ffe23f465cf status: 200 OK code: 200 - duration: 101.461459ms + duration: 123.70525ms - id: 18 request: proto: HTTP/1.1 @@ -900,8 +900,8 @@ interactions: 
form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e method: GET response: proto: HTTP/2.0 @@ -909,20 +909,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 633 + content_length: 666 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2024-10-24T13:45:54.400813Z"}' + body: '{"created_at":"2025-05-15T08:52:50.831593Z","endpoints":[{"disable_auth":false,"id":"456167a1-2cda-4080-b529-16b7b9bb6e36","public_network":{},"url":"https://4ef1f64f-1d16-472c-a5d9-a40a55424e4e.ifr.fr-par.scaleway.com"}],"error_message":"","id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2025-05-15T09:05:15.085852Z"}' headers: Content-Length: - - "633" + - 
"666" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:05 GMT + - Thu, 15 May 2025 09:06:11 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-1;edge01) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -930,10 +930,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 8ecf08ee-7926-4b03-b554-24c2ec7ac9de + - f6453bcf-1be7-47d2-8fe9-d81a90c7a3a6 status: 200 OK code: 200 - duration: 130.801834ms + duration: 110.141542ms - id: 19 request: proto: HTTP/1.1 @@ -949,8 +949,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e method: GET response: proto: HTTP/2.0 @@ -958,20 +958,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 633 + content_length: 666 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2024-10-24T13:45:54.400813Z"}' + body: 
'{"created_at":"2025-05-15T08:52:50.831593Z","endpoints":[{"disable_auth":false,"id":"456167a1-2cda-4080-b529-16b7b9bb6e36","public_network":{},"url":"https://4ef1f64f-1d16-472c-a5d9-a40a55424e4e.ifr.fr-par.scaleway.com"}],"error_message":"","id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2025-05-15T09:05:15.085852Z"}' headers: Content-Length: - - "633" + - "666" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:05 GMT + - Thu, 15 May 2025 09:06:11 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-1;edge01) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -979,10 +979,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 7aa048ee-4de5-4dad-9f47-0181b4fd62cd + - 2edddd96-4b35-42f0-81da-7cf77c32cade status: 200 OK code: 200 - duration: 61.764917ms + duration: 57.876084ms - id: 20 request: proto: HTTP/1.1 @@ -998,8 +998,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e method: GET response: proto: HTTP/2.0 @@ -1007,20 +1007,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 633 + content_length: 666 
uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2024-10-24T13:45:54.400813Z"}' + body: '{"created_at":"2025-05-15T08:52:50.831593Z","endpoints":[{"disable_auth":false,"id":"456167a1-2cda-4080-b529-16b7b9bb6e36","public_network":{},"url":"https://4ef1f64f-1d16-472c-a5d9-a40a55424e4e.ifr.fr-par.scaleway.com"}],"error_message":"","id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2025-05-15T09:05:15.085852Z"}' headers: Content-Length: - - "633" + - "666" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:05 GMT + - Thu, 15 May 2025 09:06:11 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-1;edge01) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1028,10 +1028,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - d70eb0de-97ad-4fc3-8ca8-ad6dd958028a + - 865320e9-3813-4766-8587-cf137cd1a1e3 status: 200 OK code: 200 - duration: 64.520958ms + duration: 48.350334ms - id: 21 request: proto: HTTP/1.1 @@ -1047,8 +1047,8 @@ interactions: form: {} 
headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 method: GET response: proto: HTTP/2.0 @@ -1056,20 +1056,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 633 + content_length: 50297 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2024-10-24T13:45:54.400813Z"}' + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context 
window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bi
ts":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_b
its":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quant
ization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414
165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":29}' headers: Content-Length: - - "633" + - "50297" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:05 GMT + - Thu, 15 May 2025 09:06:12 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-1;edge01) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1077,10 +1077,10 @@ interactions: X-Frame-Options: - DENY 
X-Request-Id: - - 802fff30-5496-417e-b3b4-978cb481edd1 + - b4f0b99a-846e-4b10-bbf8-93066e61df6a status: 200 OK code: 200 - duration: 63.167042ms + duration: 588.501084ms - id: 22 request: proto: HTTP/1.1 @@ -1096,8 +1096,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/7205dbce-cc80-4b2a-bb7f-3fd3a804afc3 method: GET response: proto: HTTP/2.0 @@ -1105,20 +1105,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 633 + content_length: 1723 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2024-10-24T13:45:54.400813Z"}' + body: '{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"}' headers: Content-Length: - - "633" + - "1723" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:06 GMT + - Thu, 15 May 2025 09:06:12 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-1;edge01) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1126,10 +1126,10 @@ interactions: X-Frame-Options: - DENY 
X-Request-Id: - - 4c13384e-44b7-49ac-aab1-6a2b4f61e73b + - f0cd64cc-c61a-4ba9-8859-cd01a925f125 status: 200 OK code: 200 - duration: 65.279292ms + duration: 136.611542ms - id: 23 request: proto: HTTP/1.1 @@ -1145,29 +1145,29 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 - method: DELETE + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 + method: GET response: proto: HTTP/2.0 proto_major: 2 proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 636 + content_length: 50297 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":1,"status":"deleting","tags":[],"updated_at":"2024-10-24T13:45:54.400813Z"}' + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context 
window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bi
ts":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_b
its":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quant
ization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414
165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":29}' headers: Content-Length: - - "636" + - "50297" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:06 GMT + - Thu, 15 May 2025 09:06:13 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-1;edge01) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1175,10 +1175,10 @@ interactions: X-Frame-Options: - DENY 
X-Request-Id: - - 4168cc76-45f4-4964-a9bc-53eebcc0a96a + - 009ab078-d1c5-4468-97db-a6ff0a1b47ec status: 200 OK code: 200 - duration: 164.292334ms + duration: 221.551333ms - id: 24 request: proto: HTTP/1.1 @@ -1194,8 +1194,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/7205dbce-cc80-4b2a-bb7f-3fd3a804afc3 method: GET response: proto: HTTP/2.0 @@ -1203,20 +1203,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 636 + content_length: 1723 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.345060Z","endpoints":[{"disable_auth":false,"id":"323ee315-02b1-4ddd-acb2-c94a4ed78039","public_access":{},"url":"https://20043a05-db6e-4ef5-bec6-40089e6d13d0.ifr.fr-par.scaleway.com"}],"error_message":null,"id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":1,"status":"deleting","tags":[],"updated_at":"2024-10-24T13:45:54.400813Z"}' + body: '{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"}' headers: Content-Length: - - "636" + - "1723" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:06 GMT + - Thu, 15 May 2025 09:06:13 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-1;edge01) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1224,10 +1224,10 @@ interactions: X-Frame-Options: - DENY 
X-Request-Id: - - 0f140049-ef2d-40e6-8904-e4902dd505c8 + - 09dbaddb-f519-481b-9268-2a0d573f474b status: 200 OK code: 200 - duration: 71.520792ms + duration: 53.621292ms - id: 25 request: proto: HTTP/1.1 @@ -1243,8 +1243,302 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 666 + uncompressed: false + body: '{"created_at":"2025-05-15T08:52:50.831593Z","endpoints":[{"disable_auth":false,"id":"456167a1-2cda-4080-b529-16b7b9bb6e36","public_network":{},"url":"https://4ef1f64f-1d16-472c-a5d9-a40a55424e4e.ifr.fr-par.scaleway.com"}],"error_message":"","id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2025-05-15T09:05:15.085852Z"}' + headers: + Content-Length: + - "666" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 09:06:13 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 6f1fe9d6-410b-460e-8f60-4824e36cecec + status: 200 OK + code: 200 + 
duration: 54.081292ms + - id: 26 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 50297 + uncompressed: false + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"all
owed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be
619","name":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":29}' + headers: + Content-Length: + - "50297" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 09:06:13 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 46184f73-2e15-4960-9ae8-e468a421009f + status: 200 OK + code: 200 + 
duration: 210.549916ms + - id: 27 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/7205dbce-cc80-4b2a-bb7f-3fd3a804afc3 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1723 + uncompressed: false + body: '{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_contex
t_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"}' + headers: + Content-Length: + - "1723" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 09:06:13 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 229d2c3c-ffa1-400a-a75f-b702f2f80c21 + status: 200 OK + code: 200 + duration: 35.025125ms + - id: 28 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 666 + uncompressed: false + body: 
'{"created_at":"2025-05-15T08:52:50.831593Z","endpoints":[{"disable_auth":false,"id":"456167a1-2cda-4080-b529-16b7b9bb6e36","public_network":{},"url":"https://4ef1f64f-1d16-472c-a5d9-a40a55424e4e.ifr.fr-par.scaleway.com"}],"error_message":"","id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2025-05-15T09:05:15.085852Z"}' + headers: + Content-Length: + - "666" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 09:06:14 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 1678bd93-4ba7-4835-b5cc-6bcf8c34986a + status: 200 OK + code: 200 + duration: 398.651667ms + - id: 29 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e + method: DELETE + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 669 + uncompressed: false + body: 
'{"created_at":"2025-05-15T08:52:50.831593Z","endpoints":[{"disable_auth":false,"id":"456167a1-2cda-4080-b529-16b7b9bb6e36","public_network":{},"url":"https://4ef1f64f-1d16-472c-a5d9-a40a55424e4e.ifr.fr-par.scaleway.com"}],"error_message":"","id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"deleting","tags":[],"updated_at":"2025-05-15T09:05:15.085852Z"}' + headers: + Content-Length: + - "669" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 09:06:15 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - d8d59438-49e7-4f5c-81ce-69c3d3b695d4 + status: 200 OK + code: 200 + duration: 357.046542ms + - id: 30 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 669 + uncompressed: false + body: 
'{"created_at":"2025-05-15T08:52:50.831593Z","endpoints":[{"disable_auth":false,"id":"456167a1-2cda-4080-b529-16b7b9bb6e36","public_network":{},"url":"https://4ef1f64f-1d16-472c-a5d9-a40a55424e4e.ifr.fr-par.scaleway.com"}],"error_message":"","id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"deleting","tags":[],"updated_at":"2025-05-15T09:05:15.085852Z"}' + headers: + Content-Length: + - "669" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 09:06:15 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - d2165c78-86a1-443a-8d8a-d20f0c36094a + status: 200 OK + code: 200 + duration: 57.455333ms + - id: 31 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e method: GET response: proto: HTTP/2.0 @@ -1254,7 +1548,7 @@ interactions: trailer: {} content_length: 131 uncompressed: false - body: '{"message":"resource is not found","resource":"deployment","resource_id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","type":"not_found"}' + body: '{"message":"resource is not found","resource":"deployment","resource_id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","type":"not_found"}' 
headers: Content-Length: - "131" @@ -1263,9 +1557,9 @@ interactions: Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:47:06 GMT + - Thu, 15 May 2025 09:07:15 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-1;edge03) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1273,11 +1567,11 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 6aa1a3bf-6696-4159-9d71-1521b634517f + - ae7e3f1b-e162-47ea-94c2-4cf0fe01ca1b status: 404 Not Found code: 404 - duration: 56.452333ms - - id: 26 + duration: 60.293333ms + - id: 32 request: proto: HTTP/1.1 proto_major: 1 @@ -1292,8 +1586,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/20043a05-db6e-4ef5-bec6-40089e6d13d0 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/4ef1f64f-1d16-472c-a5d9-a40a55424e4e method: GET response: proto: HTTP/2.0 @@ -1303,7 +1597,7 @@ interactions: trailer: {} content_length: 131 uncompressed: false - body: '{"message":"resource is not found","resource":"deployment","resource_id":"20043a05-db6e-4ef5-bec6-40089e6d13d0","type":"not_found"}' + body: '{"message":"resource is not found","resource":"deployment","resource_id":"4ef1f64f-1d16-472c-a5d9-a40a55424e4e","type":"not_found"}' headers: Content-Length: - "131" @@ -1312,9 +1606,9 @@ interactions: Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:47:06 GMT + - Thu, 15 May 2025 09:07:15 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-1;edge03) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1322,7 +1616,7 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 
cdd798bc-67a2-4dca-9f2d-630ee27d2c97 + - 6fbcb2a4-6356-4b53-bb79-ed222d861500 status: 404 Not Found code: 404 - duration: 38.865166ms + duration: 37.401375ms diff --git a/internal/services/inference/testdata/deployment-endpoint.cassette.yaml b/internal/services/inference/testdata/deployment-endpoint.cassette.yaml index 268937ee3f..17f8002dd5 100644 --- a/internal/services/inference/testdata/deployment-endpoint.cassette.yaml +++ b/internal/services/inference/testdata/deployment-endpoint.cassette.yaml @@ -6,41 +6,39 @@ interactions: proto: HTTP/1.1 proto_major: 1 proto_minor: 1 - content_length: 118 + content_length: 0 transfer_encoding: [] trailer: {} host: api.scaleway.com remote_addr: "" request_uri: "" - body: '{"name":"private-network-test-inference","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","tags":[],"subnets":null}' + body: "" form: {} headers: - Content-Type: - - application/json User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/vpc/v2/regions/fr-par/private-networks - method: POST + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 + method: GET response: proto: HTTP/2.0 proto_major: 2 proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 1056 + content_length: 50297 uncompressed: false - body: 
'{"created_at":"2024-10-24T13:29:02.392287Z","dhcp_enabled":true,"id":"5213e862-3c32-4e23-8c5a-fb079958acea","name":"private-network-test-inference","organization_id":"105bdce1-64c0-48ab-899d-868455867ecf","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","subnets":[{"created_at":"2024-10-24T13:29:02.392287Z","id":"55561a48-f44e-404e-89f9-7d31f442d655","private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","subnet":"172.16.80.0/22","updated_at":"2024-10-24T13:29:02.392287Z","vpc_id":"8feba4f5-79f9-42cd-b5ce-3ed8c510569e"},{"created_at":"2024-10-24T13:29:02.392287Z","id":"9dda5654-4cf2-44d5-9a94-59075b96ea81","private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","subnet":"fd5f:519c:6d46:9f1::/64","updated_at":"2024-10-24T13:29:02.392287Z","vpc_id":"8feba4f5-79f9-42cd-b5ce-3ed8c510569e"}],"tags":[],"updated_at":"2024-10-24T13:29:02.392287Z","vpc_id":"8feba4f5-79f9-42cd-b5ce-3ed8c510569e"}' + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context 
window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bi
ts":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_b
its":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quant
ization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414
165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":29}' headers: Content-Length: - - "1056" + - "50297" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:29:02 GMT + - Thu, 15 May 2025 09:36:48 GMT Server: - - Scaleway API Gateway (fr-par-2;edge01) + - Scaleway API Gateway (fr-par-1;edge03) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -48,10 +46,10 @@ interactions: X-Frame-Options: - DENY 
X-Request-Id: - - d8085215-703c-40f0-9ebe-b34d6ff21ef3 + - 7913357e-5f2f-4d5c-8365-7b9a1e972f29 status: 200 OK code: 200 - duration: 675.246667ms + duration: 252.033333ms - id: 1 request: proto: HTTP/1.1 @@ -67,8 +65,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/vpc/v2/regions/fr-par/private-networks/5213e862-3c32-4e23-8c5a-fb079958acea + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/7205dbce-cc80-4b2a-bb7f-3fd3a804afc3 method: GET response: proto: HTTP/2.0 @@ -76,20 +74,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 1056 + content_length: 1723 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.392287Z","dhcp_enabled":true,"id":"5213e862-3c32-4e23-8c5a-fb079958acea","name":"private-network-test-inference","organization_id":"105bdce1-64c0-48ab-899d-868455867ecf","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","subnets":[{"created_at":"2024-10-24T13:29:02.392287Z","id":"55561a48-f44e-404e-89f9-7d31f442d655","private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","subnet":"172.16.80.0/22","updated_at":"2024-10-24T13:29:02.392287Z","vpc_id":"8feba4f5-79f9-42cd-b5ce-3ed8c510569e"},{"created_at":"2024-10-24T13:29:02.392287Z","id":"9dda5654-4cf2-44d5-9a94-59075b96ea81","private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","subnet":"fd5f:519c:6d46:9f1::/64","updated_at":"2024-10-24T13:29:02.392287Z","vpc_id":"8feba4f5-79f9-42cd-b5ce-3ed8c510569e"}],"tags":[],"updated_at":"2024-10-24T13:29:02.392287Z","vpc_id":"8feba4f5-79f9-42cd-b5ce-3ed8c510569e"}' + body: 
'{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"}' headers: Content-Length: - - "1056" + - "1723" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:29:02 GMT + - Thu, 15 May 2025 09:36:48 GMT Server: - - Scaleway API Gateway (fr-par-2;edge01) + - Scaleway API Gateway (fr-par-1;edge03) 
Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -97,50 +95,48 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - d90fd1bd-801f-4d08-bfc5-b9d186c597b2 + - 3d41f6d5-cf3c-4f9c-a7c7-308268358524 status: 200 OK code: 200 - duration: 38.378416ms + duration: 39.664417ms - id: 2 request: proto: HTTP/1.1 proto_major: 1 proto_minor: 1 - content_length: 314 + content_length: 0 transfer_encoding: [] trailer: {} host: api.scaleway.com remote_addr: "" request_uri: "" - body: '{"name":"test-inference-deployment-endpoint-private","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","model_name":"meta/llama-3.1-8b-instruct:fp8","accept_eula":true,"node_type":"L4","tags":[],"endpoints":[{"private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"disable_auth":false}]}' + body: "" form: {} headers: - Content-Type: - - application/json User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments - method: POST + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 + method: GET response: proto: HTTP/2.0 proto_major: 2 proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 690 + content_length: 50297 uncompressed: false - body: 
'{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"q
uantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f7
4-24a1-4173-911b-26924c1be619","name":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":29}' headers: Content-Length: - - "690" + - "50297" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:29:03 GMT + - Thu, 15 May 2025 09:36:48 GMT Server: - - Scaleway API Gateway (fr-par-2;edge01) + - Scaleway API Gateway (fr-par-1;edge03) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -148,10 +144,10 @@ interactions: X-Frame-Options: - DENY 
X-Request-Id: - - 74261352-8793-4687-ad52-0c16f27f7483 + - 4870582c-8d51-462a-976f-c2c6541e8a8f status: 200 OK code: 200 - duration: 394.979208ms + duration: 229.922375ms - id: 3 request: proto: HTTP/1.1 @@ -167,8 +163,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/7205dbce-cc80-4b2a-bb7f-3fd3a804afc3 method: GET response: proto: HTTP/2.0 @@ -176,20 +172,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 690 + content_length: 1723 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + body: '{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"}' headers: Content-Length: - - "690" + - "1723" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:29:03 GMT + - Thu, 15 May 2025 09:36:48 GMT Server: - - Scaleway API Gateway (fr-par-2;edge01) + - Scaleway API Gateway (fr-par-1;edge03) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -197,48 +193,50 @@ interactions: X-Frame-Options: - DENY 
X-Request-Id: - - 8ff48fbd-45ed-4f4e-a84b-320f5e45f457 + - 0e228506-fbff-4b2a-8ea6-bb3dcdf4e682 status: 200 OK code: 200 - duration: 77.838917ms + duration: 33.941458ms - id: 4 request: proto: HTTP/1.1 proto_major: 1 proto_minor: 1 - content_length: 0 + content_length: 118 transfer_encoding: [] trailer: {} host: api.scaleway.com remote_addr: "" request_uri: "" - body: "" + body: '{"name":"private-network-test-inference","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","tags":[],"subnets":null}' form: {} headers: + Content-Type: + - application/json User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 - method: GET + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/vpc/v2/regions/fr-par/private-networks + method: POST response: proto: HTTP/2.0 proto_major: 2 proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 690 + content_length: 1057 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + body: 
'{"created_at":"2025-05-15T09:36:49.519324Z","dhcp_enabled":true,"id":"552f5c0d-40f0-4d67-bb57-727c50641ba9","name":"private-network-test-inference","organization_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","subnets":[{"created_at":"2025-05-15T09:36:49.519324Z","id":"7633077d-66da-4644-bc1b-dbdf8bca995b","private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","subnet":"172.16.68.0/22","updated_at":"2025-05-15T09:36:49.519324Z","vpc_id":"e092f3d5-d85b-46fd-8d08-025e3282c8c1"},{"created_at":"2025-05-15T09:36:49.519324Z","id":"7488b8ca-80eb-42b4-aad5-a17471547dfa","private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","subnet":"fd64:badd:7710:82eb::/64","updated_at":"2025-05-15T09:36:49.519324Z","vpc_id":"e092f3d5-d85b-46fd-8d08-025e3282c8c1"}],"tags":[],"updated_at":"2025-05-15T09:36:49.519324Z","vpc_id":"e092f3d5-d85b-46fd-8d08-025e3282c8c1"}' headers: Content-Length: - - "690" + - "1057" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:30:03 GMT + - Thu, 15 May 2025 09:36:50 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-1;edge03) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -246,10 +244,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - b4c7654b-adb9-455a-8fc3-ce191a1df037 + - 1c97940e-4b11-432a-9bf7-5b400527a0fa status: 200 OK code: 200 - duration: 106.647667ms + duration: 530.426167ms - id: 5 request: proto: HTTP/1.1 @@ -265,8 +263,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - 
scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/vpc/v2/regions/fr-par/private-networks/552f5c0d-40f0-4d67-bb57-727c50641ba9 method: GET response: proto: HTTP/2.0 @@ -274,20 +272,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 690 + content_length: 1057 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + body: 
'{"created_at":"2025-05-15T09:36:49.519324Z","dhcp_enabled":true,"id":"552f5c0d-40f0-4d67-bb57-727c50641ba9","name":"private-network-test-inference","organization_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","subnets":[{"created_at":"2025-05-15T09:36:49.519324Z","id":"7633077d-66da-4644-bc1b-dbdf8bca995b","private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","subnet":"172.16.68.0/22","updated_at":"2025-05-15T09:36:49.519324Z","vpc_id":"e092f3d5-d85b-46fd-8d08-025e3282c8c1"},{"created_at":"2025-05-15T09:36:49.519324Z","id":"7488b8ca-80eb-42b4-aad5-a17471547dfa","private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","subnet":"fd64:badd:7710:82eb::/64","updated_at":"2025-05-15T09:36:49.519324Z","vpc_id":"e092f3d5-d85b-46fd-8d08-025e3282c8c1"}],"tags":[],"updated_at":"2025-05-15T09:36:49.519324Z","vpc_id":"e092f3d5-d85b-46fd-8d08-025e3282c8c1"}' headers: Content-Length: - - "690" + - "1057" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:31:03 GMT + - Thu, 15 May 2025 09:36:50 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-1;edge03) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -295,48 +293,50 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - dff662ea-8014-47b3-9ef2-07ab8306bfce + - 039084c3-1504-465e-9505-958c04766479 status: 200 OK code: 200 - duration: 55.982375ms + duration: 27.667125ms - id: 6 request: proto: HTTP/1.1 proto_major: 1 proto_minor: 1 - content_length: 0 + content_length: 349 transfer_encoding: [] trailer: {} host: api.scaleway.com remote_addr: "" request_uri: "" - body: "" + body: 
'{"name":"test-inference-deployment-endpoint-private","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","accept_eula":true,"node_type_name":"L4","tags":[],"min_size":1,"max_size":1,"endpoints":[{"private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"disable_auth":false}]}' form: {} headers: + Content-Type: + - application/json User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 - method: GET + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments + method: POST response: proto: HTTP/2.0 proto_major: 2 proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 690 + content_length: 717 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + body: 
'{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-endpoint-private","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' headers: Content-Length: - - "690" + - "717" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:32:03 GMT + - Thu, 15 May 2025 09:36:50 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-1;edge03) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -344,10 +344,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 9840990f-39da-46f2-a934-385200c7e3e7 + - 7840d252-704a-493b-88a3-c09baedbaee2 status: 200 OK code: 200 - duration: 98.294042ms + duration: 291.656291ms - id: 7 request: proto: HTTP/1.1 @@ -363,8 +363,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 method: GET response: proto: HTTP/2.0 @@ -372,20 +372,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - 
content_length: 690 + content_length: 717 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + body: '{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-endpoint-private","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' headers: Content-Length: - - "690" + - "717" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:33:03 GMT + - Thu, 15 May 2025 09:36:50 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-1;edge03) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -393,10 +393,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - f12f8474-ac1b-4eba-9b30-32f7f46fad52 + - e4119343-7606-4236-bc61-2fdceb6a8618 
status: 200 OK code: 200 - duration: 77.809333ms + duration: 51.23975ms - id: 8 request: proto: HTTP/1.1 @@ -412,8 +412,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 method: GET response: proto: HTTP/2.0 @@ -421,20 +421,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 690 + content_length: 717 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + body: 
'{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-endpoint-private","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' headers: Content-Length: - - "690" + - "717" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:34:03 GMT + - Thu, 15 May 2025 09:37:50 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-1;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -442,10 +442,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 20897fe5-f982-4281-855e-46b0917ab3ab + - e4fe59f1-e3d0-454e-a8c3-121a75d2e742 status: 200 OK code: 200 - duration: 68.491917ms + duration: 117.681541ms - id: 9 request: proto: HTTP/1.1 @@ -461,8 +461,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 method: GET response: proto: HTTP/2.0 @@ -470,20 +470,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - 
content_length: 690 + content_length: 717 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + body: '{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-endpoint-private","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' headers: Content-Length: - - "690" + - "717" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:35:04 GMT + - Thu, 15 May 2025 09:38:50 GMT Server: - - Scaleway API Gateway (fr-par-2;edge01) + - Scaleway API Gateway (fr-par-3;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -491,10 +491,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 22b09aa4-5a43-494b-8859-591888cceddf + - 653b13f6-5d50-4955-a26e-0e8738ff7b07 
status: 200 OK code: 200 - duration: 97.426375ms + duration: 95.343917ms - id: 10 request: proto: HTTP/1.1 @@ -510,8 +510,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 method: GET response: proto: HTTP/2.0 @@ -519,20 +519,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 690 + content_length: 763 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + body: 
'{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-endpoint-private","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-05-15T09:39:43.360182Z"}' headers: Content-Length: - - "690" + - "763" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:36:04 GMT + - Thu, 15 May 2025 09:42:00 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-2;edge03) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -540,10 +540,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - aef1877e-d5ba-453a-bc6e-0d76562d9b23 + - a261ad7f-5712-4701-a6f9-b626baac58a9 status: 200 OK code: 200 - duration: 110.898542ms + duration: 105.144167ms - id: 11 request: proto: HTTP/1.1 @@ -559,8 +559,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 method: GET response: proto: HTTP/2.0 @@ -568,18 +568,18 @@ interactions: 
proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 690 + content_length: 763 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + body: '{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-endpoint-private","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-05-15T09:39:43.360182Z"}' headers: Content-Length: - - "690" + - "763" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:37:04 GMT + - Thu, 15 May 2025 09:43:00 GMT Server: - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: @@ -589,10 +589,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 7cdd2952-ac35-4424-9fe7-70458e5c8515 + - 
495f22b6-7a1b-47fd-8b40-dea54e3d4c48 status: 200 OK code: 200 - duration: 203.311792ms + duration: 224.630958ms - id: 12 request: proto: HTTP/1.1 @@ -608,8 +608,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 method: GET response: proto: HTTP/2.0 @@ -617,20 +617,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 716 + content_length: 763 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2024-10-24T13:37:38.541451Z"}' + body: 
'{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-endpoint-private","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-05-15T09:39:43.360182Z"}' headers: Content-Length: - - "716" + - "763" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:38:04 GMT + - Thu, 15 May 2025 09:44:00 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-2;edge03) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -638,10 +638,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 13e612a2-5246-498a-9e39-708124fd49ff + - a642a607-9ba7-4a1e-b2e6-ea01f56eb373 status: 200 OK code: 200 - duration: 104.959292ms + duration: 95.987ms - id: 13 request: proto: HTTP/1.1 @@ -657,8 +657,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 method: GET response: proto: HTTP/2.0 @@ -666,20 +666,20 @@ interactions: 
proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 716 + content_length: 763 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2024-10-24T13:37:38.541451Z"}' + body: '{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-endpoint-private","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-05-15T09:39:43.360182Z"}' headers: Content-Length: - - "716" + - "763" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:39:04 GMT + - Thu, 15 May 2025 09:45:00 GMT Server: - - Scaleway API Gateway (fr-par-2;edge02) + - Scaleway API Gateway (fr-par-2;edge01) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -687,10 +687,10 @@ interactions: 
X-Frame-Options: - DENY X-Request-Id: - - 766594ce-47d2-4b85-b944-86ba69f874cb + - f4f332ff-79b9-4bde-8ebe-96e9ede956d4 status: 200 OK code: 200 - duration: 120.497584ms + duration: 91.154ms - id: 14 request: proto: HTTP/1.1 @@ -706,8 +706,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 method: GET response: proto: HTTP/2.0 @@ -715,20 +715,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 716 + content_length: 763 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2024-10-24T13:37:38.541451Z"}' + body: 
'{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-endpoint-private","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-05-15T09:39:43.360182Z"}' headers: Content-Length: - - "716" + - "763" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:40:04 GMT + - Thu, 15 May 2025 09:46:00 GMT Server: - - Scaleway API Gateway (fr-par-2;edge01) + - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -736,10 +736,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 07353d68-3d86-45b8-8235-67b6bbdd9a4f + - 90f2a5f1-b573-4ea7-88fd-89a79f88d87c status: 200 OK code: 200 - duration: 120.69975ms + duration: 94.418625ms - id: 15 request: proto: HTTP/1.1 @@ -755,8 +755,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 method: GET response: proto: HTTP/2.0 @@ -764,18 +764,18 @@ interactions: 
proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 716 + content_length: 763 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2024-10-24T13:37:38.541451Z"}' + body: '{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-endpoint-private","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-05-15T09:39:43.360182Z"}' headers: Content-Length: - - "716" + - "763" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:41:04 GMT + - Thu, 15 May 2025 09:47:00 GMT Server: - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: @@ -785,10 +785,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - e3e818a0-b8f2-481a-acc5-12a2faff2642 + - 
301c8668-15a9-4a81-abda-5afaa1eac870 status: 200 OK code: 200 - duration: 106.448625ms + duration: 83.121958ms - id: 16 request: proto: HTTP/1.1 @@ -804,8 +804,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 method: GET response: proto: HTTP/2.0 @@ -813,18 +813,18 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 716 + content_length: 759 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2024-10-24T13:37:38.541451Z"}' + body: 
'{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-endpoint-private","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2025-05-15T09:47:45.790408Z"}' headers: Content-Length: - - "716" + - "759" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:42:04 GMT + - Thu, 15 May 2025 09:48:01 GMT Server: - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: @@ -834,10 +834,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - a9c5e28a-b44d-47a9-9ca3-5b6af160ce7d + - 8552fb36-97af-4650-a68c-a3cf8809973c status: 200 OK code: 200 - duration: 129.56375ms + duration: 99.45975ms - id: 17 request: proto: HTTP/1.1 @@ -853,8 +853,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 method: GET response: proto: HTTP/2.0 @@ -862,18 +862,18 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 716 + content_length: 759 
uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2024-10-24T13:37:38.541451Z"}' + body: '{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-endpoint-private","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2025-05-15T09:47:45.790408Z"}' headers: Content-Length: - - "716" + - "759" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:43:05 GMT + - Thu, 15 May 2025 09:48:01 GMT Server: - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: @@ -883,10 +883,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - db37fc50-9fdd-471c-89b8-e8d7cdfa31db + - b9d88f3a-e0de-4864-a807-3ad261760376 status: 200 OK code: 200 - duration: 157.868875ms + duration: 
54.053667ms - id: 18 request: proto: HTTP/1.1 @@ -902,8 +902,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 method: GET response: proto: HTTP/2.0 @@ -911,18 +911,18 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 716 + content_length: 759 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2024-10-24T13:37:38.541451Z"}' + body: 
'{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-endpoint-private","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2025-05-15T09:47:45.790408Z"}' headers: Content-Length: - - "716" + - "759" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:44:05 GMT + - Thu, 15 May 2025 09:48:01 GMT Server: - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: @@ -932,10 +932,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - e131fd10-aec1-42af-bde8-dbf94336aedb + - ba0f6803-80be-498e-8454-73ffb1387b42 status: 200 OK code: 200 - duration: 135.776875ms + duration: 48.278958ms - id: 19 request: proto: HTTP/1.1 @@ -951,8 +951,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 method: GET response: proto: HTTP/2.0 @@ -960,18 +960,18 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 716 + content_length: 50297 
uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2024-10-24T13:37:38.541451Z"}' + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization
_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.00
4776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":29}' headers: Content-Length: - - "716" + - "50297" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:45:05 GMT + - Thu, 15 May 2025 09:48:01 GMT Server: - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: @@ -981,10 +981,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - d4fed6dd-8310-4c1c-a8bd-8a79436c1cc3 + - 
eade2eb2-d72b-4495-ac31-1dc6a52e537e status: 200 OK code: 200 - duration: 105.398583ms + duration: 259.126417ms - id: 20 request: proto: HTTP/1.1 @@ -1000,8 +1000,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/7205dbce-cc80-4b2a-bb7f-3fd3a804afc3 method: GET response: proto: HTTP/2.0 @@ -1009,20 +1009,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 712 + content_length: 1723 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2024-10-24T13:46:00.862117Z"}' + body: '{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"}' headers: Content-Length: - - "712" + - "1723" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:05 GMT + - Thu, 15 May 2025 09:48:02 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1030,10 +1030,10 @@ interactions: X-Frame-Options: - DENY 
X-Request-Id: - - 00d78fdf-5432-40ca-bbfa-73229cccfc07 + - ee3aad49-ac77-46a7-8cd6-6e2129f85498 status: 200 OK code: 200 - duration: 110.750292ms + duration: 118.039125ms - id: 21 request: proto: HTTP/1.1 @@ -1049,8 +1049,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/vpc/v2/regions/fr-par/private-networks/552f5c0d-40f0-4d67-bb57-727c50641ba9 method: GET response: proto: HTTP/2.0 @@ -1058,20 +1058,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 712 + content_length: 1057 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2024-10-24T13:46:00.862117Z"}' + body: 
'{"created_at":"2025-05-15T09:36:49.519324Z","dhcp_enabled":true,"id":"552f5c0d-40f0-4d67-bb57-727c50641ba9","name":"private-network-test-inference","organization_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","subnets":[{"created_at":"2025-05-15T09:36:49.519324Z","id":"7633077d-66da-4644-bc1b-dbdf8bca995b","private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","subnet":"172.16.68.0/22","updated_at":"2025-05-15T09:36:49.519324Z","vpc_id":"e092f3d5-d85b-46fd-8d08-025e3282c8c1"},{"created_at":"2025-05-15T09:36:49.519324Z","id":"7488b8ca-80eb-42b4-aad5-a17471547dfa","private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","subnet":"fd64:badd:7710:82eb::/64","updated_at":"2025-05-15T09:36:49.519324Z","vpc_id":"e092f3d5-d85b-46fd-8d08-025e3282c8c1"}],"tags":[],"updated_at":"2025-05-15T09:36:49.519324Z","vpc_id":"e092f3d5-d85b-46fd-8d08-025e3282c8c1"}' headers: Content-Length: - - "712" + - "1057" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:05 GMT + - Thu, 15 May 2025 09:48:02 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1079,10 +1079,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 8eaaf821-b5c6-499b-bed0-1e845a4aa48a + - f050b449-cda7-4cd7-ade5-5c714b7ea280 status: 200 OK code: 200 - duration: 73.559125ms + duration: 45.185458ms - id: 22 request: proto: HTTP/1.1 @@ -1098,8 +1098,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - 
scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 method: GET response: proto: HTTP/2.0 @@ -1107,20 +1107,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 712 + content_length: 50297 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2024-10-24T13:46:00.862117Z"}' + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context 
window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bi
ts":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_b
its":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quant
ization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414
165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":29}' headers: Content-Length: - - "712" + - "50297" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:05 GMT + - Thu, 15 May 2025 09:48:02 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1128,10 +1128,10 @@ interactions: X-Frame-Options: - DENY 
X-Request-Id: - - 6de80e28-dd36-4320-9f1f-7538d209ce9d + - 71d8257f-31f2-4fbb-b239-e0221a1d8456 status: 200 OK code: 200 - duration: 65.040083ms + duration: 173.9375ms - id: 23 request: proto: HTTP/1.1 @@ -1147,8 +1147,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/vpc/v2/regions/fr-par/private-networks/5213e862-3c32-4e23-8c5a-fb079958acea + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/7205dbce-cc80-4b2a-bb7f-3fd3a804afc3 method: GET response: proto: HTTP/2.0 @@ -1156,20 +1156,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 1056 + content_length: 1723 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.392287Z","dhcp_enabled":true,"id":"5213e862-3c32-4e23-8c5a-fb079958acea","name":"private-network-test-inference","organization_id":"105bdce1-64c0-48ab-899d-868455867ecf","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","subnets":[{"created_at":"2024-10-24T13:29:02.392287Z","id":"55561a48-f44e-404e-89f9-7d31f442d655","private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","subnet":"172.16.80.0/22","updated_at":"2024-10-24T13:29:02.392287Z","vpc_id":"8feba4f5-79f9-42cd-b5ce-3ed8c510569e"},{"created_at":"2024-10-24T13:29:02.392287Z","id":"9dda5654-4cf2-44d5-9a94-59075b96ea81","private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","subnet":"fd5f:519c:6d46:9f1::/64","updated_at":"2024-10-24T13:29:02.392287Z","vpc_id":"8feba4f5-79f9-42cd-b5ce-3ed8c510569e"}],"tags":[],"updated_at":"2024-10-24T13:29:02.392287Z","vpc_id":"8feba4f5-79f9-42cd-b5ce-3ed8c510569e"}' + body: 
'{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"}' headers: Content-Length: - - "1056" + - "1723" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:05 GMT + - Thu, 15 May 2025 09:48:02 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-2;edge02) 
Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1177,10 +1177,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - afe8b439-e19b-4e40-8741-3d40dfe57676 + - 40d2f2ad-bd9c-4a2f-b720-073233fa4377 status: 200 OK code: 200 - duration: 67.957041ms + duration: 56.269625ms - id: 24 request: proto: HTTP/1.1 @@ -1196,8 +1196,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 method: GET response: proto: HTTP/2.0 @@ -1205,20 +1205,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 712 + content_length: 759 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2024-10-24T13:46:00.862117Z"}' + body: 
'{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-endpoint-private","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2025-05-15T09:47:45.790408Z"}' headers: Content-Length: - - "712" + - "759" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:06 GMT + - Thu, 15 May 2025 09:48:02 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1226,10 +1226,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 5296230c-24db-4b44-bb78-5ae6dc8a0c7d + - db895643-5542-42f8-b30a-ea8b2d867cfc status: 200 OK code: 200 - duration: 68.275667ms + duration: 72.203ms - id: 25 request: proto: HTTP/1.1 @@ -1245,8 +1245,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/vpc/v2/regions/fr-par/private-networks/5213e862-3c32-4e23-8c5a-fb079958acea + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 method: GET response: proto: HTTP/2.0 @@ -1254,20 +1254,20 @@ interactions: proto_minor: 0 
transfer_encoding: [] trailer: {} - content_length: 1056 + content_length: 50297 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.392287Z","dhcp_enabled":true,"id":"5213e862-3c32-4e23-8c5a-fb079958acea","name":"private-network-test-inference","organization_id":"105bdce1-64c0-48ab-899d-868455867ecf","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","subnets":[{"created_at":"2024-10-24T13:29:02.392287Z","id":"55561a48-f44e-404e-89f9-7d31f442d655","private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","subnet":"172.16.80.0/22","updated_at":"2024-10-24T13:29:02.392287Z","vpc_id":"8feba4f5-79f9-42cd-b5ce-3ed8c510569e"},{"created_at":"2024-10-24T13:29:02.392287Z","id":"9dda5654-4cf2-44d5-9a94-59075b96ea81","private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","subnet":"fd5f:519c:6d46:9f1::/64","updated_at":"2024-10-24T13:29:02.392287Z","vpc_id":"8feba4f5-79f9-42cd-b5ce-3ed8c510569e"}],"tags":[],"updated_at":"2024-10-24T13:29:02.392287Z","vpc_id":"8feba4f5-79f9-42cd-b5ce-3ed8c510569e"}' + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context 
window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bi
ts":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_b
its":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quant
ization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414
165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":29}' headers: Content-Length: - - "1056" + - "50297" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:06 GMT + - Thu, 15 May 2025 09:48:03 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1275,10 +1275,10 @@ interactions: X-Frame-Options: - DENY 
X-Request-Id: - - c5153c24-b431-47a7-889c-e0cf9ee9b80f + - 3b9efb76-f970-47a0-9318-edbda24ccd0a status: 200 OK code: 200 - duration: 25.130541ms + duration: 187.605916ms - id: 26 request: proto: HTTP/1.1 @@ -1294,8 +1294,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/7205dbce-cc80-4b2a-bb7f-3fd3a804afc3 method: GET response: proto: HTTP/2.0 @@ -1303,20 +1303,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 712 + content_length: 1723 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2024-10-24T13:46:00.862117Z"}' + body: '{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"}' headers: Content-Length: - - "712" + - "1723" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:06 GMT + - Thu, 15 May 2025 09:48:03 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1324,50 +1324,48 @@ interactions: X-Frame-Options: - DENY 
X-Request-Id: - - e2e96741-f679-4324-8361-94ae359412c2 + - 4db62bcb-8b50-49aa-83bc-5ce845095d5d status: 200 OK code: 200 - duration: 60.36575ms + duration: 32.809167ms - id: 27 request: proto: HTTP/1.1 proto_major: 1 proto_minor: 1 - content_length: 58 + content_length: 0 transfer_encoding: [] trailer: {} host: api.scaleway.com remote_addr: "" request_uri: "" - body: '{"name":"private-network-test-inference-public","tags":[]}' + body: "" form: {} headers: - Content-Type: - - application/json User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/vpc/v2/regions/fr-par/private-networks/5213e862-3c32-4e23-8c5a-fb079958acea - method: PATCH + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/vpc/v2/regions/fr-par/private-networks/552f5c0d-40f0-4d67-bb57-727c50641ba9 + method: GET response: proto: HTTP/2.0 proto_major: 2 proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 1063 + content_length: 1057 uncompressed: false - body: 
'{"created_at":"2024-10-24T13:29:02.392287Z","dhcp_enabled":true,"id":"5213e862-3c32-4e23-8c5a-fb079958acea","name":"private-network-test-inference-public","organization_id":"105bdce1-64c0-48ab-899d-868455867ecf","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","subnets":[{"created_at":"2024-10-24T13:29:02.392287Z","id":"55561a48-f44e-404e-89f9-7d31f442d655","private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","subnet":"172.16.80.0/22","updated_at":"2024-10-24T13:29:02.392287Z","vpc_id":"8feba4f5-79f9-42cd-b5ce-3ed8c510569e"},{"created_at":"2024-10-24T13:29:02.392287Z","id":"9dda5654-4cf2-44d5-9a94-59075b96ea81","private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","subnet":"fd5f:519c:6d46:9f1::/64","updated_at":"2024-10-24T13:29:02.392287Z","vpc_id":"8feba4f5-79f9-42cd-b5ce-3ed8c510569e"}],"tags":[],"updated_at":"2024-10-24T13:46:06.534698Z","vpc_id":"8feba4f5-79f9-42cd-b5ce-3ed8c510569e"}' + body: 
'{"created_at":"2025-05-15T09:36:49.519324Z","dhcp_enabled":true,"id":"552f5c0d-40f0-4d67-bb57-727c50641ba9","name":"private-network-test-inference","organization_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","subnets":[{"created_at":"2025-05-15T09:36:49.519324Z","id":"7633077d-66da-4644-bc1b-dbdf8bca995b","private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","subnet":"172.16.68.0/22","updated_at":"2025-05-15T09:36:49.519324Z","vpc_id":"e092f3d5-d85b-46fd-8d08-025e3282c8c1"},{"created_at":"2025-05-15T09:36:49.519324Z","id":"7488b8ca-80eb-42b4-aad5-a17471547dfa","private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","subnet":"fd64:badd:7710:82eb::/64","updated_at":"2025-05-15T09:36:49.519324Z","vpc_id":"e092f3d5-d85b-46fd-8d08-025e3282c8c1"}],"tags":[],"updated_at":"2025-05-15T09:36:49.519324Z","vpc_id":"e092f3d5-d85b-46fd-8d08-025e3282c8c1"}' headers: Content-Length: - - "1063" + - "1057" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:06 GMT + - Thu, 15 May 2025 09:48:03 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1375,10 +1373,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 68444496-adaa-4be9-8552-cfd927cc3a4a + - 824a239c-7dca-4478-ab30-0101f9857c81 status: 200 OK code: 200 - duration: 92.484917ms + duration: 32.190208ms - id: 28 request: proto: HTTP/1.1 @@ -1394,8 +1392,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/vpc/v2/regions/fr-par/private-networks/5213e862-3c32-4e23-8c5a-fb079958acea + - 
scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 method: GET response: proto: HTTP/2.0 @@ -1403,20 +1401,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 1063 + content_length: 50297 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:02.392287Z","dhcp_enabled":true,"id":"5213e862-3c32-4e23-8c5a-fb079958acea","name":"private-network-test-inference-public","organization_id":"105bdce1-64c0-48ab-899d-868455867ecf","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","subnets":[{"created_at":"2024-10-24T13:29:02.392287Z","id":"55561a48-f44e-404e-89f9-7d31f442d655","private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","subnet":"172.16.80.0/22","updated_at":"2024-10-24T13:29:02.392287Z","vpc_id":"8feba4f5-79f9-42cd-b5ce-3ed8c510569e"},{"created_at":"2024-10-24T13:29:02.392287Z","id":"9dda5654-4cf2-44d5-9a94-59075b96ea81","private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","subnet":"fd5f:519c:6d46:9f1::/64","updated_at":"2024-10-24T13:29:02.392287Z","vpc_id":"8feba4f5-79f9-42cd-b5ce-3ed8c510569e"}],"tags":[],"updated_at":"2024-10-24T13:46:06.534698Z","vpc_id":"8feba4f5-79f9-42cd-b5ce-3ed8c510569e"}' + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context 
window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bi
ts":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_b
its":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quant
ization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414
165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":29}' headers: Content-Length: - - "1063" + - "50297" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:06 GMT + - Thu, 15 May 2025 09:48:03 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1424,10 +1422,10 @@ interactions: X-Frame-Options: - DENY 
X-Request-Id: - - 01713e43-b5d0-4b98-839c-bbbe99457cd2 + - 05def516-f0bb-4201-99f1-5077f2fcc3d5 status: 200 OK code: 200 - duration: 33.021875ms + duration: 177.197541ms - id: 29 request: proto: HTTP/1.1 @@ -1443,8 +1441,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/7205dbce-cc80-4b2a-bb7f-3fd3a804afc3 method: GET response: proto: HTTP/2.0 @@ -1452,20 +1450,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 712 + content_length: 1723 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-endpoint-private","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2024-10-24T13:46:00.862117Z"}' + body: '{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"}' headers: Content-Length: - - "712" + - "1723" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:06 GMT + - Thu, 15 May 2025 09:48:04 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1473,50 +1471,48 @@ interactions: X-Frame-Options: - DENY 
X-Request-Id: - - eb649ed1-0f46-4c87-a9c9-62164c178987 + - a3efbe77-bce9-4323-908d-e705e3897ec9 status: 200 OK code: 200 - duration: 68.764667ms + duration: 41.891791ms - id: 30 request: proto: HTTP/1.1 proto_major: 1 proto_minor: 1 - content_length: 67 + content_length: 0 transfer_encoding: [] trailer: {} host: api.scaleway.com remote_addr: "" request_uri: "" - body: '{"name":"test-inference-deployment-basic-endpoints-private-public"}' + body: "" form: {} headers: - Content-Type: - - application/json User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 - method: PATCH + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 + method: GET response: proto: HTTP/2.0 proto_major: 2 proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 726 + content_length: 759 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic-endpoints-private-public","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2024-10-24T13:46:06.713490Z"}' + body: 
'{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-endpoint-private","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2025-05-15T09:47:45.790408Z"}' headers: Content-Length: - - "726" + - "759" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:06 GMT + - Thu, 15 May 2025 09:48:04 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1524,10 +1520,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 799604d9-973f-4282-8fa7-5e82c557f57a + - b0094468-b495-42dc-ae2e-c84eef7be226 status: 200 OK code: 200 - duration: 128.261666ms + duration: 51.463ms - id: 31 request: proto: HTTP/1.1 @@ -1543,8 +1539,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 method: GET response: proto: HTTP/2.0 @@ -1552,20 +1548,20 @@ interactions: 
proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 726 + content_length: 50297 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic-endpoints-private-public","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2024-10-24T13:46:06.713490Z"}' + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context 
window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bi
ts":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_b
its":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quant
ization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414
165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":29}' headers: Content-Length: - - "726" + - "50297" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:06 GMT + - Thu, 15 May 2025 09:48:04 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1573,10 +1569,10 @@ interactions: X-Frame-Options: - DENY 
X-Request-Id: - - 5d5e6d58-2a36-4c0f-a402-a6be0cbb53b5 + - ffdd9d3d-5d25-46eb-bd69-f28073a04be3 status: 200 OK code: 200 - duration: 62.516209ms + duration: 181.150459ms - id: 32 request: proto: HTTP/1.1 @@ -1592,8 +1588,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/7205dbce-cc80-4b2a-bb7f-3fd3a804afc3 method: GET response: proto: HTTP/2.0 @@ -1601,20 +1597,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 726 + content_length: 1723 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic-endpoints-private-public","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2024-10-24T13:46:06.713490Z"}' + body: '{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"}' headers: Content-Length: - - "726" + - "1723" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:06 GMT + - Thu, 15 May 2025 09:48:04 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1622,48 +1618,50 @@ interactions: X-Frame-Options: - DENY 
X-Request-Id: - - 45af3d56-b303-4544-8338-cc5d3f870779 + - 79b41526-1bcc-412c-8fae-90aaf306732b status: 200 OK code: 200 - duration: 71.370709ms + duration: 29.836084ms - id: 33 request: proto: HTTP/1.1 proto_major: 1 proto_minor: 1 - content_length: 0 + content_length: 58 transfer_encoding: [] trailer: {} host: api.scaleway.com remote_addr: "" request_uri: "" - body: "" + body: '{"name":"private-network-test-inference-public","tags":[]}' form: {} headers: + Content-Type: + - application/json User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/vpc/v2/regions/fr-par/private-networks/5213e862-3c32-4e23-8c5a-fb079958acea - method: GET + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/vpc/v2/regions/fr-par/private-networks/552f5c0d-40f0-4d67-bb57-727c50641ba9 + method: PATCH response: proto: HTTP/2.0 proto_major: 2 proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 1063 + content_length: 1064 uncompressed: false - body: 
'{"created_at":"2024-10-24T13:29:02.392287Z","dhcp_enabled":true,"id":"5213e862-3c32-4e23-8c5a-fb079958acea","name":"private-network-test-inference-public","organization_id":"105bdce1-64c0-48ab-899d-868455867ecf","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","subnets":[{"created_at":"2024-10-24T13:29:02.392287Z","id":"55561a48-f44e-404e-89f9-7d31f442d655","private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","subnet":"172.16.80.0/22","updated_at":"2024-10-24T13:29:02.392287Z","vpc_id":"8feba4f5-79f9-42cd-b5ce-3ed8c510569e"},{"created_at":"2024-10-24T13:29:02.392287Z","id":"9dda5654-4cf2-44d5-9a94-59075b96ea81","private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","subnet":"fd5f:519c:6d46:9f1::/64","updated_at":"2024-10-24T13:29:02.392287Z","vpc_id":"8feba4f5-79f9-42cd-b5ce-3ed8c510569e"}],"tags":[],"updated_at":"2024-10-24T13:46:06.534698Z","vpc_id":"8feba4f5-79f9-42cd-b5ce-3ed8c510569e"}' + body: 
'{"created_at":"2025-05-15T09:36:49.519324Z","dhcp_enabled":true,"id":"552f5c0d-40f0-4d67-bb57-727c50641ba9","name":"private-network-test-inference-public","organization_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","subnets":[{"created_at":"2025-05-15T09:36:49.519324Z","id":"7633077d-66da-4644-bc1b-dbdf8bca995b","private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","subnet":"172.16.68.0/22","updated_at":"2025-05-15T09:36:49.519324Z","vpc_id":"e092f3d5-d85b-46fd-8d08-025e3282c8c1"},{"created_at":"2025-05-15T09:36:49.519324Z","id":"7488b8ca-80eb-42b4-aad5-a17471547dfa","private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","subnet":"fd64:badd:7710:82eb::/64","updated_at":"2025-05-15T09:36:49.519324Z","vpc_id":"e092f3d5-d85b-46fd-8d08-025e3282c8c1"}],"tags":[],"updated_at":"2025-05-15T09:48:05.280018Z","vpc_id":"e092f3d5-d85b-46fd-8d08-025e3282c8c1"}' headers: Content-Length: - - "1063" + - "1064" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:07 GMT + - Thu, 15 May 2025 09:48:05 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1671,10 +1669,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - d99a5a0b-bf42-47a7-b6e5-82c23f894d09 + - 32a3f7cf-3be7-4eed-9c2d-cd7963fef8c8 status: 200 OK code: 200 - duration: 60.620875ms + duration: 63.36ms - id: 34 request: proto: HTTP/1.1 @@ -1690,8 +1688,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - 
scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/vpc/v2/regions/fr-par/private-networks/552f5c0d-40f0-4d67-bb57-727c50641ba9 method: GET response: proto: HTTP/2.0 @@ -1699,20 +1697,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 726 + content_length: 1064 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic-endpoints-private-public","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2024-10-24T13:46:06.713490Z"}' + body: 
'{"created_at":"2025-05-15T09:36:49.519324Z","dhcp_enabled":true,"id":"552f5c0d-40f0-4d67-bb57-727c50641ba9","name":"private-network-test-inference-public","organization_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","subnets":[{"created_at":"2025-05-15T09:36:49.519324Z","id":"7633077d-66da-4644-bc1b-dbdf8bca995b","private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","subnet":"172.16.68.0/22","updated_at":"2025-05-15T09:36:49.519324Z","vpc_id":"e092f3d5-d85b-46fd-8d08-025e3282c8c1"},{"created_at":"2025-05-15T09:36:49.519324Z","id":"7488b8ca-80eb-42b4-aad5-a17471547dfa","private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","subnet":"fd64:badd:7710:82eb::/64","updated_at":"2025-05-15T09:36:49.519324Z","vpc_id":"e092f3d5-d85b-46fd-8d08-025e3282c8c1"}],"tags":[],"updated_at":"2025-05-15T09:48:05.280018Z","vpc_id":"e092f3d5-d85b-46fd-8d08-025e3282c8c1"}' headers: Content-Length: - - "726" + - "1064" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:07 GMT + - Thu, 15 May 2025 09:48:05 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1720,10 +1718,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - c3a13b06-691f-4f1a-ae6b-42b4aa5a4d79 + - 2594e4af-3b77-446f-9043-3542fe16a16a status: 200 OK code: 200 - duration: 76.904541ms + duration: 44.222625ms - id: 35 request: proto: HTTP/1.1 @@ -1739,8 +1737,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - 
scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 method: GET response: proto: HTTP/2.0 @@ -1748,20 +1746,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 726 + content_length: 759 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic-endpoints-private-public","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2024-10-24T13:46:06.713490Z"}' + body: '{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-endpoint-private","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2025-05-15T09:47:45.790408Z"}' headers: Content-Length: - - "726" + - "759" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' 
Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:07 GMT + - Thu, 15 May 2025 09:48:05 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1769,48 +1767,50 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 4e65f435-4b0b-40eb-9b4f-5eb57c07500c + - 28235e93-aae3-4812-86dc-1f63e51da000 status: 200 OK code: 200 - duration: 68.688125ms + duration: 48.87775ms - id: 36 request: proto: HTTP/1.1 proto_major: 1 proto_minor: 1 - content_length: 0 + content_length: 67 transfer_encoding: [] trailer: {} host: api.scaleway.com remote_addr: "" request_uri: "" - body: "" + body: '{"name":"test-inference-deployment-basic-endpoints-private-public"}' form: {} headers: + Content-Type: + - application/json User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 - method: DELETE + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 + method: PATCH response: proto: HTTP/2.0 proto_major: 2 proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 729 + content_length: 777 uncompressed: false - body: 
'{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic-endpoints-private-public","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":1,"status":"deleting","tags":[],"updated_at":"2024-10-24T13:46:06.713490Z"}' + body: '{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic-endpoints-private-public","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"deploying","tags":[],"updated_at":"2025-05-15T09:48:05.451842Z"}' headers: Content-Length: - - "729" + - "777" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:07 GMT + - Thu, 15 May 2025 09:48:05 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1818,10 +1818,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - e7daa01e-0f8c-4370-a6fc-c983979998d4 + - 
5cd48683-4302-4ded-b27d-14d80c556f66 status: 200 OK code: 200 - duration: 165.447167ms + duration: 147.388917ms - id: 37 request: proto: HTTP/1.1 @@ -1837,8 +1837,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 method: GET response: proto: HTTP/2.0 @@ -1846,20 +1846,20 @@ interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 729 + content_length: 773 uncompressed: false - body: '{"created_at":"2024-10-24T13:29:03.040316Z","endpoints":[{"disable_auth":false,"id":"02527765-8d08-427e-8c8d-db76fe661126","private_network":{"private_network_id":"5213e862-3c32-4e23-8c5a-fb079958acea"},"url":"https://7aac488b-b5a5-47cf-894a-75bf1759e247.private-network-test-inference"}],"error_message":null,"id":"7aac488b-b5a5-47cf-894a-75bf1759e247","max_size":1,"min_size":1,"model_id":"d33fb5fd-75ca-4dfb-8952-8af8b8b28be5","model_name":"meta/llama-3.1-8b-instruct:fp8","name":"test-inference-deployment-basic-endpoints-private-public","node_type":"L4","project_id":"105bdce1-64c0-48ab-899d-868455867ecf","region":"fr-par","size":1,"status":"deleting","tags":[],"updated_at":"2024-10-24T13:46:06.713490Z"}' + body: 
'{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic-endpoints-private-public","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2025-05-15T09:48:05.451842Z"}' headers: Content-Length: - - "729" + - "773" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:46:07 GMT + - Thu, 15 May 2025 09:48:06 GMT Server: - - Scaleway API Gateway (fr-par-2;edge03) + - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: - max-age=63072000 X-Content-Type-Options: @@ -1867,10 +1867,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 6ac7e3bc-5571-4c14-8edd-5039165bb9f0 + - 5fccf032-7849-4957-94d5-d212928c1068 status: 200 OK code: 200 - duration: 78.131959ms + duration: 463.668917ms - id: 38 request: proto: HTTP/1.1 @@ -1886,8 +1886,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 method: GET response: proto: HTTP/2.0 @@ -1895,18 +1895,18 @@ 
interactions: proto_minor: 0 transfer_encoding: [] trailer: {} - content_length: 131 + content_length: 773 uncompressed: false - body: '{"message":"resource is not found","resource":"deployment","resource_id":"7aac488b-b5a5-47cf-894a-75bf1759e247","type":"not_found"}' + body: '{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic-endpoints-private-public","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2025-05-15T09:48:05.451842Z"}' headers: Content-Length: - - "131" + - "773" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:47:07 GMT + - Thu, 15 May 2025 09:48:06 GMT Server: - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: @@ -1916,10 +1916,10 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 4f615eb7-d8b5-4575-98fd-bbdbc83fde76 - status: 404 Not Found - code: 404 - duration: 37.984208ms + - b52fb552-1bfa-461e-841f-01ae3bba018b + status: 200 OK + code: 200 + duration: 48.30675ms - id: 39 request: proto: HTTP/1.1 @@ -1935,25 +1935,174 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/vpc/v2/regions/fr-par/private-networks/5213e862-3c32-4e23-8c5a-fb079958acea - method: DELETE + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; 
darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 50297 + uncompressed: false + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"]
,"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allo
wed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":29}' + headers: + Content-Length: + - "50297" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 09:48:06 GMT + Server: + - Scaleway API Gateway (fr-par-2;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 3f65b514-e31a-48cb-b990-5f4ab17a47f0 + status: 200 OK + code: 200 + 
duration: 188.398583ms + - id: 40 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/7205dbce-cc80-4b2a-bb7f-3fd3a804afc3 + method: GET response: proto: HTTP/2.0 proto_major: 2 proto_minor: 0 transfer_encoding: [] trailer: {} + content_length: 1723 + uncompressed: false + body: '{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":1310
72,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"}' + headers: + Content-Length: + - "1723" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 09:48:06 GMT + Server: + - Scaleway API Gateway (fr-par-2;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - b25970d7-be67-4b91-8e7d-7644ab41e36b + status: 200 OK + code: 200 + duration: 29.806667ms + - id: 41 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/vpc/v2/regions/fr-par/private-networks/552f5c0d-40f0-4d67-bb57-727c50641ba9 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1064 uncompressed: false + body: 
'{"created_at":"2025-05-15T09:36:49.519324Z","dhcp_enabled":true,"id":"552f5c0d-40f0-4d67-bb57-727c50641ba9","name":"private-network-test-inference-public","organization_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","subnets":[{"created_at":"2025-05-15T09:36:49.519324Z","id":"7633077d-66da-4644-bc1b-dbdf8bca995b","private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","subnet":"172.16.68.0/22","updated_at":"2025-05-15T09:36:49.519324Z","vpc_id":"e092f3d5-d85b-46fd-8d08-025e3282c8c1"},{"created_at":"2025-05-15T09:36:49.519324Z","id":"7488b8ca-80eb-42b4-aad5-a17471547dfa","private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","subnet":"fd64:badd:7710:82eb::/64","updated_at":"2025-05-15T09:36:49.519324Z","vpc_id":"e092f3d5-d85b-46fd-8d08-025e3282c8c1"}],"tags":[],"updated_at":"2025-05-15T09:48:05.280018Z","vpc_id":"e092f3d5-d85b-46fd-8d08-025e3282c8c1"}' + headers: + Content-Length: + - "1064" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 09:48:07 GMT + Server: + - Scaleway API Gateway (fr-par-2;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 526e3e5d-f96f-47af-9642-9ad4c25a663a + status: 200 OK + code: 200 + duration: 24.071542ms + - id: 42 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 + method: GET + 
response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 50297 + uncompressed: false + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding supporting up to 128k context window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomMode
l_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_s
ize":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"created_at":"2025-04-04T15:51:25.414165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":29}' headers: + Content-Length: + - "50297" Content-Security-Policy: - default-src 'none'; frame-ancestors 'none' Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:47:08 GMT + - Thu, 15 May 2025 09:48:07 GMT Server: - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: @@ -1963,11 +2112,597 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 0fa435f9-8c6e-4601-98fa-afa6c2e0f9b3 - status: 204 No Content - code: 204 - 
duration: 1.044924125s - - id: 40 + - 497f4786-123e-41ca-afd1-2f5fe63bdf1b + status: 200 OK + code: 200 + duration: 173.929292ms + - id: 43 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/7205dbce-cc80-4b2a-bb7f-3fd3a804afc3 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1723 + uncompressed: false + body: '{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantiza
tion_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"}' + headers: + Content-Length: + - "1723" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 09:48:07 GMT + Server: + - Scaleway API Gateway (fr-par-2;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 1fa2ef90-a240-4ae3-a98a-810532512650 + status: 200 OK + code: 200 + duration: 32.686833ms + - id: 44 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 773 + uncompressed: false + body: 
'{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic-endpoints-private-public","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2025-05-15T09:48:05.451842Z"}' + headers: + Content-Length: + - "773" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 09:48:07 GMT + Server: + - Scaleway API Gateway (fr-par-2;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - bed8e69e-9ce9-44f7-a1e1-8be5e67b53e2 + status: 200 OK + code: 200 + duration: 45.067375ms + - id: 45 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models?order_by=display_rank_asc&page_size=1000 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 50297 + uncompressed: false + body: '{"models":[{"created_at":"2025-04-04T13:11:00.900800Z","description":"Multimodal model for text generation an image understanding 
supporting up to 128k context window.","has_eula":false,"id":"5c40e594-d40d-452a-991e-5082225155e1","name":"google/gemma-3-27b-it:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":80000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":54904369444,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T16:45:10.128397Z"},{"created_at":"2025-04-28T18:48:01.860457Z","description":"","has_eula":false,"id":"a19296a6-4cef-447a-99bc-8f6c3ee30df4","name":"TestAccCustomModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_co
ntext_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-04-30T13:29:24.004776Z","description":"","has_eula":false,"id":"eabb7f74-24a1-4173-911b-26924c1be619","name":"TestAccCustomModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":18615,"quantization_bits":32},{"allowed":false,"max_c
ontext_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null},{"created_at":"2025-03-27T16:48:11.513249Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"a51ce791-9546-4c28-aa44-24850d84778b","name":"deepseek/deepseek-r1-distill-llama-8b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":true,"max_context_size":39000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"m
ax_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":16070465043,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:49.797687Z"},{"created_at":"2025-03-27T16:48:14.190404Z","description":"Efficient 8B-param distilled model by DeepSeek, balancing performance and compactness.","has_eula":true,"id":"b8dc7f2d-95d6-48ae-a076-a99e76b76e1f","name":"deepseek/deepseek-r1-distill-llama-8b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":90000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9093169346,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-04-14T09:05:26.354374Z"},{"create
d_at":"2025-04-04T15:51:25.414165Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context window.","has_eula":false,"id":"efcf0b60-999a-4c1e-981e-b68a428c4702","name":"mistral/mistral-small-3.1-24b-instruct-2503:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":75000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-05-09T13:51:56.986698Z"},{"created_at":"2025-04-04T15:51:27.773573Z","description":"Highly efficient multimodal model with vision and chat capabilities supporting up to 128k context 
window.","has_eula":false,"id":"906c0feb-0eb0-4037-94aa-afd4d845b94f","name":"mistral/mistral-small-3.1-24b-instruct-2503:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":96077777613,"status":"ready","tags":["instruct","chat","vision","featured"],"updated_at":"2025-04-08T14:26:24.388332Z"},{"created_at":"2025-03-27T16:47:41.108667Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"014919c1-00cc-43c2-98f2-4ffd263e6f33","name":"deepseek/deepseek-r1-distill-llama-70b:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":56960,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":141117442445,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:48.796286Z"},{"created_at":"2025-03-27T16:47:42.762505Z","description":"Efficient 70B-param distilled model by DeepSeek, balancing performance and 
compactness.","has_eula":true,"id":"bbfeeb62-2428-415d-ad0d-537af9aff946","name":"deepseek/deepseek-r1-distill-llama-70b:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679175005,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:35.683881Z"},{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"},{"created_at":"2025-03-27T16:50:12.267422Z","description":"Highly advanced coding model with a 128k context window, excelling in code generation, repairing, and 
reasoning.","has_eula":false,"id":"a3205fd3-ac4a-47cf-9074-82166d214bac","name":"qwen/qwen2.5-coder-32b-instruct:int8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":35080374444,"status":"ready","tags":["instruct","chat","code","featured"],"updated_at":"2025-05-09T13:52:04.105122Z"},{"created_at":"2025-03-27T16:49:51.968791Z","description":"A large language model customized by NVIDIA in order to improve the helpfulness of generated 
responses.","has_eula":true,"id":"4e6c9cea-57a1-4215-8a11-24ab51b9d1c8","name":"nvidia/llama-3.1-nemotron-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72679219797,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:01.331740Z"},{"created_at":"2025-05-13T12:13:50.994Z","description":"Best-in-class vision language model by research lab Allen Institute for AI. 
Available under the Apache 2.0 license.","has_eula":false,"id":"864e7786-4b86-4f4b-8534-25da1fc46a74","name":"allenai/molmo-72b-0924:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":45000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":293245208984,"status":"ready","tags":["instruct","chat","vision"],"updated_at":"2025-05-13T13:34:01.318606Z"},{"created_at":"2025-03-27T16:49:37.342054Z","description":"Efficient 8B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"775cbef7-6527-415d-9e6b-39d574cf39ec","name":"meta/llama-3.1-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090504772,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:00.700210Z"},{"created_at":"2025-03-27T16:48:15.818596Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"bc10c88e-4d18-4854-8250-77aff4763eca","name":"meta/llama-3-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":true,"max_context_size":8192,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132572668,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:51.995701Z"},{"created_at":"2025-03-27T16:49:33.359621Z","description":"First generation of 8B-param model by Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b5a94646-9390-4ced-acba-9b078e63a794","name":"meta/llama-3-8b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":9090489355,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:59.473065Z"},{"created_at":"2025-03-27T16:48:42.138410Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"126ad0c4-cfde-4b05-924f-f04c6343ccb2","name":"meta/llama-3.3-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282254830887,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:53.868968Z"},{"created_at":"2025-03-27T16:50:09.605796Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"1678195b-5af6-4c27-8fdc-16aa84c68c34","name":"meta/llama-3.3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72687332869,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-07T10:19:23.153808Z"},{"created_at":"2025-03-27T16:48:35.312110Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"7cbe0417-172a-4601-8940-3b71e4d0c8cb","name":"meta/llama-3.1-70b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":60000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":282246710880,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:52.677798Z"},{"created_at":"2025-03-27T16:49:35.836269Z","description":"Efficient 70B-param model by Meta, optimized for multilingual 
dialogue.","has_eula":true,"id":"03150ad5-de83-4c74-afe0-3eeeb67d71a3","name":"meta/llama-3.1-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":15000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665889083,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:52:00.003235Z"},{"created_at":"2025-03-27T16:49:31.715567Z","description":"First generation of 70B-param model from Meta, fine-tuned for instruction and 
automation.","has_eula":true,"id":"b0c5a8fe-5c9e-49cc-942a-6c4ebaadde67","name":"meta/llama-3-70b-instruct:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":8192,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":72665872089,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:58.899458Z"},{"created_at":"2025-03-27T16:49:17.458153Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"1e555754-47fb-4dba-a82c-66f3f1fa9294","name":"mistral/mistral-small-24b-instruct-2501:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":94321843451,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.176379Z"},{"created_at":"2025-03-27T16:50:07.300436Z","description":"A state-of-the-art 24B model with a 32k context window, designed for multilingual chat and agentic 
applications.","has_eula":false,"id":"7bb28f2c-3719-4d71-9bcb-17db392a7118","name":"mistral/mistral-small-24b-instruct-2501:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":20000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":24938988520,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-09T13:51:55.726891Z"},{"created_at":"2025-04-15T10:51:31.291792Z","description":"Vision language model able to analyze images and offer insights without compromising on instruction 
following.","has_eula":false,"id":"1999f4f5-f038-4039-94ba-11a851917df5","name":"mistral/pixtral-12b-2409:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":50000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":true,"max_context_size":128000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":25384844091,"status":"ready","tags":["vision","chat","featured"],"updated_at":"2025-05-09T13:51:58.281971Z"},{"created_at":"2025-03-27T16:49:14.593008Z","description":"A very efficient language model by Mistral AI, optimized for instruction-following tasks. 
Available under the Apache 2.0 license.","has_eula":false,"id":"bf6be106-c53d-4b93-bb33-1a4bd4d0b573","name":"mistral/mistral-7b-instruct-v0.3:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":28995471292,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:54.595513Z"},{"created_at":"2025-03-27T16:50:06.301430Z","description":"A state-of-the-art 12B model with a 128k context window, designed for multilingual chat 
applications.","has_eula":false,"id":"07681325-c743-4796-8b7d-1f0b35d4a8e0","name":"mistral/mistral-nemo-instruct-2407:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":128000,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":13605604415,"status":"ready","tags":["instruct","chat","featured"],"updated_at":"2025-05-06T15:17:43.837103Z"},{"created_at":"2025-03-27T16:50:08.291821Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"1aa87d1e-9996-4c54-aa1c-5b900bf59fd4","name":"mistral/mixtral-8x7b-instruct-v0.1:fp8","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":8,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":46970879717,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:52:02.960404Z"},{"created_at":"2025-03-27T16:49:19.120192Z","description":"A high-quality Mixture of Experts (MoE) model with open weights by Mistral AI, licensed under Apache 
2.0.","has_eula":false,"id":"11ed6599-f460-4e41-b266-87bc9a108fdd","name":"mistral/mixtral-8x7b-instruct-v0.1:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":32768,"quantization_bits":8},{"allowed":true,"max_context_size":32768,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":190483875108,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:57.661626Z"},{"created_at":"2025-03-27T16:46:54.314987Z","description":"An embedding model spanning a broad range of languages and state-of-the-art results on multilingual 
benchmarks.","has_eula":true,"id":"d58efec4-b667-48e2-8ad8-bcc26c175ae6","name":"baai/bge-multilingual-gemma2:fp32","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":true,"max_context_size":8192,"quantization_bits":32}]}]}],"parameter_size_bits":32,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":36989461520,"status":"ready","tags":["embedding","featured"],"updated_at":"2025-03-27T17:40:09.534954Z"}],"total_count":29}' + headers: + Content-Length: + - "50297" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 09:48:07 GMT + Server: + - Scaleway API Gateway (fr-par-2;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - d1162d15-be00-4801-b9fc-efcbcff76ee7 + status: 200 OK + code: 200 + 
duration: 176.634125ms + - id: 46 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/7205dbce-cc80-4b2a-bb7f-3fd3a804afc3 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1723 + uncompressed: false + body: '{"created_at":"2025-03-27T16:48:40.045689Z","description":"Efficient 8B-param model by Meta, optimized for multilingual dialogue.","has_eula":true,"id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","name":"meta/llama-3.1-8b-instruct:bf16","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":93000,"quantization_bits":8},{"allowed":true,"max_context_size":40000,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_context_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]},{"node_type_name":"H100-2","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":4},{"allowed":true,"max_context_size":131072,"quantization_bits":8},{"allowed":true,"max_contex
t_size":131072,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":32}]}]}],"parameter_size_bits":16,"project_id":"00000000-0000-0000-0000-000000000000","region":"fr-par","size_bytes":32132582323,"status":"ready","tags":["instruct","chat"],"updated_at":"2025-05-09T13:51:53.288962Z"}' + headers: + Content-Length: + - "1723" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 09:48:07 GMT + Server: + - Scaleway API Gateway (fr-par-2;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 9a07cad4-f153-4c51-92b0-c3ffd2609f1d + status: 200 OK + code: 200 + duration: 29.628834ms + - id: 47 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 777 + uncompressed: false + body: 
'{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic-endpoints-private-public","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"deploying","tags":[],"updated_at":"2025-05-15T09:48:07.477689Z"}' + headers: + Content-Length: + - "777" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 09:48:08 GMT + Server: + - Scaleway API Gateway (fr-par-2;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - d525b0ee-a31c-4322-a243-55c4118de079 + status: 200 OK + code: 200 + duration: 43.001375ms + - id: 48 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 777 + uncompressed: false + body: 
'{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic-endpoints-private-public","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"deploying","tags":[],"updated_at":"2025-05-15T09:48:07.477689Z"}' + headers: + Content-Length: + - "777" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 09:49:08 GMT + Server: + - Scaleway API Gateway (fr-par-2;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - e6edcbf3-edd8-4a69-ba0c-a8b49d95dfef + status: 200 OK + code: 200 + duration: 99.261375ms + - id: 49 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 777 + uncompressed: false + body: 
'{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic-endpoints-private-public","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"deploying","tags":[],"updated_at":"2025-05-15T09:48:07.477689Z"}' + headers: + Content-Length: + - "777" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 09:50:09 GMT + Server: + - Scaleway API Gateway (fr-par-2;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - fb2b61f0-04c3-4cff-a06c-3e2d95b4630a + status: 200 OK + code: 200 + duration: 100.894333ms + - id: 50 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 773 + uncompressed: false + body: 
'{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic-endpoints-private-public","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2025-05-15T09:50:15.710830Z"}' + headers: + Content-Length: + - "773" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 09:51:09 GMT + Server: + - Scaleway API Gateway (fr-par-2;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 9c33278f-b521-4c41-94aa-5fb335efdf64 + status: 200 OK + code: 200 + duration: 147.120583ms + - id: 51 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 + method: DELETE + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 776 + uncompressed: false + body: 
'{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic-endpoints-private-public","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"deleting","tags":[],"updated_at":"2025-05-15T09:50:15.710830Z"}' + headers: + Content-Length: + - "776" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 09:51:09 GMT + Server: + - Scaleway API Gateway (fr-par-2;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 120a0bbe-cac6-4a2b-b274-d6c0883a8c56 + status: 200 OK + code: 200 + duration: 139.438042ms + - id: 52 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 776 + uncompressed: false + body: 
'{"created_at":"2025-05-15T09:36:50.092088Z","endpoints":[{"disable_auth":false,"id":"60f26766-a204-48cb-8e51-907a62dd69d0","private_network":{"private_network_id":"552f5c0d-40f0-4d67-bb57-727c50641ba9"},"url":"https://986d6dc6-3514-4780-b64b-d8f214ef1757.552f5c0d-40f0-4d67-bb57-727c50641ba9.internal"}],"error_message":"","id":"986d6dc6-3514-4780-b64b-d8f214ef1757","max_size":1,"min_size":1,"model_id":"7205dbce-cc80-4b2a-bb7f-3fd3a804afc3","model_name":"meta/llama-3.1-8b-instruct:bf16","name":"test-inference-deployment-basic-endpoints-private-public","node_type_name":"L4","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":16},"region":"fr-par","size":1,"status":"deleting","tags":[],"updated_at":"2025-05-15T09:50:15.710830Z"}' + headers: + Content-Length: + - "776" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 09:51:09 GMT + Server: + - Scaleway API Gateway (fr-par-2;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 9c01d0a0-283f-4cc1-a981-0355b3d24b08 + status: 200 OK + code: 200 + duration: 58.92075ms + - id: 53 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 131 + uncompressed: false + body: '{"message":"resource is not found","resource":"deployment","resource_id":"986d6dc6-3514-4780-b64b-d8f214ef1757","type":"not_found"}' + 
headers: + Content-Length: + - "131" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 09:52:09 GMT + Server: + - Scaleway API Gateway (fr-par-2;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 975d76f0-da30-4483-a366-e01bc71339c4 + status: 404 Not Found + code: 404 + duration: 63.447792ms + - id: 54 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/vpc/v2/regions/fr-par/private-networks/552f5c0d-40f0-4d67-bb57-727c50641ba9 + method: DELETE + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 0 + uncompressed: false + body: "" + headers: + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 15 May 2025 09:52:10 GMT + Server: + - Scaleway API Gateway (fr-par-2;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 51412238-2aca-41a4-8c5f-51b8b6e416df + status: 204 No Content + code: 204 + duration: 1.734887375s + - id: 55 request: proto: HTTP/1.1 proto_major: 1 @@ -1982,8 +2717,8 @@ interactions: form: {} headers: User-Agent: - - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.23.0; darwin; arm64) terraform-provider/develop terraform/terraform-tests - url: https://api.scaleway.com/inference/v1beta1/regions/fr-par/deployments/7aac488b-b5a5-47cf-894a-75bf1759e247 + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) 
terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/986d6dc6-3514-4780-b64b-d8f214ef1757 method: GET response: proto: HTTP/2.0 @@ -1993,7 +2728,7 @@ interactions: trailer: {} content_length: 131 uncompressed: false - body: '{"message":"resource is not found","resource":"deployment","resource_id":"7aac488b-b5a5-47cf-894a-75bf1759e247","type":"not_found"}' + body: '{"message":"resource is not found","resource":"deployment","resource_id":"986d6dc6-3514-4780-b64b-d8f214ef1757","type":"not_found"}' headers: Content-Length: - "131" @@ -2002,7 +2737,7 @@ interactions: Content-Type: - application/json Date: - - Thu, 24 Oct 2024 13:47:08 GMT + - Thu, 15 May 2025 09:52:11 GMT Server: - Scaleway API Gateway (fr-par-2;edge02) Strict-Transport-Security: @@ -2012,7 +2747,7 @@ interactions: X-Frame-Options: - DENY X-Request-Id: - - 5a7a36df-7106-4cd2-95fe-dfbfc03f6701 + - 0a23b017-edbc-463d-ac44-7c038765b509 status: 404 Not Found code: 404 - duration: 55.77325ms + duration: 32.897583ms diff --git a/internal/services/inference/testdata/model-basic.cassette.yaml b/internal/services/inference/testdata/model-basic.cassette.yaml new file mode 100644 index 0000000000..3760790141 --- /dev/null +++ b/internal/services/inference/testdata/model-basic.cassette.yaml @@ -0,0 +1,395 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 158 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: '{"name":"TestAccModel_Basic","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","source":{"url":"https://huggingface.co/agentica-org/DeepCoder-14B-Preview"}}' + form: {} + headers: + Content-Type: + - application/json + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: 
https://api.scaleway.com/inference/v1/regions/fr-par/models + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1583 + uncompressed: false + body: '{"created_at":"2025-04-30T13:41:15.299061Z","description":"","has_eula":false,"id":"2cfdcbf0-e135-4e69-8f9d-f54da7704b9d","name":"TestAccModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":12410,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1583" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:41:15 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) 
+ Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 4b0ee75d-7570-4fb4-b7a9-d72a1fcab16d + status: 200 OK + code: 200 + duration: 411.01ms + - id: 1 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/2cfdcbf0-e135-4e69-8f9d-f54da7704b9d + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1583 + uncompressed: false + body: '{"created_at":"2025-04-30T13:41:15.299061Z","description":"","has_eula":false,"id":"2cfdcbf0-e135-4e69-8f9d-f54da7704b9d","name":"TestAccModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":12410,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization
_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1583" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:41:15 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 2bd3ffae-15de-4f58-9bc2-45c05f991cb1 + status: 200 OK + code: 200 + duration: 56.742042ms + - id: 2 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/2cfdcbf0-e135-4e69-8f9d-f54da7704b9d + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1583 + uncompressed: false + body: 
'{"created_at":"2025-04-30T13:41:15.299061Z","description":"","has_eula":false,"id":"2cfdcbf0-e135-4e69-8f9d-f54da7704b9d","name":"TestAccModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":12410,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1583" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:41:15 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 2368bd09-fec5-4d1d-b263-b4e9663df0e4 + status: 200 OK + code: 200 + duration: 47.954166ms + - 
id: 3 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/2cfdcbf0-e135-4e69-8f9d-f54da7704b9d + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1583 + uncompressed: false + body: '{"created_at":"2025-04-30T13:41:15.299061Z","description":"","has_eula":false,"id":"2cfdcbf0-e135-4e69-8f9d-f54da7704b9d","name":"TestAccModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":12410,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"p
roject_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1583" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:41:15 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - f4320da3-a133-46ac-9039-2951bf22ec00 + status: 200 OK + code: 200 + duration: 58.897083ms + - id: 4 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/2cfdcbf0-e135-4e69-8f9d-f54da7704b9d + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1583 + uncompressed: false + body: 
'{"created_at":"2025-04-30T13:41:15.299061Z","description":"","has_eula":false,"id":"2cfdcbf0-e135-4e69-8f9d-f54da7704b9d","name":"TestAccModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":12410,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1583" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:41:16 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 94d7c556-d07d-4ba4-82e3-4b6bba19ea67 + status: 200 OK + code: 200 + duration: 48.84075ms + - 
id: 5 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/2cfdcbf0-e135-4e69-8f9d-f54da7704b9d + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1583 + uncompressed: false + body: '{"created_at":"2025-04-30T13:41:15.299061Z","description":"","has_eula":false,"id":"2cfdcbf0-e135-4e69-8f9d-f54da7704b9d","name":"TestAccModel_Basic","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":12410,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"p
roject_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1583" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:41:17 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 7e8ac44d-003c-40d9-b8c3-41f8d9a5bc15 + status: 200 OK + code: 200 + duration: 56.951667ms + - id: 6 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/2cfdcbf0-e135-4e69-8f9d-f54da7704b9d + method: DELETE + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 0 + uncompressed: false + body: "" + headers: + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:41:17 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - c025016b-d42b-4a29-ac0e-aa6a55425e20 + status: 204 No Content + code: 204 + duration: 71.118042ms + - id: 7 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; 
arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/2cfdcbf0-e135-4e69-8f9d-f54da7704b9d + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 126 + uncompressed: false + body: '{"message":"resource is not found","resource":"Model","resource_id":"2cfdcbf0-e135-4e69-8f9d-f54da7704b9d","type":"not_found"}' + headers: + Content-Length: + - "126" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:41:17 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - bf579c4b-e84c-437b-9afe-e5a02ececd6a + status: 404 Not Found + code: 404 + duration: 23.354417ms diff --git a/internal/services/inference/testdata/model-deploy-model-on-server.cassette.yaml b/internal/services/inference/testdata/model-deploy-model-on-server.cassette.yaml new file mode 100644 index 0000000000..8264d69340 --- /dev/null +++ b/internal/services/inference/testdata/model-deploy-model-on-server.cassette.yaml @@ -0,0 +1,1230 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 172 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: '{"name":"TestAccModel_DeployModelOnServer","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","source":{"url":"https://huggingface.co/agentica-org/DeepCoder-14B-Preview"}}' + form: {} + headers: + Content-Type: + - application/json + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models + method: POST + response: + proto: HTTP/2.0 + 
proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1597 + uncompressed: false + body: '{"created_at":"2025-04-30T13:40:50.812300Z","description":"","has_eula":false,"id":"16614399-a705-45db-81d8-67296833afe5","name":"TestAccModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":12410,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1597" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:40:50 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge03) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + 
X-Frame-Options: + - DENY + X-Request-Id: + - 8ad5b1f7-5848-40c1-b70a-4ba50be8afef + status: 200 OK + code: 200 + duration: 523.227792ms + - id: 1 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/16614399-a705-45db-81d8-67296833afe5 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1597 + uncompressed: false + body: '{"created_at":"2025-04-30T13:40:50.812300Z","description":"","has_eula":false,"id":"16614399-a705-45db-81d8-67296833afe5","name":"TestAccModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":12410,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16}
,{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1597" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:40:51 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge03) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - b68784d6-51c3-4f23-9e43-52a95aa1869c + status: 200 OK + code: 200 + duration: 47.419042ms + - id: 2 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/16614399-a705-45db-81d8-67296833afe5 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1597 + uncompressed: false + body: 
'{"created_at":"2025-04-30T13:40:50.812300Z","description":"","has_eula":false,"id":"16614399-a705-45db-81d8-67296833afe5","name":"TestAccModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":12410,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1597" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:40:51 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge03) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 15d8c59e-86e9-497f-bb25-6523efc622d8 + status: 200 OK + code: 200 + duration: 
55.97ms + - id: 3 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 281 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: '{"name":"TestAccModel_DeployModelOnServer","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","model_id":"16614399-a705-45db-81d8-67296833afe5","accept_eula":true,"node_type_name":"H100","tags":[],"min_size":1,"max_size":1,"endpoints":[{"public_network":{},"disable_auth":false}]}' + form: {} + headers: + Content-Type: + - application/json + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 628 + uncompressed: false + body: '{"created_at":"2025-04-30T13:40:51.120421Z","endpoints":[{"disable_auth":false,"id":"ee919317-46e5-4096-99e3-8de41c45ef00","public_network":{},"url":"https://a6953323-0213-4696-b23b-6dd0b44b18b2.ifr.fr-par.scaleway.com"}],"id":"a6953323-0213-4696-b23b-6dd0b44b18b2","max_size":1,"min_size":1,"model_id":"16614399-a705-45db-81d8-67296833afe5","model_name":"TestAccModel_DeployModelOnServer","name":"TestAccModel_DeployModelOnServer","node_type_name":"H100","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":32},"region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + headers: + Content-Length: + - "628" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:40:51 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge03) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 4e589f21-2242-48c1-9a1e-8653f54886a5 + status: 200 OK + code: 200 + duration: 
374.472959ms + - id: 4 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/a6953323-0213-4696-b23b-6dd0b44b18b2 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 628 + uncompressed: false + body: '{"created_at":"2025-04-30T13:40:51.120421Z","endpoints":[{"disable_auth":false,"id":"ee919317-46e5-4096-99e3-8de41c45ef00","public_network":{},"url":"https://a6953323-0213-4696-b23b-6dd0b44b18b2.ifr.fr-par.scaleway.com"}],"id":"a6953323-0213-4696-b23b-6dd0b44b18b2","max_size":1,"min_size":1,"model_id":"16614399-a705-45db-81d8-67296833afe5","model_name":"TestAccModel_DeployModelOnServer","name":"TestAccModel_DeployModelOnServer","node_type_name":"H100","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":32},"region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + headers: + Content-Length: + - "628" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:40:51 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge03) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 989110d5-7426-49c9-b223-78d3d05d18d8 + status: 200 OK + code: 200 + duration: 76.484292ms + - id: 5 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - 
scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/a6953323-0213-4696-b23b-6dd0b44b18b2 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 628 + uncompressed: false + body: '{"created_at":"2025-04-30T13:40:51.120421Z","endpoints":[{"disable_auth":false,"id":"ee919317-46e5-4096-99e3-8de41c45ef00","public_network":{},"url":"https://a6953323-0213-4696-b23b-6dd0b44b18b2.ifr.fr-par.scaleway.com"}],"id":"a6953323-0213-4696-b23b-6dd0b44b18b2","max_size":1,"min_size":1,"model_id":"16614399-a705-45db-81d8-67296833afe5","model_name":"TestAccModel_DeployModelOnServer","name":"TestAccModel_DeployModelOnServer","node_type_name":"H100","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":32},"region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + headers: + Content-Length: + - "628" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:41:52 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 11c8435e-5777-4e3e-a1fe-9bcfdf472017 + status: 200 OK + code: 200 + duration: 1.133746458s + - id: 6 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/a6953323-0213-4696-b23b-6dd0b44b18b2 + method: GET + response: + proto: HTTP/2.0 + proto_major: 
2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 628 + uncompressed: false + body: '{"created_at":"2025-04-30T13:40:51.120421Z","endpoints":[{"disable_auth":false,"id":"ee919317-46e5-4096-99e3-8de41c45ef00","public_network":{},"url":"https://a6953323-0213-4696-b23b-6dd0b44b18b2.ifr.fr-par.scaleway.com"}],"id":"a6953323-0213-4696-b23b-6dd0b44b18b2","max_size":1,"min_size":1,"model_id":"16614399-a705-45db-81d8-67296833afe5","model_name":"TestAccModel_DeployModelOnServer","name":"TestAccModel_DeployModelOnServer","node_type_name":"H100","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":32},"region":"fr-par","size":0,"status":"creating","tags":[],"updated_at":null}' + headers: + Content-Length: + - "628" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:42:52 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge03) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 7a9b270d-5fc6-439a-8e99-8a48af005835 + status: 200 OK + code: 200 + duration: 102.873666ms + - id: 7 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/a6953323-0213-4696-b23b-6dd0b44b18b2 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 674 + uncompressed: false + body: 
'{"created_at":"2025-04-30T13:40:51.120421Z","endpoints":[{"disable_auth":false,"id":"ee919317-46e5-4096-99e3-8de41c45ef00","public_network":{},"url":"https://a6953323-0213-4696-b23b-6dd0b44b18b2.ifr.fr-par.scaleway.com"}],"error_message":"","id":"a6953323-0213-4696-b23b-6dd0b44b18b2","max_size":1,"min_size":1,"model_id":"16614399-a705-45db-81d8-67296833afe5","model_name":"TestAccModel_DeployModelOnServer","name":"TestAccModel_DeployModelOnServer","node_type_name":"H100","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":32},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-04-30T13:43:33.076212Z"}' + headers: + Content-Length: + - "674" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:43:52 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 8607bac5-fa5f-4d05-a425-f8e07be76728 + status: 200 OK + code: 200 + duration: 100.190334ms + - id: 8 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/a6953323-0213-4696-b23b-6dd0b44b18b2 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 674 + uncompressed: false + body: 
'{"created_at":"2025-04-30T13:40:51.120421Z","endpoints":[{"disable_auth":false,"id":"ee919317-46e5-4096-99e3-8de41c45ef00","public_network":{},"url":"https://a6953323-0213-4696-b23b-6dd0b44b18b2.ifr.fr-par.scaleway.com"}],"error_message":"","id":"a6953323-0213-4696-b23b-6dd0b44b18b2","max_size":1,"min_size":1,"model_id":"16614399-a705-45db-81d8-67296833afe5","model_name":"TestAccModel_DeployModelOnServer","name":"TestAccModel_DeployModelOnServer","node_type_name":"H100","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":32},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-04-30T13:43:33.076212Z"}' + headers: + Content-Length: + - "674" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:44:53 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 79703f98-6cef-4021-8fbf-3bdd98d2449e + status: 200 OK + code: 200 + duration: 88.873209ms + - id: 9 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/a6953323-0213-4696-b23b-6dd0b44b18b2 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 674 + uncompressed: false + body: 
'{"created_at":"2025-04-30T13:40:51.120421Z","endpoints":[{"disable_auth":false,"id":"ee919317-46e5-4096-99e3-8de41c45ef00","public_network":{},"url":"https://a6953323-0213-4696-b23b-6dd0b44b18b2.ifr.fr-par.scaleway.com"}],"error_message":"","id":"a6953323-0213-4696-b23b-6dd0b44b18b2","max_size":1,"min_size":1,"model_id":"16614399-a705-45db-81d8-67296833afe5","model_name":"TestAccModel_DeployModelOnServer","name":"TestAccModel_DeployModelOnServer","node_type_name":"H100","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":32},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-04-30T13:43:33.076212Z"}' + headers: + Content-Length: + - "674" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:45:54 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 105f5bbd-a1f4-4ecc-89ee-734d7b864e10 + status: 200 OK + code: 200 + duration: 1.111759541s + - id: 10 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/a6953323-0213-4696-b23b-6dd0b44b18b2 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 674 + uncompressed: false + body: 
'{"created_at":"2025-04-30T13:40:51.120421Z","endpoints":[{"disable_auth":false,"id":"ee919317-46e5-4096-99e3-8de41c45ef00","public_network":{},"url":"https://a6953323-0213-4696-b23b-6dd0b44b18b2.ifr.fr-par.scaleway.com"}],"error_message":"","id":"a6953323-0213-4696-b23b-6dd0b44b18b2","max_size":1,"min_size":1,"model_id":"16614399-a705-45db-81d8-67296833afe5","model_name":"TestAccModel_DeployModelOnServer","name":"TestAccModel_DeployModelOnServer","node_type_name":"H100","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":32},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-04-30T13:43:33.076212Z"}' + headers: + Content-Length: + - "674" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:46:54 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge02) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - db784cf5-1e85-4562-9700-4968b19e7727 + status: 200 OK + code: 200 + duration: 110.197334ms + - id: 11 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/a6953323-0213-4696-b23b-6dd0b44b18b2 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 674 + uncompressed: false + body: 
'{"created_at":"2025-04-30T13:40:51.120421Z","endpoints":[{"disable_auth":false,"id":"ee919317-46e5-4096-99e3-8de41c45ef00","public_network":{},"url":"https://a6953323-0213-4696-b23b-6dd0b44b18b2.ifr.fr-par.scaleway.com"}],"error_message":"","id":"a6953323-0213-4696-b23b-6dd0b44b18b2","max_size":1,"min_size":1,"model_id":"16614399-a705-45db-81d8-67296833afe5","model_name":"TestAccModel_DeployModelOnServer","name":"TestAccModel_DeployModelOnServer","node_type_name":"H100","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":32},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-04-30T13:43:33.076212Z"}' + headers: + Content-Length: + - "674" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:47:54 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 0d4d1487-03f3-478a-8645-27d102deaa88 + status: 200 OK + code: 200 + duration: 108.827875ms + - id: 12 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/a6953323-0213-4696-b23b-6dd0b44b18b2 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 674 + uncompressed: false + body: 
'{"created_at":"2025-04-30T13:40:51.120421Z","endpoints":[{"disable_auth":false,"id":"ee919317-46e5-4096-99e3-8de41c45ef00","public_network":{},"url":"https://a6953323-0213-4696-b23b-6dd0b44b18b2.ifr.fr-par.scaleway.com"}],"error_message":"","id":"a6953323-0213-4696-b23b-6dd0b44b18b2","max_size":1,"min_size":1,"model_id":"16614399-a705-45db-81d8-67296833afe5","model_name":"TestAccModel_DeployModelOnServer","name":"TestAccModel_DeployModelOnServer","node_type_name":"H100","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":32},"region":"fr-par","size":0,"status":"deploying","tags":[],"updated_at":"2025-04-30T13:43:33.076212Z"}' + headers: + Content-Length: + - "674" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:48:54 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - c2b80fbb-5322-4bb9-a5e6-d6e70b85cd79 + status: 200 OK + code: 200 + duration: 483.5265ms + - id: 13 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/a6953323-0213-4696-b23b-6dd0b44b18b2 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 670 + uncompressed: false + body: 
'{"created_at":"2025-04-30T13:40:51.120421Z","endpoints":[{"disable_auth":false,"id":"ee919317-46e5-4096-99e3-8de41c45ef00","public_network":{},"url":"https://a6953323-0213-4696-b23b-6dd0b44b18b2.ifr.fr-par.scaleway.com"}],"error_message":"","id":"a6953323-0213-4696-b23b-6dd0b44b18b2","max_size":1,"min_size":1,"model_id":"16614399-a705-45db-81d8-67296833afe5","model_name":"TestAccModel_DeployModelOnServer","name":"TestAccModel_DeployModelOnServer","node_type_name":"H100","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":32},"region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2025-04-30T13:49:33.669275Z"}' + headers: + Content-Length: + - "670" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:49:54 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 18323957-99e2-40e5-ac6f-8444ed350a1e + status: 200 OK + code: 200 + duration: 106.458417ms + - id: 14 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/a6953323-0213-4696-b23b-6dd0b44b18b2 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 670 + uncompressed: false + body: 
'{"created_at":"2025-04-30T13:40:51.120421Z","endpoints":[{"disable_auth":false,"id":"ee919317-46e5-4096-99e3-8de41c45ef00","public_network":{},"url":"https://a6953323-0213-4696-b23b-6dd0b44b18b2.ifr.fr-par.scaleway.com"}],"error_message":"","id":"a6953323-0213-4696-b23b-6dd0b44b18b2","max_size":1,"min_size":1,"model_id":"16614399-a705-45db-81d8-67296833afe5","model_name":"TestAccModel_DeployModelOnServer","name":"TestAccModel_DeployModelOnServer","node_type_name":"H100","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":32},"region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2025-04-30T13:49:33.669275Z"}' + headers: + Content-Length: + - "670" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:49:55 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - c4ff88bf-9b19-4a01-b42c-5526caa4e590 + status: 200 OK + code: 200 + duration: 70.225792ms + - id: 15 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/a6953323-0213-4696-b23b-6dd0b44b18b2 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 670 + uncompressed: false + body: 
'{"created_at":"2025-04-30T13:40:51.120421Z","endpoints":[{"disable_auth":false,"id":"ee919317-46e5-4096-99e3-8de41c45ef00","public_network":{},"url":"https://a6953323-0213-4696-b23b-6dd0b44b18b2.ifr.fr-par.scaleway.com"}],"error_message":"","id":"a6953323-0213-4696-b23b-6dd0b44b18b2","max_size":1,"min_size":1,"model_id":"16614399-a705-45db-81d8-67296833afe5","model_name":"TestAccModel_DeployModelOnServer","name":"TestAccModel_DeployModelOnServer","node_type_name":"H100","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":32},"region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2025-04-30T13:49:33.669275Z"}' + headers: + Content-Length: + - "670" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:49:55 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 9f346b29-d2eb-401f-ab5a-d1e7d81cc9be + status: 200 OK + code: 200 + duration: 74.416167ms + - id: 16 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/16614399-a705-45db-81d8-67296833afe5 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1597 + uncompressed: false + body: 
'{"created_at":"2025-04-30T13:40:50.812300Z","description":"","has_eula":false,"id":"16614399-a705-45db-81d8-67296833afe5","name":"TestAccModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":12410,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1597" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:49:55 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 5538078e-5b6d-4d14-a05f-5d747fc19d43 + status: 200 OK + code: 200 + duration: 
62.723042ms + - id: 17 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/a6953323-0213-4696-b23b-6dd0b44b18b2 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 670 + uncompressed: false + body: '{"created_at":"2025-04-30T13:40:51.120421Z","endpoints":[{"disable_auth":false,"id":"ee919317-46e5-4096-99e3-8de41c45ef00","public_network":{},"url":"https://a6953323-0213-4696-b23b-6dd0b44b18b2.ifr.fr-par.scaleway.com"}],"error_message":"","id":"a6953323-0213-4696-b23b-6dd0b44b18b2","max_size":1,"min_size":1,"model_id":"16614399-a705-45db-81d8-67296833afe5","model_name":"TestAccModel_DeployModelOnServer","name":"TestAccModel_DeployModelOnServer","node_type_name":"H100","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":32},"region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2025-04-30T13:49:33.669275Z"}' + headers: + Content-Length: + - "670" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:49:55 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 628c56f2-cfe6-430e-b245-d58f1e7fc802 + status: 200 OK + code: 200 + duration: 54.192375ms + - id: 18 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + 
User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/a6953323-0213-4696-b23b-6dd0b44b18b2 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 670 + uncompressed: false + body: '{"created_at":"2025-04-30T13:40:51.120421Z","endpoints":[{"disable_auth":false,"id":"ee919317-46e5-4096-99e3-8de41c45ef00","public_network":{},"url":"https://a6953323-0213-4696-b23b-6dd0b44b18b2.ifr.fr-par.scaleway.com"}],"error_message":"","id":"a6953323-0213-4696-b23b-6dd0b44b18b2","max_size":1,"min_size":1,"model_id":"16614399-a705-45db-81d8-67296833afe5","model_name":"TestAccModel_DeployModelOnServer","name":"TestAccModel_DeployModelOnServer","node_type_name":"H100","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":32},"region":"fr-par","size":1,"status":"ready","tags":[],"updated_at":"2025-04-30T13:49:33.669275Z"}' + headers: + Content-Length: + - "670" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:49:56 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 3cbe50ed-cdda-4ce1-9a3d-d629043dff7a + status: 200 OK + code: 200 + duration: 67.658875ms + - id: 19 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/a6953323-0213-4696-b23b-6dd0b44b18b2 + 
method: DELETE + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 673 + uncompressed: false + body: '{"created_at":"2025-04-30T13:40:51.120421Z","endpoints":[{"disable_auth":false,"id":"ee919317-46e5-4096-99e3-8de41c45ef00","public_network":{},"url":"https://a6953323-0213-4696-b23b-6dd0b44b18b2.ifr.fr-par.scaleway.com"}],"error_message":"","id":"a6953323-0213-4696-b23b-6dd0b44b18b2","max_size":1,"min_size":1,"model_id":"16614399-a705-45db-81d8-67296833afe5","model_name":"TestAccModel_DeployModelOnServer","name":"TestAccModel_DeployModelOnServer","node_type_name":"H100","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":32},"region":"fr-par","size":1,"status":"deleting","tags":[],"updated_at":"2025-04-30T13:49:33.669275Z"}' + headers: + Content-Length: + - "673" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:49:56 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 2fe2c2e0-e354-48b2-aede-7fdefa8f9f75 + status: 200 OK + code: 200 + duration: 155.799583ms + - id: 20 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/a6953323-0213-4696-b23b-6dd0b44b18b2 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 673 + uncompressed: false + body: 
'{"created_at":"2025-04-30T13:40:51.120421Z","endpoints":[{"disable_auth":false,"id":"ee919317-46e5-4096-99e3-8de41c45ef00","public_network":{},"url":"https://a6953323-0213-4696-b23b-6dd0b44b18b2.ifr.fr-par.scaleway.com"}],"error_message":"","id":"a6953323-0213-4696-b23b-6dd0b44b18b2","max_size":1,"min_size":1,"model_id":"16614399-a705-45db-81d8-67296833afe5","model_name":"TestAccModel_DeployModelOnServer","name":"TestAccModel_DeployModelOnServer","node_type_name":"H100","project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","quantization":{"bits":32},"region":"fr-par","size":1,"status":"deleting","tags":[],"updated_at":"2025-04-30T13:49:33.669275Z"}' + headers: + Content-Length: + - "673" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:49:56 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 80ff2e0a-5010-4d0a-9ab8-934a55c4843a + status: 200 OK + code: 200 + duration: 60.215042ms + - id: 21 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/deployments/a6953323-0213-4696-b23b-6dd0b44b18b2 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 131 + uncompressed: false + body: '{"message":"resource is not found","resource":"deployment","resource_id":"a6953323-0213-4696-b23b-6dd0b44b18b2","type":"not_found"}' + headers: + Content-Length: + - "131" + Content-Security-Policy: + - default-src 'none'; 
frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:50:57 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - 04359efd-c03f-4d77-bed0-7202aae74198 + status: 404 Not Found + code: 404 + duration: 73.797875ms + - id: 22 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/16614399-a705-45db-81d8-67296833afe5 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 1597 + uncompressed: false + body: 
'{"created_at":"2025-04-30T13:40:50.812300Z","description":"","has_eula":false,"id":"16614399-a705-45db-81d8-67296833afe5","name":"TestAccModel_DeployModelOnServer","nodes_support":[{"nodes":[{"node_type_name":"L4","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"L40S","quantizations":[{"allowed":false,"max_context_size":0,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100","quantizations":[{"allowed":true,"max_context_size":12410,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]},{"node_type_name":"H100-2","quantizations":[{"allowed":true,"max_context_size":131072,"quantization_bits":32},{"allowed":false,"max_context_size":0,"quantization_bits":16},{"allowed":false,"max_context_size":0,"quantization_bits":8},{"allowed":false,"max_context_size":0,"quantization_bits":4}]}]}],"parameter_size_bits":32,"project_id":"d3520a52-2c75-4ba0-bda8-82dd087f07f2","region":"fr-par","size_bytes":59091725346,"status":"ready","tags":["custom"],"updated_at":null}' + headers: + Content-Length: + - "1597" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:50:57 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - cda24130-4e7a-40ed-b6f4-9d776c606040 + status: 200 OK + code: 200 + duration: 
147.785375ms + - id: 23 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/16614399-a705-45db-81d8-67296833afe5 + method: DELETE + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 0 + uncompressed: false + body: "" + headers: + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 30 Apr 2025 13:50:57 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - c1eabfb4-62ae-473b-a67a-530f26470d8d + status: 204 No Content + code: 204 + duration: 75.828083ms + - id: 24 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.scaleway.com + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + User-Agent: + - scaleway-sdk-go/v1.0.0-beta.7+dev (go1.24.1; darwin; arm64) terraform-provider/develop terraform/terraform-tests + url: https://api.scaleway.com/inference/v1/regions/fr-par/models/16614399-a705-45db-81d8-67296833afe5 + method: GET + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 126 + uncompressed: false + body: '{"message":"resource is not found","resource":"Model","resource_id":"16614399-a705-45db-81d8-67296833afe5","type":"not_found"}' + headers: + Content-Length: + - "126" + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - 
application/json + Date: + - Wed, 30 Apr 2025 13:50:57 GMT + Server: + - Scaleway API Gateway (fr-par-1;edge01) + Strict-Transport-Security: + - max-age=63072000 + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Request-Id: + - d3170a3d-8763-4a16-a353-ac27546dcb6f + status: 404 Not Found + code: 404 + duration: 23.153583ms diff --git a/internal/services/inference/testfuncs/checks.go b/internal/services/inference/testfuncs/checks.go index 5f9afb2e9a..f617f5a7fd 100644 --- a/internal/services/inference/testfuncs/checks.go +++ b/internal/services/inference/testfuncs/checks.go @@ -5,7 +5,7 @@ import ( "github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource" "github.com/hashicorp/terraform-plugin-sdk/v2/terraform" - inferenceSDK "github.com/scaleway/scaleway-sdk-go/api/inference/v1beta1" + inferenceSDK "github.com/scaleway/scaleway-sdk-go/api/inference/v1" "github.com/scaleway/terraform-provider-scaleway/v2/internal/acctest" "github.com/scaleway/terraform-provider-scaleway/v2/internal/httperrors" "github.com/scaleway/terraform-provider-scaleway/v2/internal/services/inference" @@ -40,3 +40,33 @@ func IsDeploymentDestroyed(tt *acctest.TestTools) resource.TestCheckFunc { return nil } } + +func IsModelDestroyed(tt *acctest.TestTools) resource.TestCheckFunc { + return func(s *terraform.State) error { + for _, rs := range s.RootModule().Resources { + if rs.Type != "scaleway_inference_model" { + continue + } + + inferenceAPI, region, id, err := inference.NewAPIWithRegionAndID(tt.Meta, rs.Primary.ID) + if err != nil { + return err + } + + model, err := inferenceAPI.GetModel(&inferenceSDK.GetModelRequest{ + Region: region, + ModelID: id, + }) + + if err == nil { + return fmt.Errorf("model %s (%s) still exists", model.Name, model.ID) + } + + if !httperrors.Is404(err) { + return err + } + } + + return nil + } +} diff --git a/internal/services/inference/testfuncs/sweep.go b/internal/services/inference/testfuncs/sweep.go index bba702b349..0d1ddaedbc 100644 
--- a/internal/services/inference/testfuncs/sweep.go +++ b/internal/services/inference/testfuncs/sweep.go @@ -4,18 +4,23 @@ import ( "fmt" "github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource" - inference "github.com/scaleway/scaleway-sdk-go/api/inference/v1beta1" + "github.com/scaleway/scaleway-sdk-go/api/inference/v1" "github.com/scaleway/scaleway-sdk-go/scw" "github.com/scaleway/terraform-provider-scaleway/v2/internal/acctest" "github.com/scaleway/terraform-provider-scaleway/v2/internal/logging" ) func AddTestSweepers() { - resource.AddTestSweepers("scaleway_instance_deployment", &resource.Sweeper{ - Name: "scaleway_instance_deployment", + resource.AddTestSweepers("scaleway_inference_deployment", &resource.Sweeper{ + Name: "scaleway_inference_deployment", Dependencies: nil, F: testSweepDeployment, }) + resource.AddTestSweepers("scaleway_inference_model", &resource.Sweeper{ + Name: "scaleway_inference_model", + Dependencies: nil, + F: testSweepModel, + }) } func testSweepDeployment(_ string) error { @@ -47,3 +52,32 @@ func testSweepDeployment(_ string) error { return nil }) } + +func testSweepModel(_ string) error { + return acctest.SweepRegions((&inference.API{}).Regions(), func(scwClient *scw.Client, region scw.Region) error { + inferenceAPI := inference.NewAPI(scwClient) + + logging.L.Debugf("sweeper: destroying the inference models in (%s)", region) + + listModels, err := inferenceAPI.ListModels(&inference.ListModelsRequest{ + Region: region, + }, scw.WithAllPages()) + if err != nil { + return fmt.Errorf("error listing models in (%s) in sweeper: %w", region, err) + } + + for _, model := range listModels.Models { + err := inferenceAPI.DeleteModel(&inference.DeleteModelRequest{ + Region: model.Region, + ModelID: model.ID, + }) + if err != nil { + logging.L.Debugf("sweeper: error (%s)", err) + + return fmt.Errorf("error deleting model in sweeper: %w", err) + } + } + + return nil + }) +} diff --git a/internal/services/inference/types.go 
b/internal/services/inference/types.go new file mode 100644 index 0000000000..b7f6842bab --- /dev/null +++ b/internal/services/inference/types.go @@ -0,0 +1,40 @@ +package inference + +import "github.com/scaleway/scaleway-sdk-go/api/inference/v1" + +func flattenNodeSupport(nodesSupportInfo []*inference.ModelSupportInfo) []interface{} { + if len(nodesSupportInfo) == 0 { + return nil + } + + var result []interface{} + + for _, nodeSupport := range nodesSupportInfo { + if nodeSupport == nil { + continue + } + + for _, node := range nodeSupport.Nodes { + flattenQuantization := make([]interface{}, 0, len(node.Quantizations)) + + for _, quantization := range node.Quantizations { + if quantization == nil { + continue + } + + flattenQuantization = append(flattenQuantization, map[string]interface{}{ + "quantization_bits": quantization.QuantizationBits, + "allowed": quantization.Allowed, + "max_context_size": quantization.MaxContextSize, + }) + } + + result = append(result, map[string]interface{}{ + "node_type_name": node.NodeTypeName, + "quantization": flattenQuantization, + }) + } + } + + return result +} diff --git a/internal/services/inference/waiter.go b/internal/services/inference/waiter.go index 712020b1cb..44c84fe686 100644 --- a/internal/services/inference/waiter.go +++ b/internal/services/inference/waiter.go @@ -4,7 +4,7 @@ import ( "context" "time" - inference "github.com/scaleway/scaleway-sdk-go/api/inference/v1beta1" + "github.com/scaleway/scaleway-sdk-go/api/inference/v1" "github.com/scaleway/scaleway-sdk-go/scw" "github.com/scaleway/terraform-provider-scaleway/v2/internal/transport" ) @@ -24,3 +24,19 @@ func waitForDeployment(ctx context.Context, inferenceAPI *inference.API, region return deployment, err } + +func waitForModel(ctx context.Context, inferenceAPI *inference.API, region scw.Region, id string, timeout time.Duration) (*inference.Model, error) { + retryInterval := defaultModelRetryInterval + if transport.DefaultWaitRetryInterval != nil { + 
retryInterval = *transport.DefaultWaitRetryInterval + } + + model, err := inferenceAPI.WaitForModel(&inference.WaitForModelRequest{ + ModelID: id, + Region: region, + RetryInterval: &retryInterval, + Timeout: scw.TimeDurationPtr(timeout), + }, scw.WithContext(ctx)) + + return model, err +}