From fbb865d24cc497cadc399dd5521612e185823e0c Mon Sep 17 00:00:00 2001 From: Benedikt Rollik Date: Tue, 3 Jun 2025 15:21:58 +0200 Subject: [PATCH 1/6] feat(infr): add scale documentation --- menu/navigation.json | 4 ++ .../how-to/create-deployment.mdx | 10 +++-- .../how-to/scale-deployment.mdx | 37 +++++++++++++++++++ 3 files changed, 48 insertions(+), 3 deletions(-) create mode 100644 pages/managed-inference/how-to/scale-deployment.mdx diff --git a/menu/navigation.json b/menu/navigation.json index 6b0905c5db..b3451524a1 100644 --- a/menu/navigation.json +++ b/menu/navigation.json @@ -900,6 +900,10 @@ "label": "Monitor a deployment", "slug": "monitor-deployment" }, + { + "label": "Scale a deployment", + "slug": "scale-deployment" + }, { "label": "Manage allowed IP addresses", "slug": "manage-allowed-ips" diff --git a/pages/managed-inference/how-to/create-deployment.mdx b/pages/managed-inference/how-to/create-deployment.mdx index ad5ed35260..a979180630 100644 --- a/pages/managed-inference/how-to/create-deployment.mdx +++ b/pages/managed-inference/how-to/create-deployment.mdx @@ -28,12 +28,16 @@ dates: - Choose the geographical **region** for the deployment. - Specify the GPU Instance type to be used with your deployment. -4. Enter a **name** for the deployment, and optional tags. -5. Configure the **network connectivity** settings for the deployment: +4. Choose the number of nodes for your deployment. + + High availability is only guaranteed with two or more nodes. + +5. Enter a **name** for the deployment, and optional tags. +6. Configure the **network connectivity** settings for the deployment: - Attach to a **Private Network** for secure communication and restricted availability. Choose an existing Private Network from the drop-down list, or create a new one. - Set up **Public connectivity** to access resources via the public internet. Authentication by API key is enabled by default. 
- Enabling both private and public connectivity will result in two distinct endpoints (public and private) for your deployment. - Deployments must have at least one endpoint, either public or private. -6. Click **Deploy model** to launch the deployment process. Once the model is ready, it will be listed among your deployments. \ No newline at end of file +7. Click **Deploy model** to launch the deployment process. Once the model is ready, it will be listed among your deployments. \ No newline at end of file diff --git a/pages/managed-inference/how-to/scale-deployment.mdx b/pages/managed-inference/how-to/scale-deployment.mdx new file mode 100644 index 0000000000..70a5238b5d --- /dev/null +++ b/pages/managed-inference/how-to/scale-deployment.mdx @@ -0,0 +1,37 @@ +--- +meta: + title: How to scale Managed Inference deployments + description: This page explains how to scale Managed Inference deployments in size +content: + h1: How to scale Managed Inference deployments + paragraph: This page explains how to scale Managed Inference deployments in size +tags: managed-inference ai-data scaling +dates: + validation: 2025-06-03 + posted: 2025-06-03 +categories: + - ai-data +--- + +You can scale your Managed Inference deployment up or down to match it to the incoming load of your deployment. + + + + + - A Scaleway account logged into the [console](https://console.scaleway.com) + - A [Managed Inference deployment](/managed-inference/quickstart/) + - [Owner](/iam/concepts/#owner) status or [IAM permissions](/iam/concepts/#permission) allowing you to perform actions in the intended Organization + +## How to scale a Managed Inference deployement in size + +1. Click **Managed Inference** in the **AI** section of the [Scaleway console](https://console.scaleway.com) side menu. A list of your deployments displays. +2. Click a deployment name or > **More info** to access the deployment dashboard. +3. Click the **Settings** tab and navigate to the **Scaling** section. +4. 
Click **Change node number** and adjust the number of nodes in your deployment. + + High availability is only guaranteed with two or more nodes. + +5. Click **Update node type** to update the numbmer of nodes in your deployment. + + Note that your deployment will be unavailable for 15-30 minutes while the node update is in progress + \ No newline at end of file From 6f13925b247e1842c7d86755ef64d8bb06fc5804 Mon Sep 17 00:00:00 2001 From: Benedikt Rollik Date: Tue, 3 Jun 2025 15:33:50 +0200 Subject: [PATCH 2/6] feat(infr): update wording --- pages/managed-inference/how-to/scale-deployment.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pages/managed-inference/how-to/scale-deployment.mdx b/pages/managed-inference/how-to/scale-deployment.mdx index 70a5238b5d..f870aac896 100644 --- a/pages/managed-inference/how-to/scale-deployment.mdx +++ b/pages/managed-inference/how-to/scale-deployment.mdx @@ -27,11 +27,11 @@ You can scale your Managed Inference deployment up or down to match it to the in 1. Click **Managed Inference** in the **AI** section of the [Scaleway console](https://console.scaleway.com) side menu. A list of your deployments displays. 2. Click a deployment name or > **More info** to access the deployment dashboard. 3. Click the **Settings** tab and navigate to the **Scaling** section. -4. Click **Change node number** and adjust the number of nodes in your deployment. +4. Click **Update node count** and adjust the number of nodes in your deployment. High availability is only guaranteed with two or more nodes. -5. Click **Update node type** to update the numbmer of nodes in your deployment. +5. Click **Update node count** to update the numbmer of nodes in your deployment. 
Note that your deployment will be unavailable for 15-30 minutes while the node update is in progress \ No newline at end of file From c48f39422a4c27d638abb897f8690356841031fe Mon Sep 17 00:00:00 2001 From: Benedikt Rollik Date: Tue, 3 Jun 2025 17:39:48 +0200 Subject: [PATCH 3/6] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Néda <87707325+nerda-codes@users.noreply.github.com> --- pages/managed-inference/how-to/scale-deployment.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pages/managed-inference/how-to/scale-deployment.mdx b/pages/managed-inference/how-to/scale-deployment.mdx index f870aac896..fd1a165958 100644 --- a/pages/managed-inference/how-to/scale-deployment.mdx +++ b/pages/managed-inference/how-to/scale-deployment.mdx @@ -31,7 +31,7 @@ You can scale your Managed Inference deployment up or down to match it to the in High availability is only guaranteed with two or more nodes. -5. Click **Update node count** to update the numbmer of nodes in your deployment. +5. Click **Update node count** to update the number of nodes in your deployment. - Note that your deployment will be unavailable for 15-30 minutes while the node update is in progress + Your deployment will be unavailable for 15-30 minutes while the node update is in progress. 
\ No newline at end of file From 692120596e2fa04e11c10edce5dbb296486a2eff Mon Sep 17 00:00:00 2001 From: Benedikt Rollik Date: Wed, 4 Jun 2025 14:39:22 +0200 Subject: [PATCH 4/6] Apply suggestions from code review Co-authored-by: Jessica <113192637+jcirinosclwy@users.noreply.github.com> --- pages/managed-inference/how-to/scale-deployment.mdx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pages/managed-inference/how-to/scale-deployment.mdx b/pages/managed-inference/how-to/scale-deployment.mdx index fd1a165958..7d3ccbe14d 100644 --- a/pages/managed-inference/how-to/scale-deployment.mdx +++ b/pages/managed-inference/how-to/scale-deployment.mdx @@ -15,14 +15,13 @@ categories: You can scale your Managed Inference deployment up or down to match it to the incoming load of your deployment. - - A Scaleway account logged into the [console](https://console.scaleway.com) - A [Managed Inference deployment](/managed-inference/quickstart/) - [Owner](/iam/concepts/#owner) status or [IAM permissions](/iam/concepts/#permission) allowing you to perform actions in the intended Organization -## How to scale a Managed Inference deployement in size +## How to scale a Managed Inference deployment in size 1. Click **Managed Inference** in the **AI** section of the [Scaleway console](https://console.scaleway.com) side menu. A list of your deployments displays. 2. Click a deployment name or > **More info** to access the deployment dashboard. 
From 7784799af3cb39ebf1864c3710383ed10903f2e7 Mon Sep 17 00:00:00 2001 From: Benedikt Rollik Date: Wed, 4 Jun 2025 15:20:50 +0200 Subject: [PATCH 5/6] feat(infr): update file name --- menu/navigation.json | 4 ++-- .../{scale-deployment.mdx => configure-autoscaling.mdx} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename pages/managed-inference/how-to/{scale-deployment.mdx => configure-autoscaling.mdx} (100%) diff --git a/menu/navigation.json b/menu/navigation.json index b3451524a1..0b084d5902 100644 --- a/menu/navigation.json +++ b/menu/navigation.json @@ -901,8 +901,8 @@ "slug": "monitor-deployment" }, { - "label": "Scale a deployment", - "slug": "scale-deployment" + "label": "Configure autoscaling", + "slug": "configure-autoscaling" }, { "label": "Manage allowed IP addresses", diff --git a/pages/managed-inference/how-to/scale-deployment.mdx b/pages/managed-inference/how-to/configure-autoscaling.mdx similarity index 100% rename from pages/managed-inference/how-to/scale-deployment.mdx rename to pages/managed-inference/how-to/configure-autoscaling.mdx From f84a1d6559e3027d8253abcfc2e3707d886b8782 Mon Sep 17 00:00:00 2001 From: Rowena Date: Wed, 18 Jun 2025 16:13:26 +0200 Subject: [PATCH 6/6] fix(inf): add beta info re node number --- pages/managed-inference/concepts.mdx | 4 ++++ pages/managed-inference/how-to/configure-autoscaling.mdx | 4 ++++ pages/managed-inference/how-to/create-deployment.mdx | 2 +- pages/managed-inference/quickstart.mdx | 1 + 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pages/managed-inference/concepts.mdx b/pages/managed-inference/concepts.mdx index 45a4dab29b..0ebec3bc72 100644 --- a/pages/managed-inference/concepts.mdx +++ b/pages/managed-inference/concepts.mdx @@ -71,6 +71,10 @@ LLMs are advanced artificial intelligence systems capable of understanding and g These models, such as Llama-3, are trained on vast amounts of data to learn the patterns and structures of language, enabling them to generate coherent and 
contextually relevant responses to queries or prompts. LLMs have applications in natural language processing, text generation, translation, and other tasks requiring sophisticated language understanding and production. +## Node number + +The node number (or node count) defines the number of nodes, or Instances, that are running your Managed Inference deployment. [Increasing the node number](/managed-inference/how-to/configure-autoscaling/) scales your deployment, so that it can handle more load. + ## Prompt In the context of generative AI models, a prompt refers to the input provided to the model to generate a desired response. diff --git a/pages/managed-inference/how-to/configure-autoscaling.mdx b/pages/managed-inference/how-to/configure-autoscaling.mdx index 7d3ccbe14d..63a56bc438 100644 --- a/pages/managed-inference/how-to/configure-autoscaling.mdx +++ b/pages/managed-inference/how-to/configure-autoscaling.mdx @@ -15,6 +15,10 @@ categories: You can scale your Managed Inference deployment up or down to match it to the incoming load of your deployment. + +This feature is currently in [Public Beta](https://www.scaleway.com/betas/). + + - A Scaleway account logged into the [console](https://console.scaleway.com) diff --git a/pages/managed-inference/how-to/create-deployment.mdx b/pages/managed-inference/how-to/create-deployment.mdx index a979180630..9c98e3c4b6 100644 --- a/pages/managed-inference/how-to/create-deployment.mdx +++ b/pages/managed-inference/how-to/create-deployment.mdx @@ -28,7 +28,7 @@ dates: - Choose the geographical **region** for the deployment. - Specify the GPU Instance type to be used with your deployment. -4. Choose the number of nodes for your deployment. +4. Choose the number of nodes for your deployment. Note that this feature is currently in [Public Beta](https://www.scaleway.com/betas/). High availability is only guaranteed with two or more nodes. 
diff --git a/pages/managed-inference/quickstart.mdx b/pages/managed-inference/quickstart.mdx index d95f94faeb..beed812381 100644 --- a/pages/managed-inference/quickstart.mdx +++ b/pages/managed-inference/quickstart.mdx @@ -43,6 +43,7 @@ Here are some of the key features of Scaleway Managed Inference: - Choose the geographical **region** for the deployment. - Specify the GPU Instance type to be used with your deployment. + - Choose the number of nodes for your deployment. Note that this feature is currently in [Public Beta](https://www.scaleway.com/betas/). 4. Enter a **name** for the deployment, along with optional tags to aid in organization. 5. Configure the **network** settings for the deployment: - Enable **Private Network** for secure communication and restricted availability within Private Networks. Choose an existing Private Network from the drop-down list, or create a new one.