Commit e469a4b

Merge remote-tracking branch 'upstream/main'
2 parents: 055a251 + 2c79b90

25 files changed: +666 −557 lines

examples/kfto-sft-llm/README.md
39 additions, 1 deletion
@@ -3,6 +3,9 @@
 This example demonstrates how to fine-tune LLMs with the Kubeflow Training operator on OpenShift AI.
 It uses HuggingFace SFTTrainer, with PEFT for LoRA and qLoRA, and PyTorch FSDP to distribute the training across multiple GPUs / nodes.
 
+> [!TIP]
+> **Multi-Team Resource Management**: For enterprise scenarios where multiple teams share GPU resources, see the [**Kueue Multi-Team Resource Management Workshop**](../../workshops/kueue/README.md). It demonstrates how to use this LLM fine-tuning example with Kueue for fair resource allocation, borrowing policies, and workload scheduling across teams.
+
 > [!IMPORTANT]
 > This example has been tested with the configurations listed in the [validation](#validation) section.
 > Its configuration space is high-dimensional and tightly coupled to the runtime / hardware configuration.
@@ -17,6 +20,8 @@ It uses HuggingFace SFTTrainer, with PEFT for LoRA and qLoRA, and PyTorch FSDP t
 
 ## Setup
 
+### Setup Workbench
+
 * Access the OpenShift AI dashboard, for example from the top navigation bar menu:
 ![](./docs/01.png)
 * Log in, then go to _Data Science Projects_ and create a project:
@@ -43,8 +48,41 @@ It uses HuggingFace SFTTrainer, with PEFT for LoRA and qLoRA, and PyTorch FSDP t
 ![](./docs/06.png)
 * Navigate to the `distributed-workloads/examples/kfto-sft-llm` directory and open the `sft` notebook
 
+> [!IMPORTANT]
+> * You will need a Hugging Face token if using gated models:
+>   * The examples use gated Llama models that require a token (e.g., https://huggingface.co/meta-llama/Llama-3.1-8B)
+>   * Set the `HF_TOKEN` environment variable in your job configuration
+>   * Note: you can skip the token if switching to non-gated models
+> * If using RHOAI 2.21+, the example supports Kueue integration for workload management:
+>   * When using Kueue:
+>     * Follow the [Configure Kueue (Optional)](#configure-kueue-optional) section to set up the required resources
+>     * Add the local-queue name label to your job configuration to enforce workload management
+>   * You can skip Kueue usage by:
+>     > Note: Kueue enablement via Validating Admission Policy was introduced in RHOAI 2.21. You can skip this if using an earlier RHOAI release.
+>     * Disabling the existing `kueue-validating-admission-policy-binding`
+>     * Omitting the local-queue-name label in your job configuration
+
 You can now proceed with the instructions from the notebook. Enjoy!
 
+### Configure Kueue (Optional)
+
+> [!NOTE]
+> This section is only required if you plan to use Kueue for workload management (RHOAI 2.21+) and Kueue is not already configured in your cluster.
+
+* Update the `nodeLabels` in the `workshops/kueue/resources/resource_flavor.yaml` file to match your AI worker nodes
+* Create the ResourceFlavor:
+```console
+oc apply -f workshops/kueue/resources/resource_flavor.yaml
+```
+* Create the ClusterQueue:
+```console
+oc apply -f workshops/kueue/resources/team1_cluster_queue.yaml
+```
+* Create a LocalQueue in your namespace:
+```console
+oc apply -f workshops/kueue/resources/team1_local_queue.yaml -n <your-namespace>
+```
+
 ## Validation
 
 This example has been validated with the following configurations:
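The Kueue objects created in the hunk above can also be sanity-checked from Python. A minimal sketch, assuming the `kubernetes` client package, a logged-in kubeconfig, and the `kueue.x-k8s.io/v1beta1` API; `<your-namespace>` is the same placeholder as in the `oc apply` step:

```python
# Sketch: list LocalQueues in the namespace to confirm the Kueue setup.
# Assumes `pip install kubernetes` and a valid kubeconfig context.
from kubernetes import client, config

config.load_kube_config()
api = client.CustomObjectsApi()

queues = api.list_namespaced_custom_object(
    group="kueue.x-k8s.io",
    version="v1beta1",
    namespace="<your-namespace>",  # placeholder, as in the oc apply step above
    plural="localqueues",
)
for q in queues["items"]:
    print(q["metadata"]["name"], "->", q["spec"]["clusterQueue"])
```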
@@ -176,7 +214,7 @@
 num_workers: 16
 num_procs_per_worker: 1
 resources_per_worker:
-  "amd.com/gpu": 1
+  "nvidia.com/gpu": 1
   "memory": 192Gi
   "cpu": 4
 base_image: quay.io/modh/training:py311-cuda121-torch241
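For orientation, the validated settings in this hunk are the arguments passed to the SDK's `create_job()` call in the notebook. Below is a hedged sketch of how they fit together; `train_func` is a stand-in for the notebook's actual training function, and the job name and token value are placeholders, not values from this commit:

```python
# Sketch: submit the fine-tuning job with the validated configuration.
# Assumes kubeflow-training v1.9.2+ (labels support, per the notebook diff below).
from kubeflow.training import TrainingClient

def train_func():
    # placeholder for the SFT training function defined in sft.ipynb
    ...

client = TrainingClient()
client.create_job(
    name="sft",  # hypothetical job name
    train_func=train_func,
    num_workers=16,
    num_procs_per_worker=1,
    resources_per_worker={
        "nvidia.com/gpu": 1,
        "memory": "192Gi",
        "cpu": 4,
    },
    base_image="quay.io/modh/training:py311-cuda121-torch241",
    env_vars={
        "HF_TOKEN": "<YOUR_HF_TOKEN>",  # required for gated Llama models
        "NCCL_DEBUG": "INFO",
    },
    # labels={"kueue.x-k8s.io/queue-name": "<LOCAL_QUEUE_NAME>"},  # uncomment when using Kueue
)
```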

examples/kfto-sft-llm/sft.ipynb
12 additions, 0 deletions
@@ -287,6 +287,17 @@
     "Configure the SDK client by providing the authentication token:"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4e8ac3ef",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# IMPORTANT: Labels and annotations support in the create_job() method requires kubeflow-training v1.9.2+. Skip this cell if using RHOAI 2.21 or later.\n",
+    "%pip install -U kubeflow-training"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -356,6 +367,7 @@
     "        # NCCL / RCCL\n",
     "        \"NCCL_DEBUG\": \"INFO\",\n",
     "    },\n",
+    "    # labels={\"kueue.x-k8s.io/queue-name\": \"<LOCAL_QUEUE_NAME>\"},  # Optional: add the local queue name and uncomment this line if using Kueue for resource management\n",
     "    parameters=parameters,\n",
     "    volumes=[\n",
     "        V1Volume(name=\"shared\",\n",

tests/kfto/kfto_training_test.go
0 additions, 22 deletions
@@ -19,7 +19,6 @@ package kfto
1919
import (
2020
"fmt"
2121
"testing"
22-
"time"
2322

2423
kftov1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1"
2524
. "github.com/onsi/gomega"
@@ -183,27 +182,6 @@ func runKFTOPyTorchJob(t *testing.T, image string, gpu Accelerator, numGpus, num
 	test.Eventually(PyTorchJob(test, namespace, tuningJob.Name), TestTimeoutDouble).
 		Should(WithTransform(PyTorchJobConditionRunning, Equal(corev1.ConditionTrue)))
 
-	// Verify GPU utilization
-	if IsOpenShift(test) && gpu == NVIDIA {
-		trainingPods := GetPods(test, namespace, metav1.ListOptions{LabelSelector: "training.kubeflow.org/job-name=" + tuningJob.GetName()})
-		test.Expect(trainingPods).To(HaveLen(numberOfWorkerNodes + 1)) // +1 is a master node
-
-		for _, trainingPod := range trainingPods {
-			// Check that GPUs for training pods were utilized recently
-			test.Eventually(OpenShiftPrometheusGpuUtil(test, trainingPod, gpu), 10*time.Minute).
-				Should(
-					And(
-						HaveLen(numGpus),
-						ContainElement(
-							// Check that at least some GPU was utilized on more than 10%
-							HaveField("Value", BeNumerically(">", 10)),
-						),
-					),
-				)
-		}
-		test.T().Log("All GPUs were successfully utilized")
-	}
-
 	// Make sure the PyTorch job succeeded
 	test.Eventually(PyTorchJob(test, namespace, tuningJob.Name), TestTimeoutLong).Should(WithTransform(PyTorchJobConditionSucceeded, Equal(corev1.ConditionTrue)))
 	test.T().Logf("PytorchJob %s/%s ran successfully", tuningJob.Namespace, tuningJob.Name)
