From f9f0c36434c94b51af5ed364ca4989625d713eaf Mon Sep 17 00:00:00 2001 From: wwanarif Date: Fri, 22 Aug 2025 01:02:01 +0800 Subject: [PATCH 01/23] revamp setup scripts supporting proxies and added studio cleanup scripts Signed-off-by: wwanarif --- .github/workflows/_e2e-test.yml | 21 +- app-backend/Dockerfile | 10 + app-frontend/Dockerfile | 10 + .../buildpush-genaicomps-images.yml | 24 +- .../buildpush-genaistudio-images.yml | 15 +- .../build-image-to-registry/vars.yml | 8 +- .../cleanup-genai-studio.sh | 218 ++++++++++++++++++ .../setup-genai-studio/genai-studio.yml | 6 + .../manifests/studio-manifest.yaml | 7 + .../playbooks/create-ssh-secrets.yml | 12 +- .../playbooks/deploy-monitoring.yml | 35 +-- .../playbooks/deploy-mysqldb.yml | 30 ++- .../playbooks/deploy-studio.yml | 29 +-- .../playbooks/deploy-tracing.yml | 42 ++-- .../playbooks/install-prerequisites.yml | 29 +++ .../playbooks/setup-local-storageclass.yml | 24 ++ setup-scripts/setup-genai-studio/readme.md | 29 +++ setup-scripts/setup-genai-studio/vars.yml | 8 +- .../playbooks/setup-local-registry.yml | 2 +- studio-backend/Dockerfile | 10 + studio-frontend/Dockerfile | 10 + 21 files changed, 481 insertions(+), 98 deletions(-) create mode 100755 setup-scripts/setup-genai-studio/cleanup-genai-studio.sh create mode 100644 setup-scripts/setup-genai-studio/playbooks/install-prerequisites.yml create mode 100644 setup-scripts/setup-genai-studio/playbooks/setup-local-storageclass.yml diff --git a/.github/workflows/_e2e-test.yml b/.github/workflows/_e2e-test.yml index f19bdff..6f367bf 100644 --- a/.github/workflows/_e2e-test.yml +++ b/.github/workflows/_e2e-test.yml @@ -74,25 +74,10 @@ jobs: name: playwright-test-results path: ${{ github.workspace }}/tests/playwright/playwright-report - - name: Cleanup sandbox namespaces + - name: Cleanup GenAI Studio if: always() run: | - if kubectl get namespace mysql; then - kubectl delete ns mysql || true - fi - if kubectl get namespace tracing; then - kubectl delete ns tracing || true - fi - for ns in $(kubectl get namespaces -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n' | grep '^sandbox-'); do - kubectl delete namespace $ns || true - done - if kubectl get namespace studio; then - kubectl delete -f manifests/studio-manifest.yaml || true - kubectl wait --for=delete pod --all --namespace=studio --timeout=300s - fi - if kubectl get namespace monitoring; then - kubectl delete -f manifests/monitoring-manifest.yaml || true - kubectl wait --for=delete pod --all --namespace=monitoring --timeout=300s - fi + echo "Running GenAI Studio cleanup script..." 
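+          # The script exits non-zero if any namespace survives cleanup; the
+          # `|| echo` fallback below keeps this always() step from failing the
+          # job, so test results are still reported.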
+ ./cleanup-genai-studio.sh || echo "Cleanup script completed with warnings" working-directory: ${{ github.workspace }}/setup-scripts/setup-genai-studio/ diff --git a/app-backend/Dockerfile b/app-backend/Dockerfile index eddbb4d..ee9995b 100644 --- a/app-backend/Dockerfile +++ b/app-backend/Dockerfile @@ -1,5 +1,15 @@ FROM python:3.11-slim +# Accept proxy build arguments +ARG http_proxy +ARG https_proxy +ARG no_proxy + +# Set proxy environment variables for package managers +ENV http_proxy=${http_proxy} +ENV https_proxy=${https_proxy} +ENV no_proxy=${no_proxy} + RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libsqlite3-0 \ libjemalloc-dev \ diff --git a/app-frontend/Dockerfile b/app-frontend/Dockerfile index 4c4d727..4f89930 100644 --- a/app-frontend/Dockerfile +++ b/app-frontend/Dockerfile @@ -3,6 +3,16 @@ # Use node 20.11.1 as the base image FROM node:20.11.1 as vite-app + +# Accept proxy build arguments +ARG http_proxy +ARG https_proxy +ARG no_proxy + +# Set proxy environment variables for package managers +ENV http_proxy=${http_proxy} +ENV https_proxy=${https_proxy} +ENV no_proxy=${no_proxy} COPY ./react /usr/app/react WORKDIR /usr/app/react diff --git a/setup-scripts/build-image-to-registry/buildpush-genaicomps-images.yml b/setup-scripts/build-image-to-registry/buildpush-genaicomps-images.yml index 55207b0..3395114 100755 --- a/setup-scripts/build-image-to-registry/buildpush-genaicomps-images.yml +++ b/setup-scripts/build-image-to-registry/buildpush-genaicomps-images.yml @@ -15,6 +15,10 @@ dest: /tmp/GenAIComps clone: yes update: no + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" when: not genaicomp_dir.stat.exists - name: Pull latest changes in GenAIComps repo @@ -22,6 +26,10 @@ repo: https://github.com/opea-project/GenAIComps.git dest: /tmp/GenAIComps update: yes + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" when: genaicomp_dir.stat.exists - name: Build and push GenAIComps images @@ -38,11 +46,25 @@ - { name: 'asr', dockerfile: 'comps/asr/src/Dockerfile' } block: - name: Build image - command: docker build -t {{ container_registry }}/{{ item.name }}:{{ container_tag }} -f {{ item.dockerfile }} . + command: > + docker build + --build-arg http_proxy="{{ http_proxy }}" + --build-arg https_proxy="{{ http_proxy }}" + --build-arg no_proxy="{{ no_proxy }}" + -t {{ container_registry }}/{{ item.name }}:{{ container_tag }} + -f {{ item.dockerfile }} . args: chdir: /tmp/GenAIComps + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" loop: "{{ genaicomp_images }}" - name: Push image command: docker push {{ container_registry }}/{{ item.name }}:{{ container_tag }} + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" loop: "{{ genaicomp_images }}" diff --git a/setup-scripts/build-image-to-registry/buildpush-genaistudio-images.yml b/setup-scripts/build-image-to-registry/buildpush-genaistudio-images.yml index e4f916f..20839a2 100755 --- a/setup-scripts/build-image-to-registry/buildpush-genaistudio-images.yml +++ b/setup-scripts/build-image-to-registry/buildpush-genaistudio-images.yml @@ -5,9 +5,18 @@ - vars.yml tasks: - name: Build Docker image - command: docker build -t "{{ container_registry }}/{{ item.image_name }}:{{ container_tag }}" . 
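+      # Proxies are passed twice on purpose: --build-arg feeds the ARG/ENV
+      # pairs declared in each Dockerfile, while the task-level environment
+      # covers the docker client itself.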
+      command: >
+        docker build
+        --build-arg http_proxy="{{ http_proxy }}"
+        --build-arg https_proxy="{{ http_proxy }}"
+        --build-arg no_proxy="{{ no_proxy }}"
+        -t "{{ container_registry }}/{{ item.image_name }}:{{ container_tag }}" .
       args:
         chdir: "{{ item.directory }}"
+      environment:
+        http_proxy: "{{ http_proxy }}"
+        https_proxy: "{{ http_proxy }}"
+        no_proxy: "{{ no_proxy }}"
       loop:
         - { directory: '../../studio-frontend/', image_name: 'studio-frontend' }
         - { directory: '../../studio-backend/', image_name: 'studio-backend' }
@@ -17,6 +26,10 @@
 
     - name: Push Docker image
       command: docker push "{{ container_registry }}/{{ item.image_name }}:{{ container_tag }}"
+      environment:
+        http_proxy: "{{ http_proxy }}"
+        https_proxy: "{{ http_proxy }}"
+        no_proxy: "{{ no_proxy }}"
       loop:
         - { image_name: 'studio-frontend' }
         - { image_name: 'studio-backend' }
diff --git a/setup-scripts/build-image-to-registry/vars.yml b/setup-scripts/build-image-to-registry/vars.yml
index 2c97a25..65af073 100644
--- a/setup-scripts/build-image-to-registry/vars.yml
+++ b/setup-scripts/build-image-to-registry/vars.yml
@@ -1,2 +1,6 @@
-container_registry: 'opea'
-container_tag: 'latest'
\ No newline at end of file
+# Container registry configuration
+# Replace {{ ansible_default_ipv4.address }} with your Kubernetes master/API endpoint IP if needed
+container_registry: '{{ ansible_default_ipv4.address }}:5000/opea'
+container_tag: 'latest'
+http_proxy: ''
+no_proxy: 'localhost,127.0.0.1,.local,.svc.cluster.local,{{ ansible_default_ipv4.address }}'
\ No newline at end of file
diff --git a/setup-scripts/setup-genai-studio/cleanup-genai-studio.sh b/setup-scripts/setup-genai-studio/cleanup-genai-studio.sh
new file mode 100755
index 0000000..dc4fcb4
--- /dev/null
+++ b/setup-scripts/setup-genai-studio/cleanup-genai-studio.sh
@@ -0,0 +1,218 @@
+#!/bin/bash
+
+# GenAI Studio Complete Cleanup Script
+# This script ensures all namespaces and resources are properly deleted
+
+set -e
+
+echo "========================================"
+echo "GenAI Studio Complete Cleanup Script"
+echo "========================================"
+echo
+
+# Function to check if namespace exists
+namespace_exists() {
+    kubectl get namespace "$1" &>/dev/null
+}
+
+# Function to delete namespace with comprehensive cleanup
+cleanup_namespace() {
+    local ns=$1
+    local manifest_file=$2
+
+    if ! namespace_exists "$ns"; then
+        echo "✅ Namespace '$ns' does not exist, skipping..."
+        return 0
+    fi
+
+    echo "🧹 Cleaning up namespace: $ns"
+
+    # Delete resources using the manifest if one was provided and exists
+    if [ ! -z "$manifest_file" ] && [ -f "$manifest_file" ]; then
+        echo " - Deleting resources using manifest: $manifest_file"
+        kubectl delete -f "$manifest_file" --timeout=120s --ignore-not-found=true || true
+    elif [ ! -z "$manifest_file" ]; then
+        echo " - Manifest file not found: $manifest_file"
+    fi
+
+    # Delete Helm releases in the namespace
+    echo " - Checking for Helm releases in $ns..."
+    helm list -n "$ns" -q 2>/dev/null | xargs -r -I {} helm delete {} -n "$ns" --timeout=120s || true
+
+    # Wait for pods to terminate gracefully
+    echo " - Waiting for pods to terminate gracefully..."
+    if kubectl get pods -n "$ns" --no-headers 2>/dev/null | grep -q .; then
+        kubectl wait --for=delete pod --all --namespace="$ns" --timeout=180s || true
+    else
+        echo " No pods found in namespace $ns"
+    fi
+
+    # Force delete any remaining pods
+    echo " - Force deleting any remaining pods..."
+    REMAINING_PODS=$(kubectl get pods -n "$ns" --no-headers 2>/dev/null | awk '{print $1}' || true)
+    if [ ! -z "$REMAINING_PODS" ]; then
+        echo " Found remaining pods: $REMAINING_PODS"
+        echo "$REMAINING_PODS" | xargs -r kubectl delete pod -n "$ns" --force --grace-period=0 || true
+    else
+        echo " No remaining pods to force delete"
+    fi
+
+    # Delete PVCs
+    echo " - Deleting PersistentVolumeClaims..."
+    PVCS=$(kubectl get pvc -n "$ns" --no-headers 2>/dev/null | awk '{print $1}' || true)
+    if [ ! -z "$PVCS" ]; then
+        echo " Found PVCs: $PVCS"
+        echo "$PVCS" | xargs -r kubectl delete pvc -n "$ns" --timeout=60s || true
+    else
+        echo " No PVCs found in namespace $ns"
+    fi
+
+    # Delete secrets (except default service account token)
+    echo " - Deleting secrets..."
+    SECRETS=$(kubectl get secrets -n "$ns" --no-headers 2>/dev/null | grep -v "default-token" | awk '{print $1}' || true)
+    if [ ! -z "$SECRETS" ]; then
+        echo " Found secrets: $SECRETS"
+        echo "$SECRETS" | xargs -r kubectl delete secret -n "$ns" || true
+    else
+        echo " No custom secrets found in namespace $ns"
+    fi
+
+    # Delete configmaps
+    echo " - Deleting configmaps..."
+    CONFIGMAPS=$(kubectl get configmaps -n "$ns" --no-headers 2>/dev/null | grep -v "kube-root-ca.crt" | awk '{print $1}' || true)
+    if [ ! -z "$CONFIGMAPS" ]; then
+        echo " Found configmaps: $CONFIGMAPS"
+        echo "$CONFIGMAPS" | xargs -r kubectl delete configmap -n "$ns" || true
+    else
+        echo " No custom configmaps found in namespace $ns"
+    fi
+
+    # Finally delete the namespace
+    echo " - Deleting namespace..."
+    kubectl delete namespace "$ns" --timeout=120s || true
+
+    # If namespace still exists, patch it to remove finalizers
+    if namespace_exists "$ns"; then
+        echo " - Namespace still exists, removing finalizers..."
+        kubectl patch namespace "$ns" -p '{"metadata":{"finalizers":[]}}' --type=merge || true
+        kubectl delete namespace "$ns" --force --grace-period=0 || true
+    fi
+
+    # Final check
+    if namespace_exists "$ns"; then
+        echo " ❌ WARNING: Namespace '$ns' still exists after cleanup"
+        return 1
+    else
+        echo " ✅ SUCCESS: Namespace '$ns' has been deleted"
+        return 0
+    fi
+}
+
+# Main cleanup process
+echo "Starting comprehensive cleanup..."
+echo
+
+# Change to the setup directory
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR"
+
+# Clean up sandbox namespaces first
+echo "🔍 Looking for sandbox namespaces..."
+SANDBOX_NAMESPACES=$(kubectl get namespaces -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n' | grep '^sandbox-' || true)
+
+if [ ! -z "$SANDBOX_NAMESPACES" ]; then
+    echo "Found sandbox namespaces: $SANDBOX_NAMESPACES"
+    for ns in $SANDBOX_NAMESPACES; do
+        cleanup_namespace "$ns"
+    done
+else
+    echo "✅ No sandbox namespaces found"
+fi
+
+echo
+
+# Clean up main namespaces
+MAIN_NAMESPACES=(
+    "studio:manifests/studio-manifest.yaml"
+    "monitoring:manifests/monitoring-manifest.yaml"
+    "tracing:"
+    "mysql:"
+)
+
+for ns_info in "${MAIN_NAMESPACES[@]}"; do
+    IFS=':' read -r ns manifest <<< "$ns_info"
+    cleanup_namespace "$ns" "$manifest"
+    echo
+done
+
+# # Clean up any remaining Helm releases globally
+# echo "🧹 Cleaning up any remaining Helm releases..."
+# helm list --all-namespaces --filter="mysql|kube-prometheus-stack|clickhouse|pascaliske" -q 2>/dev/null | \
+# while read -r release; do
+#     if [ ! -z "$release" ]; then
+#         echo " - Deleting Helm release: $release"
+#         helm delete "$release" --timeout=60s || true
+#     fi
+# done
+
+# Check local-path-storage namespace and explain why it's preserved
+echo "🔒 Checking local-path-storage namespace..."
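+# Informational only: nothing below deletes local-path-storage; it just
+# reports the namespace state and prints the manual removal commands.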
+if namespace_exists "local-path-storage"; then
+    echo " ✅ INTENTIONALLY PRESERVED: local-path-storage namespace exists"
+    echo " 📝 This namespace provides storage provisioning and is NOT cleaned up because:"
+    echo " - It may be used by other applications beyond GenAI Studio"
+    echo " - Deleting it would break any existing PVCs using local-path storage"
+    echo " - The local-path StorageClass would become non-functional"
+    echo " - It's a cluster-wide infrastructure component"
+    echo ""
+    echo " 💡 To manually remove local-path-storage later (if you're sure it's safe):"
+    echo " kubectl delete namespace local-path-storage"
+    echo " kubectl delete storageclass local-path"
+else
+    echo " ℹ️ local-path-storage namespace does not exist"
+fi
+
+
+echo
+echo "========================================"
+echo "Cleanup Summary"
+echo "========================================"
+
+# Final verification
+FAILED_CLEANUP=()
+NAMESPACES_TO_CHECK="studio monitoring tracing mysql"
+
+for ns in $NAMESPACES_TO_CHECK; do
+    if namespace_exists "$ns"; then
+        echo "❌ FAILED: Namespace '$ns' still exists"
+        FAILED_CLEANUP+=("$ns")
+    else
+        echo "✅ SUCCESS: Namespace '$ns' deleted"
+    fi
+done
+
+# Special handling for local-path-storage (intentionally preserved)
+if namespace_exists "local-path-storage"; then
+    echo "🔒 PRESERVED: Namespace 'local-path-storage' intentionally kept"
+else
+    echo "ℹ️ INFO: Namespace 'local-path-storage' was not present"
+fi
+
+# Check for remaining sandbox namespaces
+REMAINING_SANDBOX=$(kubectl get namespaces -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n' | grep '^sandbox-' || true)
+if [ ! -z "$REMAINING_SANDBOX" ]; then
+    echo "❌ FAILED: Remaining sandbox namespaces: $REMAINING_SANDBOX"
+    FAILED_CLEANUP+=("sandbox namespaces")
+else
+    echo "✅ SUCCESS: All sandbox namespaces deleted"
+fi
+
+echo
+if [ ${#FAILED_CLEANUP[@]} -eq 0 ]; then
+    echo "🎉 All namespaces have been successfully cleaned up!"
+    exit 0
+else
+    echo "⚠️ Some namespaces failed to clean up: ${FAILED_CLEANUP[*]}"
+    echo "You may need to manually investigate and clean up these namespaces."
+    exit 1
+fi
diff --git a/setup-scripts/setup-genai-studio/genai-studio.yml b/setup-scripts/setup-genai-studio/genai-studio.yml
index 10fdc80..0c86d5b 100644
--- a/setup-scripts/setup-genai-studio/genai-studio.yml
+++ b/setup-scripts/setup-genai-studio/genai-studio.yml
@@ -1,3 +1,9 @@
+- name: Install prerequisites (Helm, etc.)
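+  # Runs first so that Helm is available to every later play.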
+ import_playbook: playbooks/install-prerequisites.yml + +- name: Setup local path storage provisioner + import_playbook: playbooks/setup-local-storageclass.yml + - name: Create ssh secrets import_playbook: playbooks/create-ssh-secrets.yml diff --git a/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml b/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml index 721f860..ad97372 100644 --- a/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml +++ b/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml @@ -502,6 +502,13 @@ spec: DOWNLOAD_URL="https://codeload.github.com/${OWNER}/${REPO}/tar.gz/${BRANCH}" curl "${DOWNLOAD_URL}" | tar -xz --strip-components=4 -C /opt/keycloak/themes "${REPO}-${BRANCH}/${KC_ASSETS_DIR}/themes" curl "${DOWNLOAD_URL}" | tar -xz --strip-components=4 -C /opt/keycloak/data "${REPO}-${BRANCH}/${KC_ASSETS_DIR}/data" + env: + - name: http_proxy + value: "${HTTP_PROXY}" + - name: https_proxy + value: "${HTTP_PROXY}" + - name: no_proxy + value: "${NO_PROXY}" envFrom: - configMapRef: name: studio-config diff --git a/setup-scripts/setup-genai-studio/playbooks/create-ssh-secrets.yml b/setup-scripts/setup-genai-studio/playbooks/create-ssh-secrets.yml index a4532e5..7841188 100644 --- a/setup-scripts/setup-genai-studio/playbooks/create-ssh-secrets.yml +++ b/setup-scripts/setup-genai-studio/playbooks/create-ssh-secrets.yml @@ -1,5 +1,7 @@ - name: Create ssh keys in k8 secrets using shell and kubectl commands hosts: localhost + vars_files: + - ../vars.yml tasks: @@ -23,9 +25,17 @@ command: kubectl wait --for=condition=Ready pod/ubuntu-ssh-keygen -n studio --timeout=60s when: "'NotFound' in kubectl_secret_check.stderr" + - name: Install openssh-client in pod + shell: | + kubectl exec -n studio ubuntu-ssh-keygen -- bash -c " + export http_proxy='{{ http_proxy }}' + export https_proxy='{{ http_proxy }}' + export no_proxy='{{ no_proxy }}' + apt-get update && apt-get install -y openssh-client" + when: "'NotFound' in kubectl_secret_check.stderr" + - name: Generate SSH key inside pod shell: | - kubectl exec -n studio ubuntu-ssh-keygen -- bash -c "apt-get update && apt-get install -y openssh-client" kubectl exec -n studio ubuntu-ssh-keygen -- bash -c "ssh-keygen -t rsa -b 2048 -f /tmp/id_rsa -N '' -C ''" when: "'NotFound' in kubectl_secret_check.stderr" diff --git a/setup-scripts/setup-genai-studio/playbooks/deploy-monitoring.yml b/setup-scripts/setup-genai-studio/playbooks/deploy-monitoring.yml index f64b1cd..5bfe0f2 100644 --- a/setup-scripts/setup-genai-studio/playbooks/deploy-monitoring.yml +++ b/setup-scripts/setup-genai-studio/playbooks/deploy-monitoring.yml @@ -1,38 +1,39 @@ - name: Deploy prometheus and grafana with local-path-storage hosts: localhost + vars_files: + - ../vars.yml tasks: - - name: Check if local-path-storage namespace exists - shell: kubectl get namespace local-path-storage --ignore-not-found - register: namespace_check - ignore_errors: yes + - name: Check if monitoring namespace exists + shell: kubectl get namespace monitoring --ignore-not-found + register: monitoring_namespace_check changed_when: false - - name: Install local-path-provisioner if namespace does not exist - shell: kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.30/deploy/local-path-storage.yaml - when: namespace_check.stdout == "" - register: apply_output - - - name: Wait for local-path-provisioner to be ready - shell: kubectl wait --for=condition=Ready pod -l app=local-path-provisioner -n local-path-storage 
--timeout=120s - when: namespace_check.stdout == "" - - name: Create monitoring namespace command: kubectl create namespace monitoring - ignore_errors: yes - - - name: Install Helm - shell: curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash + when: monitoring_namespace_check.stdout == "" - name: Add Prometheus Helm repository command: helm repo add prometheus-community https://prometheus-community.github.io/helm-charts + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" - name: Update Helm repositories command: helm repo update + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" # Installing the CRDs needed - name: Helm install kube-prometheus-stack command: helm install kube-prometheus-stack prometheus-community/kube-prometheus-stack -n monitoring ignore_errors: yes + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" - name: Delete kube-prometheus-stack command: helm delete kube-prometheus-stack -n monitoring diff --git a/setup-scripts/setup-genai-studio/playbooks/deploy-mysqldb.yml b/setup-scripts/setup-genai-studio/playbooks/deploy-mysqldb.yml index 591d826..745438d 100644 --- a/setup-scripts/setup-genai-studio/playbooks/deploy-mysqldb.yml +++ b/setup-scripts/setup-genai-studio/playbooks/deploy-mysqldb.yml @@ -1,25 +1,11 @@ - name: Deploy mysql database hosts: localhost + vars_files: + - ../vars.yml tasks: - - name: Check if local-path-storage namespace exists - shell: kubectl get namespace local-path-storage --ignore-not-found - register: namespace_check - ignore_errors: yes - changed_when: false - - - name: Install local-path-provisioner if namespace does not exist - shell: kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.30/deploy/local-path-storage.yaml - when: namespace_check.stdout == "" - register: apply_output - - - name: Wait for local-path-provisioner to be ready - shell: kubectl wait --for=condition=Ready pod -l app=local-path-provisioner -n local-path-storage --timeout=120s - when: namespace_check.stdout == "" - - name: Check if mysql namespace exists shell: kubectl get namespace mysql --ignore-not-found register: namespace_check - ignore_errors: yes changed_when: false - name: End playbook if mysql namespace exists @@ -28,15 +14,27 @@ - name: Add bitnami Helm repository command: helm repo add bitnami https://charts.bitnami.com/bitnami + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" - name: Update Helm repositories command: helm repo update + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" - name: Create 'mysql' namespace command: kubectl create ns mysql - name: Install MySQL using Helm command: helm install mysql bitnami/mysql -n mysql -f ../helm-values/mysqldb.yaml + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" - name: Wait for mysql-0 pod to be ready command: kubectl wait --for=condition=ready pod -l app.kubernetes.io/instance=mysql -n mysql --timeout=300s \ No newline at end of file diff --git a/setup-scripts/setup-genai-studio/playbooks/deploy-studio.yml b/setup-scripts/setup-genai-studio/playbooks/deploy-studio.yml index 1d2ecef..f28c231 100644 --- a/setup-scripts/setup-genai-studio/playbooks/deploy-studio.yml +++ 
b/setup-scripts/setup-genai-studio/playbooks/deploy-studio.yml @@ -3,29 +3,14 @@ vars_files: - ../vars.yml tasks: - - name: Check if local-path-storage namespace exists - shell: kubectl get namespace local-path-storage --ignore-not-found - register: namespace_check - ignore_errors: yes - changed_when: false - - - name: Install local-path-provisioner if namespace does not exist - shell: kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.30/deploy/local-path-storage.yaml - when: namespace_check.stdout == "" - register: apply_output - - - name: Wait for local-path-provisioner to be ready - shell: kubectl wait --for=condition=Ready pod -l app=local-path-provisioner -n local-path-storage --timeout=120s - when: namespace_check.stdout == "" - - name: Check if studio namespace exists - command: kubectl get namespace studio - register: studio_namespace - ignore_errors: yes + shell: kubectl get namespace studio --ignore-not-found + register: studio_namespace_check + changed_when: false - name: Create studio namespace command: kubectl create namespace studio - when: studio_namespace.rc != 0 + when: studio_namespace_check.stdout == "" - name: Check for coredns service shell: kubectl get svc coredns -n kube-system --ignore-not-found @@ -38,16 +23,16 @@ when: coredns_check.stdout != '' - name: Check if app-tls exists in studio namespace - command: kubectl get secret app-tls -n studio + shell: kubectl get secret app-tls -n studio --ignore-not-found register: app_tls_secret_check - ignore_errors: yes + changed_when: false - name: Generate TLS certificate and create app-tls shell: | openssl req -x509 -nodes -days 365 -newkey rsa:4096 -keyout app-tls.key -out app-tls.crt -subj "/CN=studio/O=studio" kubectl create secret generic app-tls --from-file=app-tls.crt --from-file=app-tls.key -n studio rm app-tls.key app-tls.crt - when: app_tls_secret_check.rc != 0 + when: app_tls_secret_check.stdout == "" - name: Apply studio configuration command: kubectl apply -f ../studio-config.yaml diff --git a/setup-scripts/setup-genai-studio/playbooks/deploy-tracing.yml b/setup-scripts/setup-genai-studio/playbooks/deploy-tracing.yml index 75bed58..6a4859a 100644 --- a/setup-scripts/setup-genai-studio/playbooks/deploy-tracing.yml +++ b/setup-scripts/setup-genai-studio/playbooks/deploy-tracing.yml @@ -1,26 +1,12 @@ --- - name: Deploy clickhouse and otel collector for tracing hosts: localhost + vars_files: + - ../vars.yml tasks: - - name: Check if local-path-storage namespace exists - shell: kubectl get namespace local-path-storage --ignore-not-found - register: namespace_check - ignore_errors: yes - changed_when: false - - - name: Install local-path-provisioner if namespace does not exist - shell: kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.30/deploy/local-path-storage.yaml - when: namespace_check.stdout == "" - register: apply_output - - - name: Wait for local-path-provisioner to be ready - shell: kubectl wait --for=condition=Ready pod -l app=local-path-provisioner -n local-path-storage --timeout=120s - when: namespace_check.stdout == "" - - name: Check if tracing namespace exists shell: kubectl get namespace tracing --ignore-not-found register: namespace_check - ignore_errors: yes changed_when: false - name: End playbook if tracing namespace exists @@ -29,15 +15,27 @@ - name: Add Pascaliske Helm repository command: helm repo add pascaliske https://charts.pascaliske.dev + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy 
}}" + no_proxy: "{{ no_proxy }}" - name: Update Helm repositories command: helm repo update + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" - name: Create 'tracing' namespace command: kubectl create ns tracing - name: Install Clickhouse Helm chart in 'tracing' namespace command: helm install clickhouse pascaliske/clickhouse -n tracing --set persistentVolumeClaim.storageClassName=local-path + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" - name: Wait for Clickhouse pod to be ready command: kubectl wait --namespace tracing --for=condition=ready pod -l app.kubernetes.io/name=clickhouse --timeout=120s @@ -47,12 +45,24 @@ - name: Add OpenTelemetry Helm repository command: helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" - name: Update Helm repositories command: helm repo update + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" - name: Install OpenTelemetry Collector Helm chart in 'tracing' namespace command: helm install tracing open-telemetry/opentelemetry-collector -n tracing -f ../helm-values/otel-collector.yaml + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" - name: Wait for OpenTelemetry Collector pod to be ready command: kubectl wait --namespace tracing --for=condition=ready pod -l app.kubernetes.io/name=opentelemetry-collector --timeout=120s \ No newline at end of file diff --git a/setup-scripts/setup-genai-studio/playbooks/install-prerequisites.yml b/setup-scripts/setup-genai-studio/playbooks/install-prerequisites.yml new file mode 100644 index 0000000..67ad486 --- /dev/null +++ b/setup-scripts/setup-genai-studio/playbooks/install-prerequisites.yml @@ -0,0 +1,29 @@ +--- +- name: Install prerequisites for GenAI Studio + hosts: localhost + vars_files: + - ../vars.yml + tasks: + - name: Check if Helm is installed + command: helm version --short + register: helm_check + failed_when: false + changed_when: false + + - name: Install Helm + shell: curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" + when: helm_check.rc != 0 + + - name: Verify Helm installation + command: helm version --short + changed_when: false + + - name: Check if kubectl is available + command: kubectl version --client + register: kubectl_check + failed_when: kubectl_check.rc != 0 + changed_when: false diff --git a/setup-scripts/setup-genai-studio/playbooks/setup-local-storageclass.yml b/setup-scripts/setup-genai-studio/playbooks/setup-local-storageclass.yml new file mode 100644 index 0000000..af504c1 --- /dev/null +++ b/setup-scripts/setup-genai-studio/playbooks/setup-local-storageclass.yml @@ -0,0 +1,24 @@ +--- +- name: Setup local path storage provisioner + hosts: localhost + vars_files: + - ../vars.yml + tasks: + - name: Check if local-path-storage namespace exists + shell: kubectl get namespace local-path-storage --ignore-not-found + register: namespace_check + ignore_errors: yes + changed_when: false + + - name: Install local-path-provisioner if namespace does not exist + shell: kubectl apply -f 
https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.30/deploy/local-path-storage.yaml + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" + when: namespace_check.stdout == "" + register: apply_output + + - name: Wait for local-path-provisioner to be ready + shell: kubectl wait --for=condition=Ready pod -l app=local-path-provisioner -n local-path-storage --timeout=120s + when: namespace_check.stdout == "" diff --git a/setup-scripts/setup-genai-studio/readme.md b/setup-scripts/setup-genai-studio/readme.md index 58815c6..e6d896f 100644 --- a/setup-scripts/setup-genai-studio/readme.md +++ b/setup-scripts/setup-genai-studio/readme.md @@ -32,3 +32,32 @@ Run below commands to do a /health test: ```sh curl http://localhost:30007/studio-backend/health ``` + +## Cleanup + +To completely remove GenAI Studio and all its components: + +```sh +./cleanup-genai-studio.sh +``` + +This script will: +- Delete all GenAI Studio namespaces (studio, monitoring, tracing, mysql) +- Remove all sandbox namespaces +- Clean up Helm releases +- Remove PVCs, secrets, and configmaps +- Provide detailed feedback on the cleanup process + +### Important Notes + +**Local Path Storage Preservation:** +The cleanup script intentionally **does NOT** remove the `local-path-storage` namespace because: +- It may be used by other applications beyond GenAI Studio +- Deleting it would break existing PVCs that use the `local-path` StorageClass +- It's a cluster-wide infrastructure component that should be managed separately + +If you need to remove local-path-storage after ensuring it's safe to do so: +```sh +kubectl delete namespace local-path-storage +kubectl delete storageclass local-path +``` \ No newline at end of file diff --git a/setup-scripts/setup-genai-studio/vars.yml b/setup-scripts/setup-genai-studio/vars.yml index d53d819..b277acf 100644 --- a/setup-scripts/setup-genai-studio/vars.yml +++ b/setup-scripts/setup-genai-studio/vars.yml @@ -1,5 +1,7 @@ -container_registry: 'opea' +# Container registry configuration +# Replace {{ ansible_default_ipv4.address }} with your Kubernetes master/API endpoint IP if needed +container_registry: '{{ ansible_default_ipv4.address }}:5000/opea' container_tag: 'latest' +mysql_host: 'mysql.mysql.svc.cluster.local' http_proxy: '' -no_proxy: '' -mysql_host: 'mysql.mysql.svc.cluster.local' \ No newline at end of file +no_proxy: 'localhost,127.0.0.1,.local,.svc.cluster.local,{{ ansible_default_ipv4.address }}' \ No newline at end of file diff --git a/setup-scripts/setup-onpremise-kubernetes/playbooks/setup-local-registry.yml b/setup-scripts/setup-onpremise-kubernetes/playbooks/setup-local-registry.yml index c14f7c4..56e6e73 100644 --- a/setup-scripts/setup-onpremise-kubernetes/playbooks/setup-local-registry.yml +++ b/setup-scripts/setup-onpremise-kubernetes/playbooks/setup-local-registry.yml @@ -97,7 +97,7 @@ content: | [Service] Environment="HTTP_PROXY={{ http_proxy }}" - Environment="HTTPS_PROXY={{ http_proxy }}" + Environment="HTTPS_PROXY={{ https_proxy }}" Environment="NO_PROXY={{ no_proxy }}" notify: - Reload systemd daemon diff --git a/studio-backend/Dockerfile b/studio-backend/Dockerfile index 00b557d..71848f0 100644 --- a/studio-backend/Dockerfile +++ b/studio-backend/Dockerfile @@ -1,6 +1,16 @@ # Use an official Python runtime as a parent image FROM python:3.11-slim +# Accept proxy build arguments +ARG http_proxy +ARG https_proxy +ARG no_proxy + +# Set proxy environment variables for package managers +ENV 
http_proxy=${http_proxy} +ENV https_proxy=${https_proxy} +ENV no_proxy=${no_proxy} + # Set the working directory in the container WORKDIR /usr/src/ diff --git a/studio-frontend/Dockerfile b/studio-frontend/Dockerfile index e3079f4..b30aed4 100644 --- a/studio-frontend/Dockerfile +++ b/studio-frontend/Dockerfile @@ -1,5 +1,15 @@ FROM node:23-alpine +# Accept proxy build arguments +ARG http_proxy +ARG https_proxy +ARG no_proxy + +# Set proxy environment variables for package managers +ENV http_proxy=${http_proxy} +ENV https_proxy=${https_proxy} +ENV no_proxy=${no_proxy} + # Install necessary packages RUN apk update && apk upgrade && \ apk add --no-cache gcompat python3 make g++ git \ From 0fdd86d34df9859bb7a73cc64d7f90b2010c6868 Mon Sep 17 00:00:00 2001 From: wwanarif Date: Wed, 27 Aug 2025 06:21:42 +0000 Subject: [PATCH 02/23] update build genaicomp script and fix proxy bugs Signed-off-by: wwanarif --- app-backend/megaservice.py | 2 +- app-backend/orchestrator.py | 4 -- .../buildpush-genaicomps-images.yml | 4 +- .../build-image-to-registry/vars.yml | 1 + .../manifests/studio-manifest.yaml | 16 ++++--- setup-scripts/setup-genai-studio/readme.md | 1 - .../setup-genai-studio/studio-config.yaml | 2 +- .../playbooks/setup-local-registry.yml | 2 +- studio-backend/app/routers/debuglog_router.py | 28 +++++++++++++ .../app/services/exporter_service.py | 16 ++++++- .../app/templates/app/app.manifest.yaml | 6 +++ .../microsvc-manifests/asr-usvc.yaml | 6 +-- .../microsvc-manifests/data-prep.yaml | 6 +-- .../microsvc-manifests/embedding-usvc.yaml | 6 +-- .../microsvc-manifests/reranking-usvc.yaml | 6 +-- .../microsvc-manifests/retriever-usvc.yaml | 6 +-- studio-backend/app/utils/exporter_utils.py | 2 +- .../app/utils/placeholders_utils.py | 18 +++++++- studio-frontend/packages/server/package.json | 3 +- .../server/src/services/chatflows/index.ts | 42 ++++++++++++++----- 20 files changed, 133 insertions(+), 44 deletions(-) diff --git a/app-backend/megaservice.py b/app-backend/megaservice.py index 3335323..4cb7e13 100644 --- a/app-backend/megaservice.py +++ b/app-backend/megaservice.py @@ -170,7 +170,7 @@ def add_remote_service(self): microservice_name = node['name'].split('@')[1] if "docsum" in microservice_name: self.is_docsum = True - service_node_ip = node_id.split('@')[1].replace('_','-') if USE_NODE_ID_AS_IP else HOST_IP + service_node_ip = f"opea-{node_id.split('@')[1].replace('_','-')}" if USE_NODE_ID_AS_IP else HOST_IP microservice = templates[microservice_name].get_service(host_ip=service_node_ip, node_id_as_ip=USE_NODE_ID_AS_IP, port=os.getenv(f"{node_id.split('@')[1]}_port", None)) microservice.name = node_id self.services[node_id] = microservice diff --git a/app-backend/orchestrator.py b/app-backend/orchestrator.py index f4b949b..f6357f6 100644 --- a/app-backend/orchestrator.py +++ b/app-backend/orchestrator.py @@ -272,7 +272,6 @@ async def execute( url=endpoint, data=json.dumps(inputs), headers={"Content-type": "application/json", "Authorization": f"Bearer {access_token}"}, - proxies={"http": None}, stream=True, timeout=2000, ) @@ -283,7 +282,6 @@ async def execute( headers={ "Content-type": "application/json", }, - proxies={"http": None}, stream=True, timeout=2000, ) @@ -316,7 +314,6 @@ def generate(): "Content-type": "application/json", "Authorization": f"Bearer {access_token}", }, - proxies={"http": None}, timeout=2000, ) else: @@ -326,7 +323,6 @@ def generate(): headers={ "Content-type": "application/json", }, - proxies={"http": None}, timeout=2000, ) res_json = res.json() diff --git 
a/setup-scripts/build-image-to-registry/buildpush-genaicomps-images.yml b/setup-scripts/build-image-to-registry/buildpush-genaicomps-images.yml index 3395114..02e1d4a 100755 --- a/setup-scripts/build-image-to-registry/buildpush-genaicomps-images.yml +++ b/setup-scripts/build-image-to-registry/buildpush-genaicomps-images.yml @@ -21,11 +21,11 @@ no_proxy: "{{ no_proxy }}" when: not genaicomp_dir.stat.exists - - name: Pull latest changes in GenAIComps repo + - name: Checkout specific GenAIComps tag git: repo: https://github.com/opea-project/GenAIComps.git dest: /tmp/GenAIComps - update: yes + version: "{{ genaicomps_tag }}" environment: http_proxy: "{{ http_proxy }}" https_proxy: "{{ http_proxy }}" diff --git a/setup-scripts/build-image-to-registry/vars.yml b/setup-scripts/build-image-to-registry/vars.yml index 65af073..7646ae8 100644 --- a/setup-scripts/build-image-to-registry/vars.yml +++ b/setup-scripts/build-image-to-registry/vars.yml @@ -2,5 +2,6 @@ # Replace {{ ansible_default_ipv4.address }} with your Kubernetes master/API endpoint IP if needed container_registry: '{{ ansible_default_ipv4.address }}:5000/opea' container_tag: 'latest' +genaicomps_tag: 'main' http_proxy: '' no_proxy: 'localhost,127.0.0.1,.local,.svc.cluster.local,{{ ansible_default_ipv4.address }}' \ No newline at end of file diff --git a/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml b/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml index ad97372..94d04c9 100644 --- a/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml +++ b/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml @@ -361,7 +361,7 @@ spec: - name: SBX_HTTP_PROXY value: ${HTTP_PROXY} - name: SBX_NO_PROXY - value: ${NO_PROXY} + value: ${NO_PROXY} envFrom: - configMapRef: name: studio-config @@ -449,6 +449,12 @@ spec: value: studio - name: DATABASE_SSL value: "false" + - name: HTTP_PROXY + value: "${HTTP_PROXY}" + - name: HTTPS_PROXY + value: "${HTTP_PROXY}" + - name: NO_PROXY + value: "${NO_PROXY}" ports: - name: studio-frontend containerPort: 8080 @@ -504,11 +510,11 @@ spec: curl "${DOWNLOAD_URL}" | tar -xz --strip-components=4 -C /opt/keycloak/data "${REPO}-${BRANCH}/${KC_ASSETS_DIR}/data" env: - name: http_proxy - value: "${HTTP_PROXY}" + value: ${HTTP_PROXY} - name: https_proxy - value: "${HTTP_PROXY}" - - name: no_proxy - value: "${NO_PROXY}" + value: ${HTTP_PROXY} + - name: NO_PROXY + value: ${NO_PROXY} envFrom: - configMapRef: name: studio-config diff --git a/setup-scripts/setup-genai-studio/readme.md b/setup-scripts/setup-genai-studio/readme.md index e6d896f..328e1e8 100644 --- a/setup-scripts/setup-genai-studio/readme.md +++ b/setup-scripts/setup-genai-studio/readme.md @@ -21,7 +21,6 @@ The genai-studio playbook script will: Run below commands: ```sh -curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash sudo apt install ansible -y ansible-playbook genai-studio.yml ``` diff --git a/setup-scripts/setup-genai-studio/studio-config.yaml b/setup-scripts/setup-genai-studio/studio-config.yaml index 8574900..af9d391 100644 --- a/setup-scripts/setup-genai-studio/studio-config.yaml +++ b/setup-scripts/setup-genai-studio/studio-config.yaml @@ -13,5 +13,5 @@ data: APP_FRONTEND_DNS: "app-frontend.$namespace.svc.cluster.local:5275" APP_BACKEND_DNS: "app-backend.$namespace.svc.cluster.local:8899" APP_CHATHISTORY_DNS: "chathistory-mongo.$namespace.svc.cluster.local:6012" - PREPARE_DOC_REDIS_PREP_DNS: "prepare-doc-redis-prep-0.$namespace.svc.cluster.local:6007" + 
PREPARE_DOC_REDIS_PREP_DNS: "opea-prepare-doc-redis-prep-0.$namespace.svc.cluster.local:6007" STUDIO_BACKEND_DNS: "studio-backend.studio.svc.cluster.local:5000" \ No newline at end of file diff --git a/setup-scripts/setup-onpremise-kubernetes/playbooks/setup-local-registry.yml b/setup-scripts/setup-onpremise-kubernetes/playbooks/setup-local-registry.yml index 56e6e73..c14f7c4 100644 --- a/setup-scripts/setup-onpremise-kubernetes/playbooks/setup-local-registry.yml +++ b/setup-scripts/setup-onpremise-kubernetes/playbooks/setup-local-registry.yml @@ -97,7 +97,7 @@ content: | [Service] Environment="HTTP_PROXY={{ http_proxy }}" - Environment="HTTPS_PROXY={{ https_proxy }}" + Environment="HTTPS_PROXY={{ http_proxy }}" Environment="NO_PROXY={{ no_proxy }}" notify: - Reload systemd daemon diff --git a/studio-backend/app/routers/debuglog_router.py b/studio-backend/app/routers/debuglog_router.py index 0d62100..760a072 100644 --- a/studio-backend/app/routers/debuglog_router.py +++ b/studio-backend/app/routers/debuglog_router.py @@ -85,6 +85,34 @@ def find_pod_dependencies(pod, all_pods, services, namespace, core_v1_api): # Combine all environment variables for further analysis all_env_vars = env_vars + init_env_vars + configmap_env_vars + # Special handling for app-backend pods - filter out dependent services + is_app_backend = pod.metadata.name and 'app-backend' in pod.metadata.name + if is_app_backend: + # For app-backend, we want to exclude references from dependent_services + # but keep direct OPEA service references + filtered_env_vars = [] + for env_val in all_env_vars: + # Skip if this looks like workflow-info.json content with dependent_services + if isinstance(env_val, str) and '"dependent_services"' in env_val: + # Parse the JSON to extract only direct service references, not dependent ones + try: + import json + workflow_data = json.loads(env_val) + if 'nodes' in workflow_data: + # Only include OPEA service names, not their dependencies + opea_services = [] + for node_id, node_data in workflow_data['nodes'].items(): + if node_data.get('name', '').startswith('opea_service@'): + opea_services.append(node_data['name']) + # Add these as simple strings for pattern matching + filtered_env_vars.extend(opea_services) + except: + # If JSON parsing fails, skip this env var + pass + else: + filtered_env_vars.append(env_val) + all_env_vars = filtered_env_vars + # # Debug output # print(f"Analyzing dependencies for pod: {pod.metadata.name}") # print(f"ConfigMap refs: {configmap_refs}") diff --git a/studio-backend/app/services/exporter_service.py b/studio-backend/app/services/exporter_service.py index 31d6911..0dd0d49 100644 --- a/studio-backend/app/services/exporter_service.py +++ b/studio-backend/app/services/exporter_service.py @@ -24,7 +24,21 @@ def convert_proj_info_to_manifest(proj_info_json, output_file=None): with open(service_file_path, "r") as service_file: service_manifest_read = service_file.read() service_manifest_raw = list(ordered_load_all(replace_dynamic_manifest_placeholder(service_manifest_read, service_info, proj_info_json), yaml.SafeLoader)) - service_manifest = [replace_manifest_placeholders(doc, service_info) for doc in service_manifest_raw] + # For app-backend, include all service endpoints in variables so it can connect to all services + if service_info.get('service_type') == 'app': + # Add only OPEA service endpoints to app-backend's variables + opea_service_endpoints = {} + for svc_name, svc_info in opea_services["services"].items(): + if 'endpoint' in svc_info and 
svc_info['endpoint'].startswith('opea-'): + # Clean the service name for use as variable key (remove @ symbols) + clean_svc_name = svc_name.replace('@', '_').replace('opea_service_', '') + opea_service_endpoints[f"{clean_svc_name}_endpoint"] = svc_info['endpoint'] + + # Merge with existing service_info + enhanced_service_info = {**service_info, **opea_service_endpoints} + service_manifest = [replace_manifest_placeholders(doc, enhanced_service_info) for doc in service_manifest_raw] + else: + service_manifest = [replace_manifest_placeholders(doc, service_info) for doc in service_manifest_raw] output_manifest.extend((doc, service_name) for doc in service_manifest) # print("Manifest generation completed.") diff --git a/studio-backend/app/templates/app/app.manifest.yaml b/studio-backend/app/templates/app/app.manifest.yaml index 9b43420..0395a6e 100644 --- a/studio-backend/app/templates/app/app.manifest.yaml +++ b/studio-backend/app/templates/app/app.manifest.yaml @@ -53,6 +53,12 @@ spec: value: 'true' - name: LOGFLAG value: 'True' + - name: http_proxy + value: "${HTTP_PROXY}" + - name: https_proxy + value: "${HTTP_PROXY}" + - name: no_proxy + value: "${NO_PROXY}" __TELEMETRY_ENDPOINT__ securityContext: allowPrivilegeEscalation: false diff --git a/studio-backend/app/templates/microsvc-manifests/asr-usvc.yaml b/studio-backend/app/templates/microsvc-manifests/asr-usvc.yaml index cff1ece..868643b 100644 --- a/studio-backend/app/templates/microsvc-manifests/asr-usvc.yaml +++ b/studio-backend/app/templates/microsvc-manifests/asr-usvc.yaml @@ -10,9 +10,9 @@ metadata: data: HEALTHCHECK_ENDPOINT: "{whisper_endpoint}:{whisper_port}" ASR_ENDPOINT: "http://{whisper_endpoint}:{whisper_port}" - http_proxy: "" - https_proxy: "" - no_proxy: "" + http_proxy: "${HTTP_PROXY}" + https_proxy: "${HTTP_PROXY}" + no_proxy: "${NO_PROXY}" LOGFLAG: "True" --- diff --git a/studio-backend/app/templates/microsvc-manifests/data-prep.yaml b/studio-backend/app/templates/microsvc-manifests/data-prep.yaml index 734821a..31337b6 100644 --- a/studio-backend/app/templates/microsvc-manifests/data-prep.yaml +++ b/studio-backend/app/templates/microsvc-manifests/data-prep.yaml @@ -17,9 +17,9 @@ data: SEARCH_BATCH_SIZE: "10" HF_TOKEN: "{huggingFaceToken}" HF_HOME: "/tmp/.cache/huggingface" - http_proxy: "" - https_proxy: "" - no_proxy: "" + http_proxy: "${HTTP_PROXY}" + https_proxy: "${HTTP_PROXY}" + no_proxy: "${NO_PROXY}" LOGFLAG: "True" --- # Source: data-prep/templates/service.yaml diff --git a/studio-backend/app/templates/microsvc-manifests/embedding-usvc.yaml b/studio-backend/app/templates/microsvc-manifests/embedding-usvc.yaml index 997d13c..e6befed 100644 --- a/studio-backend/app/templates/microsvc-manifests/embedding-usvc.yaml +++ b/studio-backend/app/templates/microsvc-manifests/embedding-usvc.yaml @@ -10,9 +10,9 @@ metadata: data: HEALTHCHECK_ENDPOINT: "{tei_endpoint}:{tei_port}" TEI_EMBEDDING_ENDPOINT: "http://{tei_endpoint}:{tei_port}" - http_proxy: "" - https_proxy: "" - no_proxy: "" + http_proxy: "${HTTP_PROXY}" + https_proxy: "${HTTP_PROXY}" + no_proxy: "${NO_PROXY}" TRANSFORMERS_CACHE: "/tmp/transformers_cache" HF_HOME: "/tmp/.cache/huggingface" LOGFLAG: "True" diff --git a/studio-backend/app/templates/microsvc-manifests/reranking-usvc.yaml b/studio-backend/app/templates/microsvc-manifests/reranking-usvc.yaml index 07cec32..4916907 100644 --- a/studio-backend/app/templates/microsvc-manifests/reranking-usvc.yaml +++ b/studio-backend/app/templates/microsvc-manifests/reranking-usvc.yaml @@ -10,9 +10,9 @@ metadata: data: 
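+  # ${HTTP_PROXY} and ${NO_PROXY} are filled in at deploy time by
+  # replace_manifest_placeholders() from the sandbox SBX_* environment.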
HEALTHCHECK_ENDPOINT: "{tei_endpoint}:{tei_port}" TEI_RERANKING_ENDPOINT: "http://{tei_endpoint}:{tei_port}" - http_proxy: "" - https_proxy: "" - no_proxy: "" + http_proxy: "${HTTP_PROXY}" + https_proxy: "${HTTP_PROXY}" + no_proxy: "${NO_PROXY}" LOGFLAG: "True" --- # Source: reranking-usvc/templates/service.yaml diff --git a/studio-backend/app/templates/microsvc-manifests/retriever-usvc.yaml b/studio-backend/app/templates/microsvc-manifests/retriever-usvc.yaml index a508bb1..7b09a81 100644 --- a/studio-backend/app/templates/microsvc-manifests/retriever-usvc.yaml +++ b/studio-backend/app/templates/microsvc-manifests/retriever-usvc.yaml @@ -14,9 +14,9 @@ data: REDIS_URL: "redis://{redis_vector_store_endpoint}:{redis_vector_store_port}" INDEX_NAME: "rag-redis" EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" - http_proxy: "" - https_proxy: "" - no_proxy: "" + http_proxy: "${HTTP_PROXY}" + https_proxy: "${HTTP_PROXY}" + no_proxy: "${NO_PROXY}" HF_HOME: "/tmp/.cache/huggingface" HF_TOKEN: "{huggingFaceToken}" LOGFLAG: "True" diff --git a/studio-backend/app/utils/exporter_utils.py b/studio-backend/app/utils/exporter_utils.py index 0fd6fe7..766034a 100644 --- a/studio-backend/app/utils/exporter_utils.py +++ b/studio-backend/app/utils/exporter_utils.py @@ -206,7 +206,7 @@ def process_opea_services(proj_info_json): # Remove the 'opea_service@' prefix and append the node_name suffix if any node_suffix = node_name.split('_')[-1] if '_' in node_name else '' service_type_cleaned = node_info['service_type'].replace('opea_service@', '') - opea_service_endpoint = f"{service_type_cleaned.replace('_','-')}-{node_suffix}".strip('-') + opea_service_endpoint = f"opea-{service_type_cleaned.replace('_','-')}-{node_suffix}".strip('-') # Iterate through the dependent_services to map to the service info for service_type, service_info in node_info.get('dependent_services', {}).items(): diff --git a/studio-backend/app/utils/placeholders_utils.py b/studio-backend/app/utils/placeholders_utils.py index 6e686fd..17d4fca 100644 --- a/studio-backend/app/utils/placeholders_utils.py +++ b/studio-backend/app/utils/placeholders_utils.py @@ -63,7 +63,23 @@ def replace_manifest_placeholders(obj, variables): value = value.replace("${REGISTRY}", os.getenv("REGISTRY", "opea")) value = value.replace("${TAG}", os.getenv("TAG", "latest")) value = value.replace("${HTTP_PROXY}", os.getenv("SBX_HTTP_PROXY", "")) - value = value.replace("${NO_PROXY}", os.getenv("SBX_NO_PROXY", "")) + + # Enhanced NO_PROXY handling - extract service hostnames from variables + base_no_proxy = os.getenv("SBX_NO_PROXY", "") + if "${NO_PROXY}" in value and variables: + service_hostnames = [] + # Extract hostnames from all services in variables + for var_key, var_value in variables.items(): + if var_key.endswith('_endpoint') and isinstance(var_value, str): + service_hostnames.append(var_value) + + if service_hostnames: + enhanced_no_proxy = f"{base_no_proxy},{','.join(service_hostnames)}" if base_no_proxy else ','.join(service_hostnames) + value = value.replace("${NO_PROXY}", enhanced_no_proxy) + else: + value = value.replace("${NO_PROXY}", base_no_proxy) + else: + value = value.replace("${NO_PROXY}", base_no_proxy) # Attempt to replace placeholders in the string formatted_value = value.format(**variables) # If the key is a port-related field and the formatted value is a digit, convert to int diff --git a/studio-frontend/packages/server/package.json b/studio-frontend/packages/server/package.json index 5ab2abd..29ca02a 100644 --- 
a/studio-frontend/packages/server/package.json +++ b/studio-frontend/packages/server/package.json @@ -85,7 +85,8 @@ "sqlite3": "^5.1.6", "typeorm": "^0.3.6", "uuid": "^9.0.1", - "winston": "^3.9.0" + "winston": "^3.9.0", + "https-proxy-agent": "^7.0.4" }, "devDependencies": { "@types/content-disposition": "0.5.8", diff --git a/studio-frontend/packages/server/src/services/chatflows/index.ts b/studio-frontend/packages/server/src/services/chatflows/index.ts index 4ae6c1d..d5abbcd 100644 --- a/studio-frontend/packages/server/src/services/chatflows/index.ts +++ b/studio-frontend/packages/server/src/services/chatflows/index.ts @@ -13,8 +13,25 @@ import { containsBase64File, updateFlowDataWithFilePaths } from '../../utils/fil import { getRunningExpressApp } from '../../utils/getRunningExpressApp' import { utilGetUploadsConfig } from '../../utils/getUploadsConfig' import logger from '../../utils/logger' -import axios from 'axios' -import { Readable } from 'stream' +import axios, { AxiosRequestConfig } from 'axios' +import { HttpsProxyAgent } from 'https-proxy-agent' + +// Configure github axios to support HTTP_PROXY/HTTPS_PROXY environment variables +const getGithubAxiosConfig = (): AxiosRequestConfig => { + const http_proxy = process.env.http_proxy || process.env.HTTP_PROXY + const agent = (http_proxy && http_proxy.trim() !== "") ? new HttpsProxyAgent(http_proxy) : undefined + + return { + headers: { + Accept: 'application/vnd.github.v3+json', + }, + proxy: false, + ...(agent && { + httpAgent: agent, + httpsAgent: agent, + }), + } +} const STUDIO_SERVER_URL = process.env.STUDIO_SERVER_URL || 'http://studio-backend.studio.svc.cluster.local:5000' @@ -161,17 +178,23 @@ const getAllChatflowsbyUserId = async (userid: string, type?: ChatflowType): Pro const importSampleChatflowsbyUserId = async (userid: string, type?: ChatflowType): Promise => { try { - const response = await axios.get('https://api.github.com/repos/opea-project/GenAIStudio/contents/sample-workflows'); + const axiosConfig = getGithubAxiosConfig() + + console.log('Importing sample chatflows for user:', userid); + + const response = await axios.get( + 'https://api.github.com/repos/opea-project/GenAIStudio/contents/sample-workflows', + axiosConfig + ); + + console.log('Response from GitHub:', response.data); + const files = response.data.filter((item: any) => item.type === 'file'); - console.log(`Number of files: ${files.length}`); const chatflows: Partial[] = []; - for (const file of files) { - console.log(`Download URL: ${file.download_url}`); - const fileResponse = await axios.get(file.download_url); + const fileResponse = await axios.get(file.download_url, axiosConfig); const parsedFlowData = fileResponse.data; - const newChatflow: Partial = { userid: userid, name: file.name.replace('.json', ''), @@ -180,7 +203,6 @@ const importSampleChatflowsbyUserId = async (userid: string, type?: ChatflowType deployed: false, isPublic: false }; - chatflows.push(newChatflow); } const insertResponse = await importChatflows(chatflows); @@ -617,4 +639,4 @@ export default { getSinglePublicChatbotConfig, oneClickDeploymentService, updateDeploymentStatus -} +} \ No newline at end of file From 14c6b08db2e89ea1fb9401bad7d4f7a9a12b865a Mon Sep 17 00:00:00 2001 From: wwanarif Date: Thu, 28 Aug 2025 15:51:25 +0000 Subject: [PATCH 03/23] enable development env for studio-frontend Signed-off-by: wwanarif --- studio-frontend/docker-compose.dev.yml | 28 +++++ studio-frontend/packages/ui/.env.development | 2 + .../packages/ui/src/KeycloakContext.jsx | 106 
++++++++++++------ 3 files changed, 102 insertions(+), 34 deletions(-) create mode 100644 studio-frontend/docker-compose.dev.yml create mode 100644 studio-frontend/packages/ui/.env.development diff --git a/studio-frontend/docker-compose.dev.yml b/studio-frontend/docker-compose.dev.yml new file mode 100644 index 0000000..5d5133c --- /dev/null +++ b/studio-frontend/docker-compose.dev.yml @@ -0,0 +1,28 @@ +version: '3.8' + +services: + studio-frontend: + build: + context: . + dockerfile: Dockerfile + args: + - http_proxy=${http_proxy} + - https_proxy=${https_proxy} + - no_proxy=${no_proxy} + container_name: studio-frontend-dev + ports: + - "3000:3000" + - "8088:8088" + volumes: + - .:/usr/src + - /usr/src/node_modules + - /usr/src/.pnpm-store + - /usr/src/packages/ui/build + command: ["sh", "-c", "pnpm dev"] + environment: + - http_proxy=${http_proxy} + - https_proxy=${https_proxy} + - no_proxy=${no_proxy} + stdin_open: true + tty: true + restart: unless-stopped \ No newline at end of file diff --git a/studio-frontend/packages/ui/.env.development b/studio-frontend/packages/ui/.env.development new file mode 100644 index 0000000..3269e6c --- /dev/null +++ b/studio-frontend/packages/ui/.env.development @@ -0,0 +1,2 @@ +NODE_TLS_REJECT_UNAUTHORIZED=0 +VITE_DISABLE_KEYCLOAK=true \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/KeycloakContext.jsx b/studio-frontend/packages/ui/src/KeycloakContext.jsx index 9753ee6..48b9001 100644 --- a/studio-frontend/packages/ui/src/KeycloakContext.jsx +++ b/studio-frontend/packages/ui/src/KeycloakContext.jsx @@ -1,51 +1,89 @@ import React, { createContext, useContext, useEffect, useState } from 'react'; -import Keycloak from 'keycloak-js'; // Create the Keycloak context const KeycloakContext = createContext(null); +// Check if Keycloak is disabled via environment variable +const isKeycloakDisabled = import.meta.env.VITE_DISABLE_KEYCLOAK === 'true'; +console.log('isKeycloakDisabled: ', isKeycloakDisabled); + +// Simple user object for when Keycloak is disabled +const createAdminUser = () => ({ + authenticated: true, + tokenParsed: { + email: 'admin@admin.com', + preferred_username: 'admin', + name: 'Admin User', + given_name: 'Admin', + family_name: 'User', + resource_access: { + genaistudio: { + roles: ['admin'] + } + } + }, + logout: () => { + console.log('Logout called - refreshing page'); + window.location.href = '/'; + } +}); + // Provide the Keycloak context to the application export const KeycloakProvider = ({ children }) => { const [keycloak, setKeycloak] = useState(null); const [isInitialized, setIsInitialized] = useState(false); useEffect(() => { - if (!window.crypto || !window.crypto.subtle) { - console.error("Web Crypto API is not available. This may cause security issues."); + // If Keycloak is disabled, use simple admin user + if (isKeycloakDisabled) { + console.info("Keycloak authentication is disabled. 
Using admin@admin.com as default user."); + const adminUser = createAdminUser(); + setKeycloak(adminUser); + setIsInitialized(true); + return; } - const initOptions = { - url: '/auth/', - realm: 'genaistudio', - clientId: 'genaistudio', - onLoad: 'login-required', // check-sso | login-required - responseType: 'code', // Corrected from KeycloakResponseType to responseType - silentCheckSsoRedirectUri: window.location.origin + "/silent-check-sso.html", - checkLoginIframe: false, - }; - - const kc = new Keycloak(initOptions); - - kc.init({ - onLoad: initOptions.onLoad, - responseType: 'code', // Corrected from KeycloakResponseType to responseType - }).then((auth) => { - if (!auth) { - window.location.reload(); - } else { - console.info("Authenticated"); - console.log('auth', auth); - console.log('Keycloak', kc); - - kc.onTokenExpired = () => { - console.log('token expired'); - }; - - setKeycloak(kc); // Set the Keycloak instance in state - setIsInitialized(true); // Mark initialization as complete + // Keycloak is enabled - dynamically import and initialize + import('keycloak-js').then((KeycloakModule) => { + const Keycloak = KeycloakModule.default; + + if (!window.crypto || !window.crypto.subtle) { + console.error("Web Crypto API is not available. This may cause security issues."); } - }).catch((error) => { - console.error("Authentication Failed", error); + + const initOptions = { + url: '/auth/', + realm: 'genaistudio', + clientId: 'genaistudio', + onLoad: 'login-required', + responseType: 'code', + silentCheckSsoRedirectUri: window.location.origin + "/silent-check-sso.html", + checkLoginIframe: false, + }; + + const kc = new Keycloak(initOptions); + + kc.init({ + onLoad: initOptions.onLoad, + responseType: 'code', + }).then((auth) => { + if (!auth) { + window.location.reload(); + } else { + console.info("Authenticated with Keycloak"); + console.log('auth', auth); + console.log('Keycloak', kc); + + kc.onTokenExpired = () => { + console.log('token expired'); + }; + + setKeycloak(kc); + setIsInitialized(true); + } + }).catch((error) => { + console.error("Authentication Failed", error); + }); }); }, []); From 8d75898f619cd076fadb10ecff7ea07705a05ea6 Mon Sep 17 00:00:00 2001 From: wwanarif Date: Fri, 29 Aug 2025 08:17:53 +0000 Subject: [PATCH 04/23] initial push for finetuning UI Signed-off-by: wwanarif --- .../packages/ui/src/api/finetuning.js | 62 ++ .../ui/src/layout/MainLayout/Header/index.jsx | 35 +- .../MenuList/CollapsedMenuList/index.jsx | 104 +++ .../Sidebar/MenuList/NavGroup/index.jsx | 4 +- .../src/layout/MainLayout/Sidebar/index.jsx | 210 ++++-- .../ui/src/layout/MainLayout/ViewHeader.jsx | 5 +- .../ui/src/layout/MainLayout/index.jsx | 62 +- .../packages/ui/src/menu-items/dashboard.js | 28 +- .../packages/ui/src/routes/MainRoutes.jsx | 7 + .../packages/ui/src/store/constant.js | 1 + .../ui/src/ui-component/cards/MainCard.jsx | 10 +- .../ui/src/ui-component/extended/Logo.jsx | 3 +- .../src/ui-component/table/FlowListTable.jsx | 3 +- .../src/views/finetuning/FileUploadArea.jsx | 318 ++++++++ .../views/finetuning/FinetuningJobModal.jsx | 701 ++++++++++++++++++ .../views/finetuning/FinetuningJobsTable.jsx | 211 ++++++ .../ui/src/views/finetuning/index.jsx | 175 +++++ .../packages/ui/src/views/opeaflows/index.jsx | 95 +-- 18 files changed, 1874 insertions(+), 160 deletions(-) create mode 100644 studio-frontend/packages/ui/src/api/finetuning.js create mode 100644 studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/CollapsedMenuList/index.jsx create mode 100644 
studio-frontend/packages/ui/src/views/finetuning/FileUploadArea.jsx create mode 100644 studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx create mode 100644 studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx create mode 100644 studio-frontend/packages/ui/src/views/finetuning/index.jsx diff --git a/studio-frontend/packages/ui/src/api/finetuning.js b/studio-frontend/packages/ui/src/api/finetuning.js new file mode 100644 index 0000000..3c7bd6d --- /dev/null +++ b/studio-frontend/packages/ui/src/api/finetuning.js @@ -0,0 +1,62 @@ +import client from './client' + +const finetuningApi = { + // Get all fine-tuning jobs + getAllJobs: () => client.get('/finetuning/jobs'), + + // Create new fine-tuning job with OpenAI API format + createJob: (jobData) => { + const payload = { + model: jobData.model, + training_file: jobData.training_file_id, + validation_file: jobData.validation_file_id, + hyperparameters: { + n_epochs: jobData.hyperparameters.n_epochs, + batch_size: jobData.hyperparameters.batch_size, + learning_rate_multiplier: jobData.hyperparameters.learning_rate_multiplier, + prompt_loss_weight: jobData.hyperparameters.prompt_loss_weight + }, + suffix: jobData.suffix + } + + return client.post('/finetuning/jobs', payload) + }, + + // Get specific fine-tuning job + getJob: (jobId) => client.get(`/finetuning/jobs/${jobId}`), + + // Delete fine-tuning job + deleteJob: (jobId) => client.delete(`/finetuning/jobs/${jobId}`), + + // Upload dataset file with suffix + uploadFile: (file, suffix, onUploadProgress) => { + const formData = new FormData() + + // Generate suffixed filename + const fileExtension = '.' + file.name.split('.').pop() + const baseFileName = file.name.replace(fileExtension, '') + const suffixedFileName = `${baseFileName}-${suffix}${fileExtension}` + + // Append file with suffixed name + formData.append('file', file, suffixedFileName) + formData.append('purpose', 'fine-tune') // OpenAI API requirement + formData.append('suffix', suffix) + + return client.post('/files/upload', formData, { + headers: { + 'Content-Type': 'multipart/form-data' + }, + onUploadProgress + }) + }, + + // Get available base models + getBaseModels: () => client.get('/finetuning/models'), + + // Download fine-tuned model + downloadModel: (jobId) => client.get(`/finetuning/jobs/${jobId}/download`, { + responseType: 'blob' + }) +} + +export default finetuningApi \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/layout/MainLayout/Header/index.jsx b/studio-frontend/packages/ui/src/layout/MainLayout/Header/index.jsx index f276535..5ddb510 100644 --- a/studio-frontend/packages/ui/src/layout/MainLayout/Header/index.jsx +++ b/studio-frontend/packages/ui/src/layout/MainLayout/Header/index.jsx @@ -5,8 +5,9 @@ import { useNavigate } from 'react-router-dom' // material-ui import { useTheme } from '@mui/material/styles' -import { Avatar, Box, ButtonBase, Switch } from '@mui/material' +import { Avatar, Box, ButtonBase, Switch, Typography, IconButton, useMediaQuery } from '@mui/material' import { styled } from '@mui/material/styles' +import MenuIcon from '@mui/icons-material/Menu' // project imports import LogoSection from '../LogoSection' @@ -87,9 +88,10 @@ const MaterialUISwitch = styled(Switch)(({ theme }) => ({ } })) -const Header = ({userId}) => { +const Header = ({userId, handleLeftDrawerToggle}) => { // console.log ('Header', userId) const theme = useTheme() + const matchDownMd = useMediaQuery(theme.breakpoints.down('md')) // const navigate = 
useNavigate() // const customization = useSelector((state) => state.customization) @@ -122,18 +124,39 @@ const Header = ({userId}) => { width: '100%', // Full width of the parent container }} > - {/* Logo Section */} + {/* Left Section - Mobile menu + Logo */} - + {/* Mobile menu button */} + {matchDownMd && handleLeftDrawerToggle && ( + + + + )} + + {/* Logo - always visible on mobile, hidden on desktop in header */} + + + + + {/* Desktop logo - hidden on mobile */} + diff --git a/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/CollapsedMenuList/index.jsx b/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/CollapsedMenuList/index.jsx new file mode 100644 index 0000000..8d24ca3 --- /dev/null +++ b/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/CollapsedMenuList/index.jsx @@ -0,0 +1,104 @@ +import { forwardRef } from 'react' +import { Link } from 'react-router-dom' +import { useDispatch, useSelector } from 'react-redux' + +// material-ui +import { useTheme } from '@mui/material/styles' +import { Box, IconButton, Tooltip, useMediaQuery } from '@mui/material' + +// project imports +import { MENU_OPEN, SET_MENU } from '@/store/actions' +import config from '@/config' +import menuItem from '@/menu-items' + +// ==============================|| COLLAPSED SIDEBAR MENU LIST ||============================== // + +const CollapsedMenuList = () => { + const theme = useTheme() + const dispatch = useDispatch() + const customization = useSelector((state) => state.customization) + const matchesSM = useMediaQuery(theme.breakpoints.down('lg')) + + // Get all menu items + const getAllMenuItems = (items) => { + let allItems = [] + items.forEach(item => { + if (item.type === 'group' && item.children) { + item.children.forEach(child => { + if (child.type === 'item') { + allItems.push(child) + } + }) + } + }) + return allItems + } + + const menuItems = getAllMenuItems(menuItem.items) + + const itemHandler = (item) => { + dispatch({ type: MENU_OPEN, id: item.id }) + if (matchesSM) dispatch({ type: SET_MENU, opened: false }) + } + + const CollapsedNavItem = ({ item }) => { + const Icon = item.icon + const isSelected = customization.isOpen.findIndex((id) => id === item.id) > -1 + + let itemTarget = '_self' + if (item.target) { + itemTarget = '_blank' + } + + let linkProps = { + component: forwardRef(function CollapsedNavItemComponent(props, ref) { + return + }) + } + if (item?.external) { + linkProps = { component: 'a', href: item.url, target: itemTarget } + } + + return ( + + itemHandler(item)} + sx={{ + width: '40px', + height: '40px', + margin: '4px 0', + backgroundColor: isSelected ? theme.palette.action.selected : 'transparent', + color: isSelected ? theme.palette.primary.main : theme.palette.text.secondary, + '&:hover': { + backgroundColor: theme.palette.action.hover, + color: theme.palette.primary.main + }, + borderRadius: '8px' + }} + disabled={item.disabled} + > + {item.icon ? 
: null} + + + ) + } + + return ( + + {menuItems.map((item) => ( + + ))} + + ) +} + +export default CollapsedMenuList \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/NavGroup/index.jsx b/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/NavGroup/index.jsx index 0625d54..f93ecfd 100644 --- a/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/NavGroup/index.jsx +++ b/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/NavGroup/index.jsx @@ -44,13 +44,11 @@ const NavGroup = ({ item }) => { ) } - sx={{ py: '20px' }} + sx={{ py: 0 }} > {items} - {/* group divider */} - ) } diff --git a/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/index.jsx b/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/index.jsx index efdda72..6ce5c01 100644 --- a/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/index.jsx +++ b/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/index.jsx @@ -2,7 +2,10 @@ import PropTypes from 'prop-types' // material-ui import { useTheme } from '@mui/material/styles' -import { Box, Drawer, useMediaQuery } from '@mui/material' +import { Box, Drawer, useMediaQuery, IconButton, Tooltip, Typography } from '@mui/material' +import ChevronLeftIcon from '@mui/icons-material/ChevronLeft' +import ChevronRightIcon from '@mui/icons-material/ChevronRight' +import MenuIcon from '@mui/icons-material/Menu' // third-party import PerfectScrollbar from 'react-perfect-scrollbar' @@ -10,8 +13,9 @@ import { BrowserView, MobileView } from 'react-device-detect' // project imports import MenuList from './MenuList' +import CollapsedMenuList from './MenuList/CollapsedMenuList' import LogoSection from '../LogoSection' -import { drawerWidth, headerHeight } from '@/store/constant' +import { drawerWidth, drawerWidthCollapsed, headerHeight } from '@/store/constant' // ==============================|| SIDEBAR DRAWER ||============================== // @@ -19,23 +23,86 @@ const Sidebar = ({ drawerOpen, drawerToggle, window }) => { const theme = useTheme() const matchUpMd = useMediaQuery(theme.breakpoints.up('md')) - const drawer = ( + // Desktop collapsed drawer content + const collapsedDrawer = ( + + {/* Sidebar icon when collapsed with tooltip */} + + + + + + + + + {/* Collapsed Menu Items */} + + + + + ) + + // Desktop expanded drawer content + const expandedDrawer = ( <> + {/* Header with GenAI Studio text and collapse button */} - - - + + GenAI Studio + + + + + + + + + {/* Menu content */} { - - - - - ) - const container = window !== undefined ? () => window.document.body : undefined - - return ( - - + - {drawer} - - + + GenAI Studio + + + + + + + {/* Mobile Menu content */} + + + + + ) + + const container = window !== undefined ? () => window.document.body : undefined + + return ( + <> + {/* Desktop Sidebar - Always present, changes width */} + {matchUpMd && ( + + {drawerOpen ? 
expandedDrawer : collapsedDrawer} + + )} + + {/* Mobile Sidebar - Overlay */} + {!matchUpMd && ( + + {mobileDrawer} + + )} + ) } diff --git a/studio-frontend/packages/ui/src/layout/MainLayout/ViewHeader.jsx b/studio-frontend/packages/ui/src/layout/MainLayout/ViewHeader.jsx index 9648888..cf39de8 100644 --- a/studio-frontend/packages/ui/src/layout/MainLayout/ViewHeader.jsx +++ b/studio-frontend/packages/ui/src/layout/MainLayout/ViewHeader.jsx @@ -24,17 +24,18 @@ const ViewHeader = ({ const theme = useTheme() return ( - + - + {isBackButton && ( diff --git a/studio-frontend/packages/ui/src/layout/MainLayout/index.jsx b/studio-frontend/packages/ui/src/layout/MainLayout/index.jsx index f53facc..491708f 100644 --- a/studio-frontend/packages/ui/src/layout/MainLayout/index.jsx +++ b/studio-frontend/packages/ui/src/layout/MainLayout/index.jsx @@ -4,12 +4,13 @@ import { Outlet } from 'react-router-dom' // material-ui import { styled, useTheme } from '@mui/material/styles' -import { AppBar, Box, Chip, CssBaseline, Toolbar, useMediaQuery } from '@mui/material' +import { AppBar, Box, Chip, CssBaseline, Toolbar, useMediaQuery, IconButton, Fab } from '@mui/material' +import ChevronRightIcon from '@mui/icons-material/ChevronRight' // project imports import Header from './Header' import Sidebar from './Sidebar' -import { drawerWidth, headerHeight } from '@/store/constant' +import { drawerWidth, drawerWidthCollapsed, headerHeight } from '@/store/constant' import { SET_MENU } from '@/store/actions' import {useKeycloak } from '../../KeycloakContext.jsx' @@ -17,43 +18,31 @@ import {useKeycloak } from '../../KeycloakContext.jsx' // styles const Main = styled('main', { shouldForwardProp: (prop) => prop !== 'open' })(({ theme, open }) => ({ ...theme.typography.mainContent, - ...(!open && { - backgroundColor: 'transparent', - borderBottomLeftRadius: 0, - borderBottomRightRadius: 0, - transition: theme.transitions.create('all', { - easing: theme.transitions.easing.sharp, - duration: theme.transitions.duration.leavingScreen - }), - marginRight: 0, - [theme.breakpoints.up('md')]: { - marginLeft: -drawerWidth, - width: `calc(100% - ${drawerWidth}px)` - }, - [theme.breakpoints.down('md')]: { - marginLeft: '20px', - width: `calc(100% - ${drawerWidth}px)`, - padding: '16px' - }, - [theme.breakpoints.down('sm')]: { - marginLeft: '10px', - width: `calc(100% - ${drawerWidth}px)`, - padding: '16px', - marginRight: '10px' - } + backgroundColor: 'transparent', + borderBottomLeftRadius: 0, + borderBottomRightRadius: 0, + transition: theme.transitions.create(['margin', 'width'], { + easing: theme.transitions.easing.sharp, + duration: theme.transitions.duration.leavingScreen }), - ...(open && { - backgroundColor: 'transparent', - transition: theme.transitions.create('all', { + marginRight: 0, + [theme.breakpoints.up('md')]: { + marginLeft: 0, + width: `calc(100% - ${open ? 
drawerWidth : drawerWidthCollapsed}px)`, + transition: theme.transitions.create(['margin', 'width'], { easing: theme.transitions.easing.easeOut, duration: theme.transitions.duration.enteringScreen }), + paddingLeft: '8px', + paddingRight: '8px', + paddingBottom: '8px', + paddingTop: '2px' + }, + [theme.breakpoints.down('md')]: { marginLeft: 0, - marginRight: 0, - borderBottomLeftRadius: 0, - borderBottomRightRadius: 0, - width: `calc(100% - ${drawerWidth}px)` - }) + width: '100%', + padding: '16px' + } })) // ==============================|| MAIN LAYOUT ||============================== // @@ -81,6 +70,7 @@ const MainLayout = () => { } useEffect(() => { + // On desktop, start with sidebar open; on mobile, keep it closed until user opens setTimeout(() => dispatch({ type: SET_MENU, opened: !matchDownMd }), 0) // eslint-disable-next-line react-hooks/exhaustive-deps }, [matchDownMd]) @@ -101,12 +91,12 @@ const MainLayout = () => { }} > -
+                    <Header userId={userId} handleLeftDrawerToggle={handleLeftDrawerToggle} />
{/* drawer */} - {/* */} + {/* main content */} (
diff --git a/studio-frontend/packages/ui/src/menu-items/dashboard.js b/studio-frontend/packages/ui/src/menu-items/dashboard.js index 6c7c3b1..23acd8d 100644 --- a/studio-frontend/packages/ui/src/menu-items/dashboard.js +++ b/studio-frontend/packages/ui/src/menu-items/dashboard.js @@ -8,11 +8,13 @@ import { IconLock, IconRobot, IconVariable, - IconFiles + IconFiles, + IconApps, + IconBrain } from '@tabler/icons-react' // constant -const icons = { IconUsersGroup, IconHierarchy, IconBuildingStore, IconKey, IconTool, IconLock, IconRobot, IconVariable, IconFiles } +const icons = { IconUsersGroup, IconHierarchy, IconBuildingStore, IconKey, IconTool, IconLock, IconRobot, IconVariable, IconFiles, IconApps, IconBrain, IconApps } // ==============================|| DASHBOARD MENU ITEMS ||============================== // @@ -26,24 +28,16 @@ const dashboard = { title: 'OPEA Flows', type: 'item', url: '/opeaflows', - icon: icons.IconHierarchy, - breakcrumbs: true + icon: icons.IconApps, + breadcrumbs: true }, { - id: 'sandbox', - title: 'OPEA Sandbox Evaluation', + id: 'finetuning', + title: 'Fine-tuning', type: 'item', - url: '/opeaflows', - icon: icons.IconHierarchy, - breakcrumbs: true - }, - { - id: 'opeadeployment', - title: 'OPEA Deployment', - type: 'item', - url: '/opeaflows', - icon: icons.IconHierarchy, - breakcrumbs: true + url: '/finetuning', + icon: icons.IconBrain, + breadcrumbs: true } // { // id: 'chatflows', diff --git a/studio-frontend/packages/ui/src/routes/MainRoutes.jsx b/studio-frontend/packages/ui/src/routes/MainRoutes.jsx index cd77b07..6b7a388 100644 --- a/studio-frontend/packages/ui/src/routes/MainRoutes.jsx +++ b/studio-frontend/packages/ui/src/routes/MainRoutes.jsx @@ -7,6 +7,9 @@ import Loadable from '@/ui-component/loading/Loadable' // chatflows routing const Opeaflows = Loadable(lazy(() => import('@/views/opeaflows'))) +// finetuning routing +const Finetuning = Loadable(lazy(() => import('@/views/finetuning'))) + // tracer routing const Tracer = Loadable(lazy(() => import('@/views/tracer'))) @@ -59,6 +62,10 @@ const MainRoutes = { path: '/opeaflows', element: }, + { + path: '/finetuning', + element: + }, { path:'/tracer/:ns', element: diff --git a/studio-frontend/packages/ui/src/store/constant.js b/studio-frontend/packages/ui/src/store/constant.js index de700eb..1143c56 100644 --- a/studio-frontend/packages/ui/src/store/constant.js +++ b/studio-frontend/packages/ui/src/store/constant.js @@ -1,6 +1,7 @@ // constant export const gridSpacing = 3 export const drawerWidth = 260 +export const drawerWidthCollapsed = 64 export const appDrawerWidth = 320 export const headerHeight = 80 export const maxScroll = 100000 diff --git a/studio-frontend/packages/ui/src/ui-component/cards/MainCard.jsx b/studio-frontend/packages/ui/src/ui-component/cards/MainCard.jsx index 61bbf75..a295737 100644 --- a/studio-frontend/packages/ui/src/ui-component/cards/MainCard.jsx +++ b/studio-frontend/packages/ui/src/ui-component/cards/MainCard.jsx @@ -18,8 +18,9 @@ const MainCard = forwardRef(function MainCard( content = true, contentClass = '', contentSX = { - px: 2, - py: 0 + px: 3, + pt: 3, + pb: 3 }, darkTitle, secondary, @@ -39,8 +40,9 @@ const MainCard = forwardRef(function MainCard( ':hover': { boxShadow: boxShadow ? 
shadow || '0 2px 14px 0 rgb(32 40 45 / 8%)' : 'inherit' }, - maxWidth: '1280px', - mx: 'auto', + width: '100%', + mx: 0, + my: 0, ...sx }} > diff --git a/studio-frontend/packages/ui/src/ui-component/extended/Logo.jsx b/studio-frontend/packages/ui/src/ui-component/extended/Logo.jsx index e1a862d..153bb1f 100644 --- a/studio-frontend/packages/ui/src/ui-component/extended/Logo.jsx +++ b/studio-frontend/packages/ui/src/ui-component/extended/Logo.jsx @@ -15,8 +15,7 @@ const Logo = () => { Flowise ) diff --git a/studio-frontend/packages/ui/src/ui-component/table/FlowListTable.jsx b/studio-frontend/packages/ui/src/ui-component/table/FlowListTable.jsx index bcc415a..86db21d 100644 --- a/studio-frontend/packages/ui/src/ui-component/table/FlowListTable.jsx +++ b/studio-frontend/packages/ui/src/ui-component/table/FlowListTable.jsx @@ -617,11 +617,10 @@ export const FlowListTable = ({ data, images, isLoading, filterFunction, updateF {userRole === 'admin' && - + User diff --git a/studio-frontend/packages/ui/src/views/finetuning/FileUploadArea.jsx b/studio-frontend/packages/ui/src/views/finetuning/FileUploadArea.jsx new file mode 100644 index 0000000..07ef190 --- /dev/null +++ b/studio-frontend/packages/ui/src/views/finetuning/FileUploadArea.jsx @@ -0,0 +1,318 @@ +import { useState, useRef } from 'react' +import PropTypes from 'prop-types' + +// material-ui +import { + Box, + Button, + LinearProgress, + Paper, + Stack, + Typography, + IconButton, + Chip, + Alert +} from '@mui/material' +import { useTheme, alpha } from '@mui/material/styles' + +// icons +import { + IconUpload, + IconFile, + IconX, + IconCheck, + IconAlertTriangle +} from '@tabler/icons-react' + +const FileUploadArea = ({ + onFileUpload, + acceptedTypes = ['.json', '.jsonl', '.csv'], + maxSizeMB = 10, + error = null +}) => { + const theme = useTheme() + const fileInputRef = useRef(null) + + const [dragActive, setDragActive] = useState(false) + const [uploadedFile, setUploadedFile] = useState(null) + const [uploadProgress, setUploadProgress] = useState(0) + const [uploadError, setUploadError] = useState(null) + const [preview, setPreview] = useState(null) + + const maxSizeBytes = maxSizeMB * 1024 * 1024 + + const validateFile = (file) => { + const errors = [] + + // Check file type + const fileExtension = '.' + file.name.split('.').pop().toLowerCase() + if (!acceptedTypes.includes(fileExtension)) { + errors.push(`File type ${fileExtension} not supported. 
Accepted types: ${acceptedTypes.join(', ')}`) + } + + // Check file size + if (file.size > maxSizeBytes) { + errors.push(`File size (${(file.size / 1024 / 1024).toFixed(2)}MB) exceeds limit of ${maxSizeMB}MB`) + } + + return errors + } + + const previewFile = async (file) => { + try { + const text = await file.text() + const lines = text.split('\n').slice(0, 5) // First 5 lines + setPreview({ + lines, + totalSize: file.size, + totalLines: text.split('\n').length + }) + } catch (error) { + console.error('Error previewing file:', error) + setPreview(null) + } + } + + const handleFileUpload = async (file) => { + const validationErrors = validateFile(file) + if (validationErrors.length > 0) { + setUploadError(validationErrors[0]) + return + } + + setUploadError(null) + setUploadProgress(0) + + // Simulate upload progress + const uploadInterval = setInterval(() => { + setUploadProgress(prev => { + if (prev >= 90) { + clearInterval(uploadInterval) + return prev + } + return prev + 10 + }) + }, 100) + + try { + // Create file object with original name (server will handle suffix) + const fileForUpload = { + ...file, + originalName: file.name, + id: `file-${Date.now()}` // Simple ID for tracking + } + + // Simulate upload delay + await new Promise(resolve => setTimeout(resolve, 1000)) + + setUploadProgress(100) + setUploadedFile(fileForUpload) + + // Generate preview + await previewFile(file) + + // Notify parent component + onFileUpload(fileForUpload) + + setTimeout(() => setUploadProgress(0), 500) + } catch (error) { + console.error('Upload error:', error) + setUploadError('Failed to upload file. Please try again.') + setUploadProgress(0) + } + } + + const handleDrag = (e) => { + e.preventDefault() + e.stopPropagation() + if (e.type === 'dragenter' || e.type === 'dragover') { + setDragActive(true) + } else if (e.type === 'dragleave') { + setDragActive(false) + } + } + + const handleDrop = (e) => { + e.preventDefault() + e.stopPropagation() + setDragActive(false) + + if (e.dataTransfer.files && e.dataTransfer.files[0]) { + handleFileUpload(e.dataTransfer.files[0]) + } + } + + const handleFileSelect = (e) => { + if (e.target.files && e.target.files[0]) { + handleFileUpload(e.target.files[0]) + } + } + + const removeFile = () => { + setUploadedFile(null) + setPreview(null) + setUploadError(null) + setUploadProgress(0) + onFileUpload(null) + + if (fileInputRef.current) { + fileInputRef.current.value = '' + } + } + + const openFileDialog = () => { + fileInputRef.current?.click() + } + + const formatFileSize = (bytes) => { + if (bytes === 0) return '0 Bytes' + const k = 1024 + const sizes = ['Bytes', 'KB', 'MB', 'GB'] + const i = Math.floor(Math.log(bytes) / Math.log(k)) + return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i] + } + + return ( + + + + {!uploadedFile ? 
( + + + + + + + Drop your file here or click to browse + + + Supported formats: {acceptedTypes.join(', ')} + + + Maximum file size: {maxSizeMB}MB + + + + + + + ) : ( + + + {/* File Info */} + + + + + {uploadedFile.name} + + } + label="Uploaded" + color="success" + size="small" + variant="outlined" + /> + + + + + + + + {formatFileSize(uploadedFile.size)} + + + {/* Upload Progress */} + {uploadProgress > 0 && uploadProgress < 100 && ( + + )} + + {/* File Preview */} + {preview && ( + + + Preview ({preview.totalLines} total lines): + + + + {preview.lines.join('\n')} + {preview.lines.length < preview.totalLines && '\n...'} + + + + )} + + + )} + + {/* Error Display */} + {(uploadError || error) && ( + } + > + {uploadError || error} + + )} + + ) +} + +FileUploadArea.propTypes = { + onFileUpload: PropTypes.func.isRequired, + acceptedTypes: PropTypes.arrayOf(PropTypes.string), + maxSizeMB: PropTypes.number, + error: PropTypes.string +} + +export default FileUploadArea \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx new file mode 100644 index 0000000..c389a54 --- /dev/null +++ b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx @@ -0,0 +1,701 @@ +import { useState } from 'react' +import PropTypes from 'prop-types' + +// material-ui +import { + Box, + Button, + Dialog, + DialogActions, + DialogContent, + DialogTitle, + TextField, + FormControl, + InputLabel, + Select, + MenuItem, + Typography, + Stack, + IconButton, + Grid +} from '@mui/material' +import { useTheme } from '@mui/material/styles' + +// icons +import { IconX } from '@tabler/icons-react' + +// components +import FileUploadArea from './FileUploadArea' + +// API +import finetuningApi from '@/api/finetuning' + +const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { + const theme = useTheme() + + const [formData, setFormData] = useState({ + baseModel: '', + trainingDataset: null, + // OpenAI standard parameters + openai_params: { + n_epochs: 3, + batch_size: 16, + learning_rate_multiplier: 1.0, + prompt_loss_weight: 0.01 + }, + // Extended configuration + general: { + task: 'instruction_tuning', + output_dir: './tmp', + report_to: 'none', + save_strategy: 'no', + enable_gradient_checkpointing: false, + trust_remote_code: false + }, + dataset: { + max_length: 512, + validation_split_percentage: 5, + padding_side: 'right', + truncation_side: 'right', + max_source_length: 384, + max_prompt_length: 512, + data_preprocess_type: 'neural_chat', + mask_input: true, + mask_response: true + }, + training: { + optimizer: 'adamw_torch', + epochs: 3, + learning_rate: 5.0e-5, + lr_scheduler: 'linear', + weight_decay: 0.0, + device: 'cpu', + mixed_precision: 'no', + gradient_accumulation_steps: 1, + logging_steps: 10, + accelerate_mode: 'DDP', + hpu_execution_mode: 'lazy', + num_training_workers: 1 + }, + lora: { + r: 8, + lora_alpha: 32, + lora_dropout: 0.1, + task_type: 'CAUSAL_LM' + } + }) + + const [errors, setErrors] = useState({}) + const [isSubmitting, setIsSubmitting] = useState(false) + + const baseModels = [ + 'gpt-3.5-turbo', + 'gpt-4', + 'llama-2-7b', + 'llama-2-13b', + 'mistral-7b', + 'codellama-7b', + 'falcon-7b' + ] + + const handleInputChange = (field, value) => { + setFormData(prev => ({ + ...prev, + [field]: value + })) + + // Clear error for this field + if (errors[field]) { + setErrors(prev => ({ + ...prev, + [field]: null + })) + } + } + + const 
handleOpenAIParamChange = (param, value) => { + setFormData(prev => ({ + ...prev, + openai_params: { + ...prev.openai_params, + [param]: value + } + })) + } + + const handleConfigChange = (section, param, value) => { + setFormData(prev => ({ + ...prev, + [section]: { + ...prev[section], + [param]: value + } + })) + } + + const handleFileUpload = (fileType, file) => { + setFormData(prev => ({ + ...prev, + [fileType]: file + })) + + // Clear error for this field + if (errors[fileType]) { + setErrors(prev => ({ + ...prev, + [fileType]: null + })) + } + } + + const validateForm = () => { + const newErrors = {} + + // Base validation + if (!formData.baseModel) { + newErrors.baseModel = 'Base model is required' + } + + if (!formData.trainingDataset) { + newErrors.trainingDataset = 'Training dataset is required' + } + + // OpenAI parameters validation + if (formData.openai_params.learning_rate_multiplier <= 0) { + newErrors.learning_rate_multiplier = 'Learning rate multiplier must be greater than 0' + } + + if (formData.openai_params.batch_size <= 0) { + newErrors.batch_size = 'Batch size must be greater than 0' + } + + if (formData.openai_params.n_epochs <= 0) { + newErrors.n_epochs = 'Number of epochs must be greater than 0' + } + + // Training parameters validation + if (formData.training.learning_rate <= 0) { + newErrors.learning_rate = 'Learning rate must be greater than 0' + } + + if (formData.training.epochs <= 0) { + newErrors.epochs = 'Epochs must be greater than 0' + } + + if (formData.training.logging_steps <= 0) { + newErrors.logging_steps = 'Logging steps must be greater than 0' + } + + // LoRA parameters validation + if (formData.lora.r <= 0) { + newErrors.lora_r = 'LoRA rank must be greater than 0' + } + + if (formData.lora.lora_alpha <= 0) { + newErrors.lora_alpha = 'LoRA alpha must be greater than 0' + } + + if (formData.lora.lora_dropout < 0 || formData.lora.lora_dropout > 1) { + newErrors.lora_dropout = 'LoRA dropout must be between 0 and 1' + } + + setErrors(newErrors) + return Object.keys(newErrors).length === 0 + } + + const handleSubmit = async () => { + if (!validateForm()) { + return + } + + setIsSubmitting(true) + + try { + // TODO: Replace with actual API call + // const response = await finetuningApi.createJob({ + // model: formData.baseModel, + // training_file_id: formData.trainingDataset?.id, + // General: formData.general, + // Dataset: formData.dataset, + // Training: formData.training, + // openai_params: formData.openai_params, + // lora_config: formData.lora + // }) + + // Generate job name automatically based on model and timestamp for simulation + const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19) + const jobId = `ft-${formData.baseModel}-${timestamp}` + + const newJob = { + id: jobId, + name: jobId, + status: 'pending', + model: formData.baseModel, + dataset: formData.trainingDataset?.suffixedName || formData.trainingDataset?.name || 'Unknown', + progress: '0%', + createdDate: new Date().toISOString(), + // Include all configuration sections + openai_params: formData.openai_params, + general: formData.general, + dataset_config: formData.dataset, + training: formData.training, + lora: formData.lora, + training_file_id: formData.trainingDataset?.id + } + + // Simulate API call delay + await new Promise(resolve => setTimeout(resolve, 1000)) + + onJobCreated(newJob) + handleClose() + } catch (error) { + console.error('Error creating fine-tuning job:', error) + // TODO: Show error notification + } finally { + setIsSubmitting(false) + } 
+ } + + const handleClose = () => { + setFormData({ + baseModel: '', + trainingDataset: null, + // OpenAI standard parameters + openai_params: { + n_epochs: 3, + batch_size: 16, + learning_rate_multiplier: 1.0, + prompt_loss_weight: 0.01 + }, + // Extended configuration + general: { + task: 'instruction_tuning', + output_dir: './tmp', + report_to: 'none', + save_strategy: 'no', + enable_gradient_checkpointing: false, + trust_remote_code: false + }, + dataset: { + max_length: 512, + validation_split_percentage: 5, + padding_side: 'right', + truncation_side: 'right', + max_source_length: 384, + max_prompt_length: 512, + data_preprocess_type: 'neural_chat', + mask_input: true, + mask_response: true + }, + training: { + optimizer: 'adamw_torch', + epochs: 3, + learning_rate: 5.0e-5, + lr_scheduler: 'linear', + weight_decay: 0.0, + device: 'cpu', + mixed_precision: 'no', + gradient_accumulation_steps: 1, + logging_steps: 10, + accelerate_mode: 'DDP', + hpu_execution_mode: 'lazy', + num_training_workers: 1 + }, + lora: { + r: 8, + lora_alpha: 32, + lora_dropout: 0.1, + task_type: 'CAUSAL_LM' + } + }) + setErrors({}) + setIsSubmitting(false) + onClose() + } + + return ( + + + + + Create New Fine-tuning Job + + + + + + + + + + {/* Top Left Quadrant: Model Configuration and Dataset Configuration */} + + + {/* Model Configuration */} + + + Model Configuration + + + Base Model + + {errors.baseModel && ( + + {errors.baseModel} + + )} + + + + {/* Dataset Configuration */} + + + Dataset Configuration + + + + + handleConfigChange('dataset', 'max_length', parseInt(e.target.value))} + inputProps={{ min: 128, max: 4096, step: 1 }} + size="medium" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + /> + + + handleConfigChange('dataset', 'max_source_length', parseInt(e.target.value))} + inputProps={{ min: 128, max: 2048, step: 1 }} + size="medium" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + /> + + + + Preprocess Type + + + + + handleConfigChange('dataset', 'validation_split_percentage', parseInt(e.target.value))} + inputProps={{ min: 1, max: 50, step: 1 }} + size="medium" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + /> + + + + + + + + {/* Top Right Quadrant: Training Dataset Upload */} + + + + Training Dataset * + + + handleFileUpload('trainingDataset', file)} + acceptedTypes={['.json', '.jsonl', '.csv']} + maxSizeMB={100} + error={errors.trainingDataset} + /> + + + + + {/* Bottom Left Quadrant: General Configuration + LoRA */} + + + + General Configuration + + + + Task Type + + + + Report To + + + handleConfigChange('general', 'output_dir', e.target.value)} + fullWidth + size="medium" + /> + + + + handleConfigChange('lora', 'r', parseInt(e.target.value))} + error={!!errors.lora_r} + inputProps={{ min: 1, max: 128, step: 1 }} + size="medium" + fullWidth + /> + + + handleConfigChange('lora', 'lora_alpha', parseInt(e.target.value))} + error={!!errors.lora_alpha} + inputProps={{ min: 1, max: 256, step: 1 }} + size="medium" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + /> + + + handleConfigChange('lora', 'lora_dropout', parseFloat(e.target.value))} + error={!!errors.lora_dropout} + inputProps={{ min: 0, max: 1, step: 0.01 }} + size="medium" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + /> + + + + + + + + {/* Bottom Right Quadrant: Training Configuration + OpenAI */} + + + + Training Configuration + + + + + + handleConfigChange('training', 'epochs', parseInt(e.target.value))} + error={!!errors.epochs} + 
inputProps={{ min: 1, max: 50, step: 1 }} + size="medium" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + /> + + + handleOpenAIParamChange('batch_size', parseInt(e.target.value))} + error={!!errors.batch_size} + inputProps={{ min: 1, max: 256, step: 1 }} + size="medium" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + /> + + + + handleConfigChange('training', 'learning_rate', parseFloat(e.target.value))} + error={!!errors.learning_rate} + inputProps={{ min: 0.00001, max: 0.01, step: 0.00001 }} + size="medium" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + /> + + Optimizer + + + + + + + Device + + + + + + Mixed Precision + + + + + + handleConfigChange('training', 'gradient_accumulation_steps', parseInt(e.target.value))} + inputProps={{ min: 1, step: 1 }} + size="medium" + fullWidth + /> + + + + handleOpenAIParamChange('learning_rate_multiplier', parseFloat(e.target.value))} + error={!!errors.learning_rate_multiplier} + inputProps={{ min: 0.02, max: 2, step: 0.01 }} + size="medium" + fullWidth + /> + + + handleOpenAIParamChange('prompt_loss_weight', parseFloat(e.target.value))} + inputProps={{ min: 0, max: 1, step: 0.01 }} + size="medium" + fullWidth + /> + + + + + + + + + + + + + + + ) +} + +FinetuningJobModal.propTypes = { + open: PropTypes.bool.isRequired, + onClose: PropTypes.func.isRequired, + onJobCreated: PropTypes.func.isRequired +} + +export default FinetuningJobModal \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx new file mode 100644 index 0000000..e551fab --- /dev/null +++ b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx @@ -0,0 +1,211 @@ +import { useState } from 'react' +import PropTypes from 'prop-types' + +// material-ui +import { + Box, + Button, + Chip, + LinearProgress, + Paper, + Stack, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + Typography, + IconButton, + Menu, + MenuItem +} from '@mui/material' +import { useTheme } from '@mui/material/styles' + +// icons +import { IconDots, IconEye, IconTrash, IconDownload } from '@tabler/icons-react' + +// utils - simple date formatting helper +const formatDistanceToNow = (date) => { + const now = new Date() + const diff = now - new Date(date) + const days = Math.floor(diff / (1000 * 60 * 60 * 24)) + const hours = Math.floor(diff / (1000 * 60 * 60)) + const minutes = Math.floor(diff / (1000 * 60)) + + if (days > 0) return `${days} day${days > 1 ? 's' : ''} ago` + if (hours > 0) return `${hours} hour${hours > 1 ? 's' : ''} ago` + if (minutes > 0) return `${minutes} minute${minutes > 1 ? 
's' : ''} ago` + return 'Just now' +} + +const FinetuningJobsTable = ({ data, isLoading }) => { + const theme = useTheme() + const [anchorEl, setAnchorEl] = useState(null) + const [selectedJob, setSelectedJob] = useState(null) + + const handleMenuClick = (event, job) => { + setAnchorEl(event.currentTarget) + setSelectedJob(job) + } + + const handleMenuClose = () => { + setAnchorEl(null) + setSelectedJob(null) + } + + const getStatusColor = (status) => { + switch (status?.toLowerCase()) { + case 'completed': + return 'success' + case 'running': + return 'primary' + case 'failed': + return 'error' + case 'pending': + return 'default' + default: + return 'default' + } + } + + const getProgressValue = (progress) => { + if (typeof progress === 'string' && progress.includes('%')) { + return parseInt(progress.replace('%', '')) + } + return progress || 0 + } + + if (isLoading) { + return ( + + + + Loading fine-tuning jobs... + + + ) + } + + if (!data || data.length === 0) { + return ( + + + No fine-tuning jobs found + + + ) + } + + return ( + + + + + Job Name + Status + Model + Dataset + Progress + Created Date + Actions + + + + {data.map((job) => ( + + + + {job.name} + + + + + + + + {job.model || 'N/A'} + + + + + {job.dataset || 'N/A'} + + + + + + + {job.progress || '0%'} + + + + + + {job.createdDate + ? formatDistanceToNow(job.createdDate) + : 'Unknown' + } + + + + handleMenuClick(e, job)} + > + + + + + ))} + +
+ + + { /* TODO: View job details */ handleMenuClose() }}> + + View Details + + { /* TODO: Download model */ handleMenuClose() }} + disabled={selectedJob?.status !== 'completed'} + > + + Download Model + + { /* TODO: Delete job */ handleMenuClose() }} + sx={{ color: 'error.main' }} + > + + Delete Job + + +
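The row-action menu above leaves its handlers as TODOs. A minimal sketch of the Delete action, assuming the table imports the finetuningApi client from PATCH 04 (whose deleteJob helper issues DELETE /finetuning/jobs/:id) and that the parent passes an onJobDeleted callback — both assumptions, not part of this patch:

    // sketch only: wiring the Delete menu item, not part of the patch above
    const handleDeleteJob = async () => {
        if (!selectedJob) return
        try {
            // finetuningApi.deleteJob comes from PATCH 04's @/api/finetuning client (assumed import)
            await finetuningApi.deleteJob(selectedJob.id)
            onJobDeleted?.(selectedJob.id) // hypothetical callback so the parent can refresh its list
        } catch (err) {
            console.error('Failed to delete fine-tuning job:', err)
        } finally {
            handleMenuClose()
        }
    }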
+ ) +} + +FinetuningJobsTable.propTypes = { + data: PropTypes.array.isRequired, + isLoading: PropTypes.bool +} + +FinetuningJobsTable.defaultProps = { + isLoading: false +} + +export default FinetuningJobsTable \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/views/finetuning/index.jsx b/studio-frontend/packages/ui/src/views/finetuning/index.jsx new file mode 100644 index 0000000..6219ff5 --- /dev/null +++ b/studio-frontend/packages/ui/src/views/finetuning/index.jsx @@ -0,0 +1,175 @@ +import { useEffect, useState } from 'react' +import { useNavigate } from 'react-router-dom' + +// material-ui +import { Box, Skeleton, Stack, ToggleButton, ToggleButtonGroup, Typography } from '@mui/material' +import { useTheme } from '@mui/material/styles' + +// project imports +import MainCard from '@/ui-component/cards/MainCard' +import ItemCard from '@/ui-component/cards/ItemCard' +import { gridSpacing } from '@/store/constant' +import WorkflowEmptySVG from '@/assets/images/workflow_empty.svg' +import LoginDialog from '@/ui-component/dialog/LoginDialog' +import ConfirmDialog from '@/ui-component/dialog/ConfirmDialog' +import { StyledButton } from '@/ui-component/button/StyledButton' +import ViewHeader from '@/layout/MainLayout/ViewHeader' +import ErrorBoundary from '@/ErrorBoundary' +import FinetuningJobsTable from './FinetuningJobsTable' +import FinetuningJobModal from './FinetuningJobModal' + +// API +import finetuningApi from '@/api/finetuning' + +// Hooks +import useApi from '@/hooks/useApi' + +// icons +import { IconPlus, IconLayoutGrid, IconList } from '@tabler/icons-react' + +//keycloak +import { useKeycloak } from '../../KeycloakContext' + +// ==============================|| Fine-tuning ||============================== // + +const Finetuning = () => { + const keycloak = useKeycloak() + const navigate = useNavigate() + const theme = useTheme() + + const [isLoading, setLoading] = useState(true) + const [error, setError] = useState(null) + const [search, setSearch] = useState('') + const [loginDialogOpen, setLoginDialogOpen] = useState(false) + const [loginDialogProps, setLoginDialogProps] = useState({}) + const [jobs, setJobs] = useState([]) + const [jobModalOpen, setJobModalOpen] = useState(false) + + let userRole = keycloak?.tokenParsed?.resource_access?.genaistudio?.roles[0] + let getAllJobsApi = null + + if (keycloak.authenticated) { + getAllJobsApi = useApi(finetuningApi.getAllJobs) + } + + useEffect(() => { + // Load fine-tuning jobs + loadJobs() + }, []) + + const loadJobs = async () => { + if (!getAllJobsApi) return + + try { + setLoading(true) + const response = await getAllJobsApi.request() + setJobs(response || []) + setLoading(false) + } catch (error) { + console.error('Error loading fine-tuning jobs:', error) + setJobs([]) + setError(error) + setLoading(false) + } + } + + const handleCreateJob = () => { + setJobModalOpen(true) + } + + const handleJobCreated = (newJob) => { + setJobs(prev => [...prev, newJob]) + setJobModalOpen(false) + } + + const filterJobs = (jobs) => { + return jobs.filter((job) => job.name.toLowerCase().includes(search.toLowerCase())) + } + + return ( + <> + + + + + Fine-tuning Jobs + + + } + sx={{ borderRadius: 2, height: 40 }} + > + Create New Job + + + + + {isLoading ? ( + + + + + + ) : ( + + {jobs.length === 0 ? ( + + + WorkflowEmptySVG + +

+                                <Typography variant='h5'>No Fine-tuning Jobs Yet</Typography>
+                                <Typography>Create your first fine-tuning job to get started!</Typography>
+ ) : ( + + )} +
+ )} +
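A note on the Finetuning view above: getAllJobsApi is assigned from useApi only when keycloak.authenticated is true, which calls a React hook conditionally and can break the rules of hooks when auth state changes between renders. A minimal sketch of the safer pattern, reusing the component's own names (a suggested rework, not part of the patch):

    // sketch only: the hook runs on every render; only the request is gated
    const getAllJobsApi = useApi(finetuningApi.getAllJobs)

    useEffect(() => {
        if (keycloak?.authenticated) {
            loadJobs()
        } else {
            setLoading(false) // nothing to load for unauthenticated users
        }
        // eslint-disable-next-line react-hooks/exhaustive-deps
    }, [keycloak?.authenticated])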
+
+ + setJobModalOpen(false)} + onJobCreated={handleJobCreated} + /> + + setLoginDialogOpen(false)} + onConfirm={() => setLoginDialogOpen(false)} + /> + + ) +} + +export default Finetuning \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/views/opeaflows/index.jsx b/studio-frontend/packages/ui/src/views/opeaflows/index.jsx index 1beefee..f2beda7 100644 --- a/studio-frontend/packages/ui/src/views/opeaflows/index.jsx +++ b/studio-frontend/packages/ui/src/views/opeaflows/index.jsx @@ -2,7 +2,7 @@ import { useEffect, useState } from 'react' import { useNavigate } from 'react-router-dom' // material-ui -import { Box, Skeleton, Stack, ToggleButton, ToggleButtonGroup } from '@mui/material' +import { Box, Skeleton, Stack, Input, Typography } from '@mui/material' import { useTheme } from '@mui/material/styles' // project imports @@ -27,7 +27,7 @@ import useApi from '@/hooks/useApi' import { baseURL } from '@/store/constant' // icons -import { IconPlus, IconLayoutGrid, IconList } from '@tabler/icons-react' +import { IconPlus, IconLayoutGrid, IconList, IconSearch } from '@tabler/icons-react' //keycloak import { useKeycloak } from '../../KeycloakContext' @@ -161,47 +161,58 @@ const Opeaflows = () => { ) : ( - - {/* + - - - - - - - */} - - - } sx={{ borderRadius: 2, height: 40, width: 250 }}> - Create New Workflow - - } sx={{ borderRadius: 2, height: 40, width: 250 }}> - Import Sample Workflows - + OPEA Flows + + + + } sx={{ borderRadius: 2, height: 40, width: 250 }}> + Create New Workflow + + } sx={{ borderRadius: 2, height: 40, width: 250 }}> + Import Sample Workflows + + + + + + + } + type='search' + /> + {!view || view === 'card' ? ( <> From 1acf5eb18f55e94777ef07ac07c95a962963555d Mon Sep 17 00:00:00 2001 From: wwanarif Date: Fri, 29 Aug 2025 08:23:26 +0000 Subject: [PATCH 05/23] update docker-compose.dev.yml Signed-off-by: wwanarif --- studio-frontend/docker-compose.dev.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/studio-frontend/docker-compose.dev.yml b/studio-frontend/docker-compose.dev.yml index 5d5133c..0b42ae5 100644 --- a/studio-frontend/docker-compose.dev.yml +++ b/studio-frontend/docker-compose.dev.yml @@ -2,6 +2,7 @@ version: '3.8' services: studio-frontend: + image: studio-frontend:latest build: context: . 
dockerfile: Dockerfile From 044b4c3707d061124e4696820193721dcc88c0df Mon Sep 17 00:00:00 2001 From: wwanarif Date: Mon, 13 Oct 2025 03:40:44 +0000 Subject: [PATCH 06/23] update dev env Signed-off-by: wwanarif --- studio-frontend/.env.development | 5 +++ studio-frontend/docker-compose.dev.yml | 2 +- studio-frontend/packages/server/src/index.ts | 35 +++++++++++++++---- studio-frontend/packages/ui/.env.development | 2 -- .../packages/ui/src/menu-items/dashboard.js | 2 +- .../packages/ui/src/views/opeaflows/index.jsx | 2 +- 6 files changed, 37 insertions(+), 11 deletions(-) create mode 100644 studio-frontend/.env.development delete mode 100644 studio-frontend/packages/ui/.env.development diff --git a/studio-frontend/.env.development b/studio-frontend/.env.development new file mode 100644 index 0000000..addfaac --- /dev/null +++ b/studio-frontend/.env.development @@ -0,0 +1,5 @@ +NODE_TLS_REJECT_UNAUTHORIZED=0 +VITE_DISABLE_KEYCLOAK=true +NODE_ENV=development +VITE_HOST=0.0.0.0 +VITE_PORT=8088 \ No newline at end of file diff --git a/studio-frontend/docker-compose.dev.yml b/studio-frontend/docker-compose.dev.yml index 0b42ae5..92f4cbb 100644 --- a/studio-frontend/docker-compose.dev.yml +++ b/studio-frontend/docker-compose.dev.yml @@ -19,7 +19,7 @@ services: - /usr/src/node_modules - /usr/src/.pnpm-store - /usr/src/packages/ui/build - command: ["sh", "-c", "pnpm dev"] + command: ["sh", "-c", "cp /usr/src/.env.development /usr/src/packages/ui/.env; cp /usr/src/.env.development /usr/src/packages/server/.env; pnpm dev"] environment: - http_proxy=${http_proxy} - https_proxy=${https_proxy} diff --git a/studio-frontend/packages/server/src/index.ts b/studio-frontend/packages/server/src/index.ts index 9fa5dad..5b3aa21 100644 --- a/studio-frontend/packages/server/src/index.ts +++ b/studio-frontend/packages/server/src/index.ts @@ -227,13 +227,36 @@ export class App { const packagePath = getNodeModulesPackagePath('flowise-ui') const uiBuildPath = path.join(packagePath, 'build') const uiHtmlPath = path.join(packagePath, 'build', 'index.html') + const nodeEnv = process.env.NODE_ENV || 'undefined' + + // Treat any non-production environment as development for the landing page + if (nodeEnv === 'development') { + this.app.get('/', (req: Request, res: Response) => { + res.send(` + + + + + Flowise Server (development) + + + +

+                        <h1>Flowise Server</h1>
+                        <p>Mode: development</p>
+                        <p>Server is listening on port 3000.</p>
+                        <p>UI is listening on port 8088.</p>
+                        <p>Ping API</p>

+ + `) + }) + } else { + this.app.use('/', express.static(uiBuildPath)) - this.app.use('/', express.static(uiBuildPath)) - - // All other requests not handled will return React app - this.app.use((req: Request, res: Response) => { - res.sendFile(uiHtmlPath) - }) + // All other requests not handled will return React app + this.app.use((req: Request, res: Response) => { + res.sendFile(uiHtmlPath) + }) + } // Error handling this.app.use(errorHandlerMiddleware) diff --git a/studio-frontend/packages/ui/.env.development b/studio-frontend/packages/ui/.env.development deleted file mode 100644 index 3269e6c..0000000 --- a/studio-frontend/packages/ui/.env.development +++ /dev/null @@ -1,2 +0,0 @@ -NODE_TLS_REJECT_UNAUTHORIZED=0 -VITE_DISABLE_KEYCLOAK=true \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/menu-items/dashboard.js b/studio-frontend/packages/ui/src/menu-items/dashboard.js index 23acd8d..09bbb15 100644 --- a/studio-frontend/packages/ui/src/menu-items/dashboard.js +++ b/studio-frontend/packages/ui/src/menu-items/dashboard.js @@ -25,7 +25,7 @@ const dashboard = { children: [ { id: 'opeaflows', - title: 'OPEA Flows', + title: 'Workflows', type: 'item', url: '/opeaflows', icon: icons.IconApps, diff --git a/studio-frontend/packages/ui/src/views/opeaflows/index.jsx b/studio-frontend/packages/ui/src/views/opeaflows/index.jsx index f2beda7..5e7e28f 100644 --- a/studio-frontend/packages/ui/src/views/opeaflows/index.jsx +++ b/studio-frontend/packages/ui/src/views/opeaflows/index.jsx @@ -172,7 +172,7 @@ const Opeaflows = () => { }} variant='h1' > - OPEA Flows + Workflows From ba660fb88bc677d1856730ec96435ecead373c1b Mon Sep 17 00:00:00 2001 From: wwanarif Date: Tue, 14 Oct 2025 09:37:56 +0000 Subject: [PATCH 07/23] initial push for finetuning api server Signed-off-by: wwanarif --- .../react/src/components/SideBar/SideBar.tsx | 1 - studio-frontend/.env.development | 3 +- studio-frontend/docker-compose.dev.yml | 12 +- studio-frontend/packages/server/package.json | 1 + .../src/controllers/finetuning/index.ts | 167 +++++ .../database/entities/FineTuningCheckpoint.ts | 19 + .../src/database/entities/FineTuningJob.ts | 55 ++ .../server/src/database/entities/index.ts | 6 +- .../1760424809635-AddFineTuningTables.ts | 45 ++ .../src/database/migrations/mysql/index.ts | 4 +- .../1760424809635-AddFineTuningTables.ts | 46 ++ .../src/database/migrations/sqlite/index.ts | 2 + .../server/src/middlewares/errors/index.ts | 14 +- .../server/src/routes/finetuning/index.ts | 32 + .../packages/server/src/routes/index.ts | 2 + .../server/src/services/finetuning/index.ts | 619 ++++++++++++++++++ studio-frontend/packages/ui/src/api/client.js | 2 +- .../packages/ui/src/api/finetuning.js | 112 ++-- .../packages/ui/src/hooks/useApi.jsx | 2 + .../src/ui-component/button/AnimateButton.jsx | 12 +- .../src/ui-component/extended/Transitions.jsx | 8 +- .../src/views/finetuning/FileUploadArea.jsx | 28 +- .../views/finetuning/FinetuningJobModal.jsx | 192 ++++-- .../views/finetuning/FinetuningJobsTable.jsx | 213 +++++- .../ui/src/views/finetuning/index.jsx | 42 +- 25 files changed, 1459 insertions(+), 180 deletions(-) create mode 100644 studio-frontend/packages/server/src/controllers/finetuning/index.ts create mode 100644 studio-frontend/packages/server/src/database/entities/FineTuningCheckpoint.ts create mode 100644 studio-frontend/packages/server/src/database/entities/FineTuningJob.ts create mode 100644 studio-frontend/packages/server/src/database/migrations/mysql/1760424809635-AddFineTuningTables.ts create 
mode 100644 studio-frontend/packages/server/src/database/migrations/sqlite/1760424809635-AddFineTuningTables.ts create mode 100644 studio-frontend/packages/server/src/routes/finetuning/index.ts create mode 100644 studio-frontend/packages/server/src/services/finetuning/index.ts diff --git a/app-frontend/react/src/components/SideBar/SideBar.tsx b/app-frontend/react/src/components/SideBar/SideBar.tsx index ee356c9..26873e8 100644 --- a/app-frontend/react/src/components/SideBar/SideBar.tsx +++ b/app-frontend/react/src/components/SideBar/SideBar.tsx @@ -79,7 +79,6 @@ export const LinkedMenuItem: React.FC = ({ to={toWithQuery(to)} onClick={onClick} tabIndex={open ? 0 : -1} - aria-hidden={!open} > {children} diff --git a/studio-frontend/.env.development b/studio-frontend/.env.development index addfaac..e3f00ca 100644 --- a/studio-frontend/.env.development +++ b/studio-frontend/.env.development @@ -2,4 +2,5 @@ NODE_TLS_REJECT_UNAUTHORIZED=0 VITE_DISABLE_KEYCLOAK=true NODE_ENV=development VITE_HOST=0.0.0.0 -VITE_PORT=8088 \ No newline at end of file +VITE_PORT=8088 +FINETUNING_SERVICE_URL=http://${HOST_IP}:8015 \ No newline at end of file diff --git a/studio-frontend/docker-compose.dev.yml b/studio-frontend/docker-compose.dev.yml index 92f4cbb..ff473e3 100644 --- a/studio-frontend/docker-compose.dev.yml +++ b/studio-frontend/docker-compose.dev.yml @@ -16,14 +16,18 @@ services: - "8088:8088" volumes: - .:/usr/src - - /usr/src/node_modules - - /usr/src/.pnpm-store + - node_modules:/usr/src/node_modules + - pnpm_store:/usr/src/.pnpm-store - /usr/src/packages/ui/build - command: ["sh", "-c", "cp /usr/src/.env.development /usr/src/packages/ui/.env; cp /usr/src/.env.development /usr/src/packages/server/.env; pnpm dev"] + command: ["sh", "-c", "cp /usr/src/.env.development /usr/src/packages/ui/.env; cp /usr/src/.env.development /usr/src/packages/server/.env; pnpm install; pnpm dev"] environment: - http_proxy=${http_proxy} - https_proxy=${https_proxy} - no_proxy=${no_proxy} stdin_open: true tty: true - restart: unless-stopped \ No newline at end of file + restart: unless-stopped + +volumes: + node_modules: + pnpm_store: \ No newline at end of file diff --git a/studio-frontend/packages/server/package.json b/studio-frontend/packages/server/package.json index 29ca02a..7ef55d6 100644 --- a/studio-frontend/packages/server/package.json +++ b/studio-frontend/packages/server/package.json @@ -76,6 +76,7 @@ "moment-timezone": "^0.5.34", "multer": "^1.4.5-lts.1", "mysql2": "^3.9.2", + "form-data": "^4.0.0", "openai": "^4.57.3", "pg": "^8.11.1", "posthog-node": "^3.5.0", diff --git a/studio-frontend/packages/server/src/controllers/finetuning/index.ts b/studio-frontend/packages/server/src/controllers/finetuning/index.ts new file mode 100644 index 0000000..211c8c6 --- /dev/null +++ b/studio-frontend/packages/server/src/controllers/finetuning/index.ts @@ -0,0 +1,167 @@ +import { Request, Response, NextFunction } from 'express' +import { StatusCodes } from 'http-status-codes' +import { InternalFlowiseError } from '../../errors/internalFlowiseError' +import finetuningService from '../../services/finetuning' + +/** + * Upload a training file + * POST /api/v1/finetuning/files + */ +const uploadTrainingFile = async (req: Request, res: Response, next: NextFunction) => { + try { + if (!req.file) { + // Debug: log request body and files to help trace upload issues + console.debug('finetuningController.uploadTrainingFile - no file received. 
req.body=', req.body, 'req.files=', (req as any).files) + throw new InternalFlowiseError(StatusCodes.BAD_REQUEST, 'Error: finetuningController.uploadTrainingFile - file not provided!') + } + + const purpose = req.body.purpose || 'fine-tune' + const apiResponse = await finetuningService.uploadTrainingFile(req.file, purpose) + return res.json(apiResponse) + } catch (error) { + next(error) + } +} + +/** + * Create a fine-tuning job + * POST /api/v1/finetuning/jobs + */ +const createFineTuningJob = async (req: Request, res: Response, next: NextFunction) => { + try { + const hasFile = !!req.body?.training_file || !!(req.body as any).training_file_id + if (!req.body || !hasFile || !req.body.model) { + throw new InternalFlowiseError( + StatusCodes.BAD_REQUEST, + 'Error: finetuningController.createFineTuningJob - model and training_file (or training_file_id) are required!' + ) + } + + const apiResponse = await finetuningService.createFineTuningJob(req.body) + return res.json(apiResponse) + } catch (error) { + next(error) + } +} + +/** + * List all fine-tuning jobs + * GET /api/v1/finetuning/jobs + */ +const listFineTuningJobs = async (req: Request, res: Response, next: NextFunction) => { + try { + const apiResponse = await finetuningService.listFineTuningJobs() + return res.json(apiResponse) + } catch (error) { + next(error) + } +} + +/** + * Retrieve a specific fine-tuning job + * POST /api/v1/finetuning/jobs/retrieve + */ +const retrieveFineTuningJob = async (req: Request, res: Response, next: NextFunction) => { + try { + if (typeof req.body === 'undefined' || !req.body.fine_tuning_job_id) { + throw new InternalFlowiseError( + StatusCodes.BAD_REQUEST, + 'Error: finetuningController.retrieveFineTuningJob - fine_tuning_job_id not provided!' + ) + } + + const apiResponse = await finetuningService.retrieveFineTuningJob(req.body.fine_tuning_job_id) + return res.json(apiResponse) + } catch (error) { + next(error) + } +} + +/** + * Cancel a fine-tuning job + * POST /api/v1/finetuning/jobs/cancel + */ +const cancelFineTuningJob = async (req: Request, res: Response, next: NextFunction) => { + try { + if (typeof req.body === 'undefined' || !req.body.fine_tuning_job_id) { + throw new InternalFlowiseError( + StatusCodes.BAD_REQUEST, + 'Error: finetuningController.cancelFineTuningJob - fine_tuning_job_id not provided!' + ) + } + + const apiResponse = await finetuningService.cancelFineTuningJob(req.body.fine_tuning_job_id) + return res.json(apiResponse) + } catch (error) { + next(error) + } +} + +/** + * Delete a fine-tuning job (cancel remote if possible and remove local records) + * POST /api/v1/finetuning/jobs/delete + */ +const deleteFineTuningJob = async (req: Request, res: Response, next: NextFunction) => { + try { + if (typeof req.body === 'undefined' || !req.body.fine_tuning_job_id) { + throw new InternalFlowiseError( + StatusCodes.BAD_REQUEST, + 'Error: finetuningController.deleteFineTuningJob - fine_tuning_job_id not provided!' 
+ ) + } + + const apiResponse = await finetuningService.deleteFineTuningJob(req.body.fine_tuning_job_id) + return res.json(apiResponse) + } catch (error) { + next(error) + } +} + +/** + * List checkpoints of a fine-tuning job + * POST /api/v1/finetuning/jobs/checkpoints + */ +const listFineTuningCheckpoints = async (req: Request, res: Response, next: NextFunction) => { + try { + if (typeof req.body === 'undefined' || !req.body.fine_tuning_job_id) { + throw new InternalFlowiseError( + StatusCodes.BAD_REQUEST, + 'Error: finetuningController.listFineTuningCheckpoints - fine_tuning_job_id not provided!' + ) + } + + const apiResponse = await finetuningService.listFineTuningCheckpoints(req.body.fine_tuning_job_id) + return res.json(apiResponse) + } catch (error) { + next(error) + } +} + +/** + * Debug: proxy an arbitrary job payload to the finetuning service and return raw response + * POST /api/v1/finetuning/debug/proxy-job + */ +const proxyJobDebug = async (req: Request, res: Response, next: NextFunction) => { + try { + if (typeof req.body === 'undefined') { + throw new InternalFlowiseError(StatusCodes.BAD_REQUEST, 'Error: finetuningController.proxyJobDebug - body is required') + } + + const apiResponse = await finetuningService.proxyJobDebug(req.body) + // Return the raw response object from the finetuning service + return res.status(apiResponse.status).send(apiResponse.body) + } catch (error) { + next(error) + } +} + +export default { + uploadTrainingFile, + createFineTuningJob, + listFineTuningJobs, + retrieveFineTuningJob, + cancelFineTuningJob, + deleteFineTuningJob, + listFineTuningCheckpoints, + proxyJobDebug +} diff --git a/studio-frontend/packages/server/src/database/entities/FineTuningCheckpoint.ts b/studio-frontend/packages/server/src/database/entities/FineTuningCheckpoint.ts new file mode 100644 index 0000000..9d119f3 --- /dev/null +++ b/studio-frontend/packages/server/src/database/entities/FineTuningCheckpoint.ts @@ -0,0 +1,19 @@ +import { Entity, Column, PrimaryColumn, CreateDateColumn } from 'typeorm' + +@Entity('fine_tuning_checkpoint') +export class FineTuningCheckpoint { + @PrimaryColumn() + id!: string + + @Column() + fine_tuning_job_id!: string + + @Column() + filename!: string + + @Column({ type: 'text', nullable: true }) + metadata?: string + + @CreateDateColumn({ type: 'datetime' }) + createdDate!: Date +} diff --git a/studio-frontend/packages/server/src/database/entities/FineTuningJob.ts b/studio-frontend/packages/server/src/database/entities/FineTuningJob.ts new file mode 100644 index 0000000..342cd06 --- /dev/null +++ b/studio-frontend/packages/server/src/database/entities/FineTuningJob.ts @@ -0,0 +1,55 @@ +import { Entity, Column, PrimaryColumn, CreateDateColumn, UpdateDateColumn } from 'typeorm' + +@Entity('fine_tuning_job') +export class FineTuningJob { + @PrimaryColumn() + id!: string + + @Column({ nullable: true }) + name?: string + + @Column({ nullable: true }) + model?: string + + @Column({ nullable: true }) + task?: string + + @Column({ nullable: true }) + status?: string + + @Column({ nullable: true }) + training_file?: string + + @Column({ nullable: true }) + training_file_id?: string + + @Column({ type: 'text', nullable: true }) + lora_config?: string + + @Column({ type: 'text', nullable: true }) + hyperparameters?: string + + @Column({ type: 'text', nullable: true }) + result_files?: string + + @Column({ type: 'text', nullable: true }) + error?: string + + @Column({ nullable: true, type: 'int' }) + progress?: number + + @Column({ nullable: true, type: 
'int' }) + trained_tokens?: number + + @Column({ nullable: true, type: 'datetime' }) + estimated_finish?: Date + + @Column({ nullable: true, type: 'datetime' }) + finishedDate?: Date + + @CreateDateColumn({ type: 'datetime' }) + createdDate!: Date + + @UpdateDateColumn({ type: 'datetime' }) + updatedDate!: Date +} diff --git a/studio-frontend/packages/server/src/database/entities/index.ts b/studio-frontend/packages/server/src/database/entities/index.ts index 4cb079b..caa35dc 100644 --- a/studio-frontend/packages/server/src/database/entities/index.ts +++ b/studio-frontend/packages/server/src/database/entities/index.ts @@ -11,6 +11,8 @@ import { Lead } from './Lead' import { UpsertHistory } from './UpsertHistory' import { ApiKey } from './ApiKey' import { CustomTemplate } from './CustomTemplate' +import { FineTuningJob } from './FineTuningJob' +import { FineTuningCheckpoint } from './FineTuningCheckpoint' export const entities = { ChatFlow, @@ -25,5 +27,7 @@ export const entities = { Lead, UpsertHistory, ApiKey, - CustomTemplate + CustomTemplate, + FineTuningJob, + FineTuningCheckpoint } diff --git a/studio-frontend/packages/server/src/database/migrations/mysql/1760424809635-AddFineTuningTables.ts b/studio-frontend/packages/server/src/database/migrations/mysql/1760424809635-AddFineTuningTables.ts new file mode 100644 index 0000000..1b9bb8b --- /dev/null +++ b/studio-frontend/packages/server/src/database/migrations/mysql/1760424809635-AddFineTuningTables.ts @@ -0,0 +1,45 @@ +import { MigrationInterface, QueryRunner } from 'typeorm' + +export class AddFineTuningTables1760424809635 implements MigrationInterface { + public async up(queryRunner: QueryRunner): Promise { + await queryRunner.query( + `CREATE TABLE IF NOT EXISTS fine_tuning_job ( + id varchar(255) PRIMARY KEY NOT NULL, + name varchar(255), + model varchar(255), + task varchar(255), + status varchar(255), + training_file varchar(255), + training_file_id varchar(255), + lora_config longtext, + hyperparameters longtext, + result_files longtext, + error longtext, + progress int, + trained_tokens int, + estimated_finish datetime, + finishedDate datetime, + createdDate datetime NOT NULL DEFAULT CURRENT_TIMESTAMP, + updatedDate datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP + ) ENGINE=InnoDB; + ` + ) + + await queryRunner.query( + `CREATE TABLE IF NOT EXISTS fine_tuning_checkpoint ( + id varchar(255) PRIMARY KEY NOT NULL, + fine_tuning_job_id varchar(255) NOT NULL, + filename varchar(255) NOT NULL, + metadata longtext, + createdDate datetime NOT NULL DEFAULT CURRENT_TIMESTAMP, + INDEX IDX_fine_tuning_checkpoint_job (fine_tuning_job_id) + ) ENGINE=InnoDB; + ` + ) + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.query(`DROP TABLE IF EXISTS fine_tuning_checkpoint`) + await queryRunner.query(`DROP TABLE IF EXISTS fine_tuning_job`) + } +} diff --git a/studio-frontend/packages/server/src/database/migrations/mysql/index.ts b/studio-frontend/packages/server/src/database/migrations/mysql/index.ts index 3645d89..6df72eb 100644 --- a/studio-frontend/packages/server/src/database/migrations/mysql/index.ts +++ b/studio-frontend/packages/server/src/database/migrations/mysql/index.ts @@ -30,6 +30,7 @@ import { AddStudioFieldsToChatFlow1733282099772 } from './1733282099772-AddStudi import { AddSandboxTracerUrlToChatFlow1743740099772 } from './1743740099772-AddSandboxTracerUrlToChatFlow' import { AddSandboxDebugLogsUrlToChatFlow1749612373191 } from './1749612373191-AddSandboxDebugLogsUrlToChatFlow' 
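+// Illustrative sketch (assumption, not part of this change): TypeORM picks these
+// migrations up from the DataSource options, so registering the new entry below
+// is all that is needed for it to run on startup, e.g.:
+//   const dataSource = new DataSource({
+//       type: 'mysql',
+//       entities: Object.values(entities),
+//       migrations: mysqlMigrations,
+//       migrationsRun: true
+//   })
+//   await dataSource.initialize() // applies AddFineTuningTables1760424809635 once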
import { AddDeploymentStatusToChatFlow1754700956637 } from './1754700956637-AddDeploymentStatusToChatFlow' +import { AddFineTuningTables1760424809635 } from './1760424809635-AddFineTuningTables' export const mysqlMigrations = [ @@ -64,5 +65,6 @@ export const mysqlMigrations = [ AddStudioFieldsToChatFlow1733282099772, AddSandboxTracerUrlToChatFlow1743740099772, AddSandboxDebugLogsUrlToChatFlow1749612373191, - AddDeploymentStatusToChatFlow1754700956637 + AddDeploymentStatusToChatFlow1754700956637, + AddFineTuningTables1760424809635 ] diff --git a/studio-frontend/packages/server/src/database/migrations/sqlite/1760424809635-AddFineTuningTables.ts b/studio-frontend/packages/server/src/database/migrations/sqlite/1760424809635-AddFineTuningTables.ts new file mode 100644 index 0000000..15288cb --- /dev/null +++ b/studio-frontend/packages/server/src/database/migrations/sqlite/1760424809635-AddFineTuningTables.ts @@ -0,0 +1,46 @@ +import { MigrationInterface, QueryRunner } from 'typeorm' + +export class AddFineTuningTables1760424809635 implements MigrationInterface { + public async up(queryRunner: QueryRunner): Promise { + await queryRunner.query( + `CREATE TABLE IF NOT EXISTS "fine_tuning_job" ( + "id" varchar PRIMARY KEY NOT NULL, + "name" varchar, + "model" varchar, + "task" varchar, + "status" varchar, + "training_file" varchar, + "training_file_id" varchar, + "lora_config" text, + "hyperparameters" text, + "result_files" text, + "error" text, + "progress" integer, + "trained_tokens" integer, + "estimated_finish" datetime, + "finishedDate" datetime, + "createdDate" datetime NOT NULL DEFAULT (datetime('now')), + "updatedDate" datetime NOT NULL DEFAULT (datetime('now')) + );` + ) + + await queryRunner.query( + `CREATE TABLE IF NOT EXISTS "fine_tuning_checkpoint" ( + "id" varchar PRIMARY KEY NOT NULL, + "fine_tuning_job_id" varchar NOT NULL, + "filename" varchar NOT NULL, + "metadata" text, + "createdDate" datetime NOT NULL DEFAULT (datetime('now')) + );` + ) + + await queryRunner.query( + `CREATE INDEX IF NOT EXISTS "IDX_fine_tuning_checkpoint_job" ON "fine_tuning_checkpoint" ("fine_tuning_job_id") ;` + ) + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.query(`DROP TABLE IF EXISTS fine_tuning_checkpoint`) + await queryRunner.query(`DROP TABLE IF EXISTS fine_tuning_job`) + } +} diff --git a/studio-frontend/packages/server/src/database/migrations/sqlite/index.ts b/studio-frontend/packages/server/src/database/migrations/sqlite/index.ts index c9ed343..1b87c17 100644 --- a/studio-frontend/packages/server/src/database/migrations/sqlite/index.ts +++ b/studio-frontend/packages/server/src/database/migrations/sqlite/index.ts @@ -29,6 +29,7 @@ import { AddStudioFieldsToChatFlow1733282099772 } from './1733282099772-AddStudi import { AddSandboxTracerUrlToChatFlow1743740099772 } from './1743740099772-AddSandboxTracerUrlToChatFlow' import { AddSandboxDebugLogsUrlToChatFlow1749612373191 } from './1749612373191-AddSandboxDebugLogsUrlToChatFlow' import { AddDeploymentStatusToChatFlow1754700956637 } from './1754700956637-AddDeploymentStatusToChatFlow' +import { AddFineTuningTables1760424809635 } from './1760424809635-AddFineTuningTables' export const sqliteMigrations = [ Init1693835579790, @@ -62,4 +63,5 @@ export const sqliteMigrations = [ AddSandboxTracerUrlToChatFlow1743740099772, AddSandboxDebugLogsUrlToChatFlow1749612373191, AddDeploymentStatusToChatFlow1754700956637 + ,AddFineTuningTables1760424809635 ] diff --git 
a/studio-frontend/packages/server/src/middlewares/errors/index.ts b/studio-frontend/packages/server/src/middlewares/errors/index.ts index 75cd2c2..06b5422 100644 --- a/studio-frontend/packages/server/src/middlewares/errors/index.ts +++ b/studio-frontend/packages/server/src/middlewares/errors/index.ts @@ -5,14 +5,24 @@ import { InternalFlowiseError } from '../../errors/internalFlowiseError' // we need eslint because we have to pass next arg for the error middleware // eslint-disable-next-line async function errorHandlerMiddleware(err: InternalFlowiseError, req: Request, res: Response, next: NextFunction) { - let displayedError = { + // Safely read streaming flag from body (req.body may be undefined) + const streamingFlag = req && (req as any).body ? (req as any).body.streaming : undefined + + // Build the response payload + const displayedError = { statusCode: err.statusCode || StatusCodes.INTERNAL_SERVER_ERROR, success: false, message: err.message, // Provide error stack trace only in development stack: process.env.NODE_ENV === 'development' ? err.stack : {} } - if (!req.body.streaming || req.body.streaming === 'false') { + + // Log the error server-side for easier debugging + // Keep this server-side only; we still control what is returned to the client + // eslint-disable-next-line no-console + console.error('Unhandled error caught by errorHandlerMiddleware:', err) + + if (!streamingFlag || streamingFlag === 'false') { res.setHeader('Content-Type', 'application/json') res.status(displayedError.statusCode).json(displayedError) } diff --git a/studio-frontend/packages/server/src/routes/finetuning/index.ts b/studio-frontend/packages/server/src/routes/finetuning/index.ts new file mode 100644 index 0000000..1ee9fe8 --- /dev/null +++ b/studio-frontend/packages/server/src/routes/finetuning/index.ts @@ -0,0 +1,32 @@ +import express from 'express' +import multer from 'multer' +import finetuningController from '../../controllers/finetuning' + +const router = express.Router() + +// Use memory storage for multer to store files in buffer +const upload = multer({ storage: multer.memoryStorage() }) + +// Upload training file +router.post('/files', upload.single('file'), finetuningController.uploadTrainingFile) + +// Create fine-tuning job +router.post('/jobs', finetuningController.createFineTuningJob) + +// Debug: proxy an arbitrary job payload to the external finetuning service +router.post('/debug/proxy-job', finetuningController.proxyJobDebug) + +// List all fine-tuning jobs +router.get('/jobs', finetuningController.listFineTuningJobs) + +// Retrieve a specific fine-tuning job +router.post('/jobs/retrieve', finetuningController.retrieveFineTuningJob) + +// Cancel a fine-tuning job +router.post('/jobs/cancel', finetuningController.cancelFineTuningJob) +router.post('/jobs/delete', finetuningController.deleteFineTuningJob) + +// List checkpoints of a fine-tuning job +router.post('/jobs/checkpoints', finetuningController.listFineTuningCheckpoints) + +export default router diff --git a/studio-frontend/packages/server/src/routes/index.ts b/studio-frontend/packages/server/src/routes/index.ts index 6501f55..e1a92a5 100644 --- a/studio-frontend/packages/server/src/routes/index.ts +++ b/studio-frontend/packages/server/src/routes/index.ts @@ -13,6 +13,7 @@ import documentStoreRouter from './documentstore' import exportImportRouter from './export-import' import feedbackRouter from './feedback' import fetchLinksRouter from './fetch-links' +import finetuningRouter from './finetuning' import flowConfigRouter from 
'./flow-config' import getUploadFileRouter from './get-upload-file' import getUploadPathRouter from './get-upload-path' @@ -59,6 +60,7 @@ router.use('/document-store', documentStoreRouter) router.use('/export-import', exportImportRouter) router.use('/feedback', feedbackRouter) router.use('/fetch-links', fetchLinksRouter) +router.use('/finetuning', finetuningRouter) router.use('/flow-config', flowConfigRouter) router.use('/internal-chatmessage', internalChatmessagesRouter) router.use('/internal-prediction', internalPredictionRouter) diff --git a/studio-frontend/packages/server/src/services/finetuning/index.ts b/studio-frontend/packages/server/src/services/finetuning/index.ts new file mode 100644 index 0000000..34910d7 --- /dev/null +++ b/studio-frontend/packages/server/src/services/finetuning/index.ts @@ -0,0 +1,619 @@ +import axios, { AxiosInstance } from 'axios' +import http from 'http' +import https from 'https' +import { StatusCodes } from 'http-status-codes' +import { InternalFlowiseError } from '../../errors/internalFlowiseError' +import { getErrorMessage } from '../../errors/utils' +import { getRunningExpressApp } from '../../utils/getRunningExpressApp' +import { FineTuningJob } from '../../database/entities/FineTuningJob' +import { FineTuningCheckpoint } from '../../database/entities/FineTuningCheckpoint' + +// Get finetuning service URL from environment variable or use default +// Note: use host network IP instead of localhost so containerized server can reach the finetuning service +const FINETUNING_SERVICE_URL = process.env.FINETUNING_SERVICE_URL || 'undefined' + +// Create an axios client with keep-alive to reduce connection churn +const agentOptions = { keepAlive: true, maxSockets: 20 } +const httpAgent = new http.Agent(agentOptions) +const httpsAgent = new https.Agent(agentOptions) + +const axiosClient: AxiosInstance = axios.create({ + baseURL: FINETUNING_SERVICE_URL, + timeout: 60000, // increase timeout to 60s + httpAgent, + httpsAgent, + headers: { + 'Content-Type': 'application/json' + } +}) + +// In-memory mapping: filename (raw and decoded) -> { id, rawFilename } +const uploadedFileIdMap: Map = new Map() + +/** + * Upload a training file to the finetuning service + */ +const uploadTrainingFile = async (file: Express.Multer.File, purpose: string = 'fine-tune') => { + try { + // Create FormData using the browser/Node.js FormData API + const FormData = require('form-data') + const formData = new FormData() + + formData.append('file', file.buffer, { + filename: file.originalname, + contentType: file.mimetype + }) + formData.append('purpose', purpose) + + const response = await axios.post(`${FINETUNING_SERVICE_URL}/v1/files`, formData, { + headers: { + ...formData.getHeaders() + } + }) + + // Debug: log the response from the finetuning service for uploaded file + try { + // eslint-disable-next-line no-console + console.debug('finetuningService.uploadTrainingFile - response.data:', response.data) + } catch (logErr) { + // ignore logging errors + } + + // If the finetuning service returned an id and a filename, store mappings for both + try { + const returnedId = response?.data?.id || response?.data?.file_id || response?.data?.name || undefined + const returnedFilenameRaw = response?.data?.filename || response?.data?.name || undefined + if (returnedId && returnedFilenameRaw) { + // store both raw and decoded filename keys + let decodedFilename = returnedFilenameRaw + try { + decodedFilename = decodeURIComponent(returnedFilenameRaw) + } catch (e) { + // ignore decode errors + } 
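+ // Illustrative example of the resulting entries (hypothetical id and filename):
+ // a file the service stores as "train%20set.jsonl" produces
+ //   uploadedFileIdMap.get('train%20set.jsonl') -> { id: 'file-123', rawFilename: 'train%20set.jsonl' }
+ //   uploadedFileIdMap.get('train set.jsonl')   -> { id: 'file-123', rawFilename: 'train%20set.jsonl' }
+ // so createFineTuningJob can resolve either spelling back to the raw stored name.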
+ + const entry = { id: returnedId, rawFilename: returnedFilenameRaw } + uploadedFileIdMap.set(returnedFilenameRaw, entry) + if (decodedFilename !== returnedFilenameRaw) { + uploadedFileIdMap.set(decodedFilename, entry) + } + + // eslint-disable-next-line no-console + console.debug('finetuningService.uploadTrainingFile - stored mapping', decodedFilename, '<->', returnedFilenameRaw, '->', returnedId) + } + } catch (e) { + // ignore mapping errors + } + + return response.data + } catch (error: any) { + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.uploadTrainingFile - ${getErrorMessage(error)}` + ) + } +} + +// Helper: persist or update a fine-tuning job record in the local DB +const persistJobToDb = async (jobData: any) => { + try { + if (!jobData) return + const appServer = getRunningExpressApp() + if (!appServer || !appServer.AppDataSource) return + + const repo = appServer.AppDataSource.getRepository(FineTuningJob) + + // Determine canonical id from the response + const id = jobData.id || jobData.job_id || jobData.fine_tuning_job_id || jobData.fine_tuning_id + if (!id) return + + // Build entity object mapping common fields; fall back to stringifying objects + const entity: any = { + id: String(id), + name: jobData.name || jobData.id || undefined, + model: jobData.model || undefined, + status: jobData.status || jobData.state || undefined, + training_file: jobData.training_file || jobData.trainingFile || undefined, + training_file_id: jobData.training_file_id || undefined, + task: jobData.General?.task || jobData.task || undefined, + progress: typeof jobData.progress === 'number' ? jobData.progress : undefined, + trained_tokens: typeof jobData.trained_tokens === 'number' ? jobData.trained_tokens : undefined + } + + if (jobData.General) { + try { + entity.lora_config = typeof jobData.General.lora_config === 'object' + ? JSON.stringify(jobData.General.lora_config) + : jobData.General.lora_config ? String(jobData.General.lora_config) : undefined + } catch (e) { + // ignore + } + } + + if (jobData.hyperparameters) { + try { + entity.hyperparameters = typeof jobData.hyperparameters === 'object' ? JSON.stringify(jobData.hyperparameters) : String(jobData.hyperparameters) + } catch (e) {} + } + + if (jobData.result_files) { + try { + entity.result_files = typeof jobData.result_files === 'object' ? JSON.stringify(jobData.result_files) : String(jobData.result_files) + } catch (e) {} + } + + if (jobData.error) { + try { + entity.error = typeof jobData.error === 'object' ? 
JSON.stringify(jobData.error) : String(jobData.error) + } catch (e) {} + } + + if (jobData.estimated_finish) { + entity.estimated_finish = new Date(jobData.estimated_finish) + } + if (jobData.finishedDate || jobData.finished_at || jobData.completed_at) { + entity.finishedDate = new Date(jobData.finishedDate || jobData.finished_at || jobData.completed_at) + } + + // Upsert: merge if exists + let existing = await repo.findOneBy({ id: String(id) }) + if (!existing) { + const created = repo.create(entity) + await repo.save(created) + } else { + repo.merge(existing, entity) + await repo.save(existing) + } + } catch (e) { + // Don't fail the main flow if DB persistence fails; only log + try { + // eslint-disable-next-line no-console + console.error('finetuningService.persistJobToDb - failed to persist job', e) + } catch (logErr) { + // ignore + } + } +} + +// Helper: update specific fields for a job in the DB (best-effort) +const updateJobInDb = async (jobId: string, updates: Partial) => { + try { + if (!jobId) return + const appServer = getRunningExpressApp() + if (!appServer || !appServer.AppDataSource) return + const repo = appServer.AppDataSource.getRepository(FineTuningJob) + const existing = await repo.findOneBy({ id: String(jobId) }) + if (!existing) return + repo.merge(existing, updates) + await repo.save(existing) + } catch (e) { + try { + // eslint-disable-next-line no-console + console.error('finetuningService.updateJobInDb - failed to update job', jobId, e) + } catch (logErr) { + // ignore + } + } +} + +/** + * Create a fine-tuning job + */ +const createFineTuningJob = async (jobConfig: { + training_file: string + model: string + General?: { + task?: string + lora_config?: any + } + Dataset?: { + max_length?: number + query_max_len?: number + passage_max_len?: number + padding?: string + } + Training?: { + epochs?: number + batch_size?: number + gradient_accumulation_steps?: number + } +}) => { + try { + // Work with the jobConfig as-provided by the UI. Do not decode training_file automatically; + // the external service may expect the raw (possibly URL-encoded) filename. 
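+ // For reference, a minimal forwarded payload might look like this
+ // (hypothetical values; shape mirrors the fields handled below):
+ // {
+ //   "training_file": "train%20set.jsonl",
+ //   "model": "meta-llama/Llama-2-7b-chat-hf",
+ //   "General": { "task": "instruction_tuning", "lora_config": null }
+ // }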
+ const forwardedJobConfig = { ...jobConfig } + + // Debug: log the jobConfig being forwarded to the external finetuning service + try { + // eslint-disable-next-line no-console + console.debug('finetuningService.createFineTuningJob - initial jobConfig:', forwardedJobConfig) + } catch (logErr) { + // ignore + } + // Sanitize the payload: remove undefined values and empty nested objects + const sanitizedPayload = JSON.parse(JSON.stringify(forwardedJobConfig)) + + // // Fix lora_config: must be explicitly null for rerank/embedding tasks, or omitted for instruction tuning + // if (sanitizedPayload?.General) { + // if (Object.prototype.hasOwnProperty.call(sanitizedPayload.General, 'lora_config')) { + // const task = sanitizedPayload.General.task + // if (task === 'rerank' || task === 'embedding') { + // // For rerank/embedding tasks, lora_config must be explicitly null + // sanitizedPayload.General.lora_config = null + // // eslint-disable-next-line no-console + // console.debug('finetuningService.createFineTuningJob - setting General.lora_config to null for task:', task) + // } else { + // // For instruction tuning or other tasks, remove lora_config + // // eslint-disable-next-line no-console + // console.debug('finetuningService.createFineTuningJob - removing General.lora_config for instruction tuning') + // delete sanitizedPayload.General.lora_config + // } + // } else if (sanitizedPayload.General.task === 'rerank' || sanitizedPayload.General.task === 'embedding') { + // // If lora_config is missing for rerank/embedding, add it as null + // sanitizedPayload.General.lora_config = null + // // eslint-disable-next-line no-console + // console.debug('finetuningService.createFineTuningJob - adding lora_config=null for task:', sanitizedPayload.General.task) + // } + // } + + // Remove empty nested objects that may confuse the server + if (sanitizedPayload.General && Object.keys(sanitizedPayload.General).length === 0) { + delete sanitizedPayload.General + } + if (sanitizedPayload.Dataset && Object.keys(sanitizedPayload.Dataset).length === 0) { + delete sanitizedPayload.Dataset + } + if (sanitizedPayload.Training && Object.keys(sanitizedPayload.Training).length === 0) { + delete sanitizedPayload.Training + } + + // // For embedding/rerank tasks, only send training_file, model, and General (as per documentation examples) + // // Additional Dataset/Training params may cause 500 errors + // const task = sanitizedPayload.General?.task + // if (task === 'embedding' || task === 'rerank') { + // // Create minimal payload for embedding/rerank + // const minimalPayload: any = { + // training_file: sanitizedPayload.training_file, + // model: sanitizedPayload.model, + // General: sanitizedPayload.General + // } + // // Only include Dataset/Training if they have non-default values + // // eslint-disable-next-line no-console + // console.debug('finetuningService.createFineTuningJob - using minimal payload for', task, 'task') + // Object.assign(sanitizedPayload, minimalPayload) + // // Remove Dataset and Training for embedding/rerank to match documentation + // delete (sanitizedPayload as any).Dataset + // delete (sanitizedPayload as any).Training + // } + + // Use the stored raw filename from upload if available + // The upload response returns the exact filename as stored on the finetuning service + if (sanitizedPayload.training_file && typeof sanitizedPayload.training_file === 'string') { + const originalFilename = sanitizedPayload.training_file + + // Try to decode first in case it's URL-encoded + let 
lookupKey = originalFilename + try { + const decoded = decodeURIComponent(originalFilename) + lookupKey = decoded + } catch (e) { + // ignore decode errors + } + + // Check if we have a stored mapping from the upload + let stored = uploadedFileIdMap.get(lookupKey) + if (!stored && lookupKey !== originalFilename) { + // Also try the original (encoded) key + stored = uploadedFileIdMap.get(originalFilename) + } + + if (stored && stored.rawFilename) { + sanitizedPayload.training_file = stored.rawFilename + // eslint-disable-next-line no-console + console.debug('finetuningService.createFineTuningJob - using stored raw filename from upload:', stored.rawFilename) + } else { + // No stored mapping, try to use the original filename as-is + // The upload service may have stored it with the encoded name + sanitizedPayload.training_file = originalFilename + // eslint-disable-next-line no-console + console.debug('finetuningService.createFineTuningJob - no stored mapping found, using filename as-is:', originalFilename) + } + } + + // Remove training_file_id - the API doesn't accept it, only training_file is required + if ((sanitizedPayload as any).training_file_id) { + // eslint-disable-next-line no-console + console.debug('finetuningService.createFineTuningJob - removing training_file_id from payload') + delete (sanitizedPayload as any).training_file_id + } + + // Try a sequence of attempts to accommodate naming/encoding/id differences. + const attemptPost = async (payload: any, label = 'attempt') => { + try { + // eslint-disable-next-line no-console + console.debug(`finetuningService.createFineTuningJob - ${label} payload:`, payload) + const resp = await axiosClient.post('/v1/fine_tuning/jobs', payload) + // eslint-disable-next-line no-console + console.debug(`finetuningService.createFineTuningJob - ${label} response:`, typeof resp?.data === 'string' ? resp.data : JSON.stringify(resp?.data)) + return resp + } catch (err: any) { + // Log detailed info for debugging + try { + // eslint-disable-next-line no-console + console.error(`finetuningService.createFineTuningJob - ${label} failed`, { + message: err?.message, + status: err?.response?.status, + responseData: typeof err?.response?.data === 'string' ? err.response.data : JSON.stringify(err?.response?.data), + payload + }) + } catch (logErr) { + // ignore logging errors + } + throw err + } + } + + // Log the final sanitized payload + try { + // eslint-disable-next-line no-console + console.debug('finetuningService.createFineTuningJob - final sanitized payload:', JSON.stringify(sanitizedPayload, null, 2)) + } catch (e) { + // ignore + } + + // Send the sanitized payload + const resp = await attemptPost(sanitizedPayload, 'final') + const respData = resp.data + // Persist to local DB (best-effort) + try { + await persistJobToDb(respData) + } catch (e) { + // ignore + } + return respData + } catch (error: any) { + // Log error details from external service if available for debugging + try { + // eslint-disable-next-line no-console + console.error('finetuningService.createFineTuningJob - axios error:', { + message: error.message, + responseData: error.response ? (typeof error.response.data === 'string' ? error.response.data : JSON.stringify(error.response.data)) : undefined, + status: error.response ? error.response.status : undefined, + headers: error.response ? 
error.response.headers : undefined + }) + } catch (logErr) { + // ignore logging errors + } + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.createFineTuningJob - ${getErrorMessage(error)}` + ) + } +} + +/** + * List all fine-tuning jobs + */ +const listFineTuningJobs = async () => { + try { + // First try to read persisted jobs from local DB + try { + const appServer = getRunningExpressApp() + const repo = appServer.AppDataSource.getRepository(FineTuningJob) + const persisted = await repo.find() + if (persisted && persisted.length > 0) { + return persisted + } + } catch (e) { + // If DB read fails, we'll fall back to external service + // eslint-disable-next-line no-console + console.debug('finetuningService.listFineTuningJobs - DB read failed, falling back to external service', e) + } + + // Fallback: query external finetuning service and persist results + const response = await axiosClient.get('/v1/fine_tuning/jobs') + const data = response.data + try { + if (Array.isArray(data)) { + for (const j of data) { + // best-effort persist + // eslint-disable-next-line no-await-in-loop + await persistJobToDb(j) + } + } + } catch (e) { + // ignore persistence errors + } + + return data + } catch (error: any) { + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.listFineTuningJobs - ${getErrorMessage(error)}` + ) + } +} + +/** + * Retrieve a specific fine-tuning job + */ +const retrieveFineTuningJob = async (fineTuningJobId: string) => { + const maxAttempts = 3 + const baseDelayMs = 500 + + const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms)) + + for (let attempt = 1; attempt <= maxAttempts; attempt++) { + try { + // Log attempt for easier correlation in logs + // eslint-disable-next-line no-console + console.debug(`finetuningService.retrieveFineTuningJob - attempt ${attempt} for job ${fineTuningJobId}`) + + const response = await axiosClient.post('/v1/fine_tuning/jobs/retrieve', { + fine_tuning_job_id: fineTuningJobId + }) + const respData = response.data + // Persist/update DB with latest status (best-effort) + try { + await persistJobToDb(respData) + } catch (e) { + // ignore + } + return respData + } catch (error: any) { + const msg = getErrorMessage(error) + // eslint-disable-next-line no-console + console.warn(`finetuningService.retrieveFineTuningJob - attempt ${attempt} failed: ${msg}`) + + const isTransient = msg && ( + msg.toLowerCase().includes('socket hang up') || + msg.toLowerCase().includes('econnreset') || + msg.toLowerCase().includes('etimedout') || + msg.toLowerCase().includes('timeout') || + msg.toLowerCase().includes('connect') + ) + + if (attempt < maxAttempts && isTransient) { + const delay = baseDelayMs * Math.pow(2, attempt - 1) + // eslint-disable-next-line no-console + console.debug(`finetuningService.retrieveFineTuningJob - retrying in ${delay}ms`) + // eslint-disable-next-line no-await-in-loop + await sleep(delay) + continue + } + + // Final failure: log details and throw + try { + // eslint-disable-next-line no-console + console.error('finetuningService.retrieveFineTuningJob - error details:', { + message: error?.message, + status: error?.response?.status, + responseData: error?.response?.data + }) + } catch (logErr) { + // ignore logging errors + } + + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.retrieveFineTuningJob - ${msg}` + ) + } + } + + throw new InternalFlowiseError( + 
StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.retrieveFineTuningJob - failed after ${maxAttempts} attempts` + ) +} + +/** + * Cancel a fine-tuning job + */ +const cancelFineTuningJob = async (fineTuningJobId: string) => { + try { + const response = await axiosClient.post('/v1/fine_tuning/jobs/cancel', { + fine_tuning_job_id: fineTuningJobId + }) + // Best-effort: update local DB to reflect cancelled status + try { + await updateJobInDb(fineTuningJobId, { status: 'cancelled', finishedDate: new Date() }) + } catch (e) { + // ignore + } + return response.data + } catch (error: any) { + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.cancelFineTuningJob - ${getErrorMessage(error)}` + ) + } +} + +/** + * Delete a fine-tuning job locally and attempt to cancel it remotely. + * This will cancel the external job (best-effort) and remove DB records for the job and checkpoints. + */ +const deleteFineTuningJob = async (fineTuningJobId: string) => { + try { + // Attempt to cancel external job (best-effort) + try { + await axiosClient.post('/v1/fine_tuning/jobs/cancel', { + fine_tuning_job_id: fineTuningJobId + }) + } catch (e) { + // ignore external cancel errors + try { + // eslint-disable-next-line no-console + console.debug('finetuningService.deleteFineTuningJob - external cancel failed, continuing to delete locally', e) + } catch (logErr) {} + } + + // Remove local DB records (best-effort) + try { + const appServer = getRunningExpressApp() + const repo = appServer.AppDataSource.getRepository(FineTuningJob) + const checkpointRepo = appServer.AppDataSource.getRepository(FineTuningCheckpoint) + + // delete checkpoints first + await checkpointRepo.delete({ fine_tuning_job_id: String(fineTuningJobId) }) + // delete job + await repo.delete({ id: String(fineTuningJobId) }) + } catch (e) { + try { + // eslint-disable-next-line no-console + console.error('finetuningService.deleteFineTuningJob - failed to delete local DB records', e) + } catch (logErr) {} + } + + return { success: true } + } catch (error: any) { + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.deleteFineTuningJob - ${getErrorMessage(error)}` + ) + } +} + +/** + * List checkpoints of a fine-tuning job + */ +const listFineTuningCheckpoints = async (fineTuningJobId: string) => { + try { + const response = await axiosClient.post('/v1/finetune/list_checkpoints', { + fine_tuning_job_id: fineTuningJobId + }) + return response.data + } catch (error: any) { + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.listFineTuningCheckpoints - ${getErrorMessage(error)}` + ) + } +} + +/** + * Debug helper: forward any payload to the external finetuning job endpoint and return raw status/body + */ +const proxyJobDebug = async (payload: any) => { + try { + const resp = await axiosClient.post('/v1/fine_tuning/jobs', payload) + return { status: resp.status, body: resp.data } + } catch (error: any) { + // Return the status and response data (stringify if needed) + const status = error?.response?.status || 500 + const body = error?.response?.data || (error?.message || 'Unknown error') + return { status, body } + } +} + +export default { + uploadTrainingFile, + createFineTuningJob, + listFineTuningJobs, + retrieveFineTuningJob, + cancelFineTuningJob, + listFineTuningCheckpoints, + deleteFineTuningJob, + proxyJobDebug +} diff --git a/studio-frontend/packages/ui/src/api/client.js 
b/studio-frontend/packages/ui/src/api/client.js index d2dd873..7606ec2 100644 --- a/studio-frontend/packages/ui/src/api/client.js +++ b/studio-frontend/packages/ui/src/api/client.js @@ -4,7 +4,7 @@ import { baseURL } from '@/store/constant' const apiClient = axios.create({ baseURL: `${baseURL}/api/v1`, headers: { - 'Content-type': 'application/json', + 'x-request-from': 'internal' } }) diff --git a/studio-frontend/packages/ui/src/api/finetuning.js b/studio-frontend/packages/ui/src/api/finetuning.js index 3c7bd6d..ad3352a 100644 --- a/studio-frontend/packages/ui/src/api/finetuning.js +++ b/studio-frontend/packages/ui/src/api/finetuning.js @@ -1,62 +1,90 @@ import client from './client' const finetuningApi = { - // Get all fine-tuning jobs - getAllJobs: () => client.get('/finetuning/jobs'), + // Upload training file + uploadFile: (file, purpose = 'fine-tune', onUploadProgress) => { + const formData = new FormData() + formData.append('file', file) + formData.append('purpose', purpose) + + return client.post('/finetuning/files', formData, { + // DO NOT set Content-Type here; letting axios set it ensures the multipart boundary is included + onUploadProgress + }) + }, - // Create new fine-tuning job with OpenAI API format + // Create new fine-tuning job createJob: (jobData) => { const payload = { - model: jobData.model, - training_file: jobData.training_file_id, - validation_file: jobData.validation_file_id, - hyperparameters: { - n_epochs: jobData.hyperparameters.n_epochs, - batch_size: jobData.hyperparameters.batch_size, - learning_rate_multiplier: jobData.hyperparameters.learning_rate_multiplier, - prompt_loss_weight: jobData.hyperparameters.prompt_loss_weight - }, - suffix: jobData.suffix + training_file: jobData.training_file, + // forward training_file_id when available (server will prefer id) + ...(jobData.training_file_id ? { training_file_id: jobData.training_file_id } : {}), + model: jobData.model + } + + // Add optional General configuration + if (jobData.General) { + payload.General = jobData.General + } + + // Add optional Dataset configuration + if (jobData.Dataset) { + payload.Dataset = jobData.Dataset + } + + // Add optional Training configuration + if (jobData.Training) { + payload.Training = jobData.Training } return client.post('/finetuning/jobs', payload) }, - // Get specific fine-tuning job - getJob: (jobId) => client.get(`/finetuning/jobs/${jobId}`), + // List all fine-tuning jobs + getAllJobs: () => client.get('/finetuning/jobs'), - // Delete fine-tuning job - deleteJob: (jobId) => client.delete(`/finetuning/jobs/${jobId}`), + // Retrieve specific fine-tuning job + getJob: (fineTuningJobId) => { + return client.post('/finetuning/jobs/retrieve', { + fine_tuning_job_id: fineTuningJobId + }) + }, - // Upload dataset file with suffix - uploadFile: (file, suffix, onUploadProgress) => { - const formData = new FormData() - - // Generate suffixed filename - const fileExtension = '.' 
+ file.name.split('.').pop() - const baseFileName = file.name.replace(fileExtension, '') - const suffixedFileName = `${baseFileName}-${suffix}${fileExtension}` - - // Append file with suffixed name - formData.append('file', file, suffixedFileName) - formData.append('purpose', 'fine-tune') // OpenAI API requirement - formData.append('suffix', suffix) - - return client.post('/files/upload', formData, { - headers: { - 'Content-Type': 'multipart/form-data' - }, - onUploadProgress + // Cancel a fine-tuning job + cancelJob: (fineTuningJobId) => { + return client.post('/finetuning/jobs/cancel', { + fine_tuning_job_id: fineTuningJobId + }) + }, + + // List checkpoints of a fine-tuning job + listCheckpoints: (fineTuningJobId) => { + return client.post('/finetuning/jobs/checkpoints', { + fine_tuning_job_id: fineTuningJobId }) }, - // Get available base models - getBaseModels: () => client.get('/finetuning/models'), + // Legacy compatibility methods + deleteJob: (jobId) => { + // Call the backend delete endpoint which will cancel remote job (best-effort) and remove local DB records + return client.post('/finetuning/jobs/delete', { fine_tuning_job_id: jobId }) + }, - // Download fine-tuned model - downloadModel: (jobId) => client.get(`/finetuning/jobs/${jobId}/download`, { - responseType: 'blob' - }) + // Get available base models (to be implemented on backend) + getBaseModels: () => { + // Return common models for now + return Promise.resolve({ + data: [ + 'meta-llama/Llama-2-7b-chat-hf', + 'meta-llama/Llama-2-7b-hf', + 'meta-llama/Llama-2-13b-hf', + 'BAAI/bge-reranker-large', + 'BAAI/bge-base-en-v1.5', + 'Qwen/Qwen2.5-3B', + 'Qwen/Qwen2.5-7B' + ] + }) + } } export default finetuningApi \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/hooks/useApi.jsx b/studio-frontend/packages/ui/src/hooks/useApi.jsx index 932f0a6..8f594d9 100644 --- a/studio-frontend/packages/ui/src/hooks/useApi.jsx +++ b/studio-frontend/packages/ui/src/hooks/useApi.jsx @@ -10,6 +10,8 @@ export default (apiFunc) => { try { const result = await apiFunc(...args) setData(result.data) + // return the data so callers awaiting request() get the payload + return result.data } catch (err) { setError(err || 'Unexpected Error!') } finally { diff --git a/studio-frontend/packages/ui/src/ui-component/button/AnimateButton.jsx b/studio-frontend/packages/ui/src/ui-component/button/AnimateButton.jsx index ce2d3fb..98b8852 100644 --- a/studio-frontend/packages/ui/src/ui-component/button/AnimateButton.jsx +++ b/studio-frontend/packages/ui/src/ui-component/button/AnimateButton.jsx @@ -5,7 +5,7 @@ import { motion, useCycle } from 'framer-motion' // ==============================|| ANIMATION BUTTON ||============================== // -const AnimateButton = forwardRef(function AnimateButton({ children, type, direction, offset, scale }, ref) { +const AnimateButton = forwardRef(function AnimateButton({ children, type = 'scale', direction = 'right', offset = 10, scale = { hover: 1, tap: 0.9 } }, ref) { let offset1 let offset2 switch (direction) { @@ -84,14 +84,6 @@ AnimateButton.propTypes = { scale: PropTypes.oneOfType([PropTypes.number, PropTypes.object]) } -AnimateButton.defaultProps = { - type: 'scale', - offset: 10, - direction: 'right', - scale: { - hover: 1, - tap: 0.9 - } -} +// defaults handled via function parameter defaults export default AnimateButton diff --git a/studio-frontend/packages/ui/src/ui-component/extended/Transitions.jsx b/studio-frontend/packages/ui/src/ui-component/extended/Transitions.jsx index 
4942dee..1b4ba51 100644 --- a/studio-frontend/packages/ui/src/ui-component/extended/Transitions.jsx +++ b/studio-frontend/packages/ui/src/ui-component/extended/Transitions.jsx @@ -6,7 +6,7 @@ import { Collapse, Fade, Box, Grow, Slide, Zoom } from '@mui/material' // ==============================|| TRANSITIONS ||============================== // -const Transitions = forwardRef(function Transitions({ children, position, type, direction, ...others }, ref) { +const Transitions = forwardRef(function Transitions({ children, position = 'top-left', type = 'grow', direction = 'up', ...others }, ref) { let positionSX = { transformOrigin: '0 0 0' } @@ -98,10 +98,6 @@ Transitions.propTypes = { direction: PropTypes.oneOf(['up', 'down', 'left', 'right']) } -Transitions.defaultProps = { - type: 'grow', - position: 'top-left', - direction: 'up' -} +// defaults handled via function parameter defaults export default Transitions diff --git a/studio-frontend/packages/ui/src/views/finetuning/FileUploadArea.jsx b/studio-frontend/packages/ui/src/views/finetuning/FileUploadArea.jsx index 07ef190..493f1bb 100644 --- a/studio-frontend/packages/ui/src/views/finetuning/FileUploadArea.jsx +++ b/studio-frontend/packages/ui/src/views/finetuning/FileUploadArea.jsx @@ -95,25 +95,21 @@ const FileUploadArea = ({ }, 100) try { - // Create file object with original name (server will handle suffix) - const fileForUpload = { - ...file, - originalName: file.name, - id: `file-${Date.now()}` // Simple ID for tracking - } - // Simulate upload delay await new Promise(resolve => setTimeout(resolve, 1000)) - + setUploadProgress(100) - setUploadedFile(fileForUpload) - + + // Store the actual File object so parent can upload the real Blob/File + setUploadedFile(file) + // Generate preview await previewFile(file) - - // Notify parent component - onFileUpload(fileForUpload) - + + // Notify parent with the real File object (not a plain JS object) + // Parent will perform the FormData upload and receive server response + onFileUpload(file) + setTimeout(() => setUploadProgress(0), 500) } catch (error) { console.error('Upload error:', error) @@ -243,8 +239,8 @@ const FileUploadArea = ({ } - label="Uploaded" - color="success" + label="Selected" + color="primary" size="small" variant="outlined" /> diff --git a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx index c389a54..fb5f859 100644 --- a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx +++ b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx @@ -16,7 +16,10 @@ import { MenuItem, Typography, Stack, + Checkbox, + FormControlLabel, IconButton, + CircularProgress, Grid } from '@mui/material' import { useTheme } from '@mui/material/styles' @@ -88,14 +91,26 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { const [errors, setErrors] = useState({}) const [isSubmitting, setIsSubmitting] = useState(false) + const [loraEnabled, setLoraEnabled] = useState(false) + + const baseModels = [ - 'gpt-3.5-turbo', - 'gpt-4', - 'llama-2-7b', - 'llama-2-13b', - 'mistral-7b', - 'codellama-7b', - 'falcon-7b' + 'meta-llama/Llama-2-7b-chat-hf', + 'meta-llama/Llama-2-7b-hf', + 'meta-llama/Llama-2-13b-hf', + 'BAAI/bge-reranker-large', + 'BAAI/bge-base-en-v1.5', + 'Qwen/Qwen2.5-3B', + 'Qwen/Qwen2.5-7B' + ] + + const taskTypes = [ + { value: 'instruction_tuning', label: 'Instruction Tuning' }, + { value: 'rerank', label: 'Reranking' }, + { value: 'embedding', 
label: 'Embedding' }, + { value: 'pretraining', label: 'Pretraining' }, + { value: 'dpo', label: 'Direct Preference Optimization (DPO)' }, + { value: 'reasoning', label: 'Reasoning' } ] const handleInputChange = (field, value) => { @@ -133,13 +148,29 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { })) } + // When a file is selected in FileUploadArea, just store the File object locally. + // The actual upload to the server will happen when the user clicks Create Job. const handleFileUpload = (fileType, file) => { + if (!file) { + setFormData(prev => ({ + ...prev, + [fileType]: null + })) + return + } + + // Store the raw File object and its name; do not upload now + const fileEntry = { + file, + name: file.name + } + setFormData(prev => ({ ...prev, - [fileType]: file + [fileType]: fileEntry })) - - // Clear error for this field + + // Clear any previous error for this field if (errors[fileType]) { setErrors(prev => ({ ...prev, @@ -186,17 +217,19 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { newErrors.logging_steps = 'Logging steps must be greater than 0' } - // LoRA parameters validation - if (formData.lora.r <= 0) { - newErrors.lora_r = 'LoRA rank must be greater than 0' - } + // LoRA parameters validation (only when enabled) + if (loraEnabled) { + if (formData.lora.r <= 0) { + newErrors.lora_r = 'LoRA rank must be greater than 0' + } - if (formData.lora.lora_alpha <= 0) { - newErrors.lora_alpha = 'LoRA alpha must be greater than 0' - } + if (formData.lora.lora_alpha <= 0) { + newErrors.lora_alpha = 'LoRA alpha must be greater than 0' + } - if (formData.lora.lora_dropout < 0 || formData.lora.lora_dropout > 1) { - newErrors.lora_dropout = 'LoRA dropout must be between 0 and 1' + if (formData.lora.lora_dropout < 0 || formData.lora.lora_dropout > 1) { + newErrors.lora_dropout = 'LoRA dropout must be between 0 and 1' + } } setErrors(newErrors) @@ -211,41 +244,70 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { setIsSubmitting(true) try { - // TODO: Replace with actual API call - // const response = await finetuningApi.createJob({ - // model: formData.baseModel, - // training_file_id: formData.trainingDataset?.id, - // General: formData.general, - // Dataset: formData.dataset, - // Training: formData.training, - // openai_params: formData.openai_params, - // lora_config: formData.lora - // }) - - // Generate job name automatically based on model and timestamp for simulation - const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19) - const jobId = `ft-${formData.baseModel}-${timestamp}` + // Create the job configuration payload + // Build General object and set lora_config based on the LoRA checkbox + const generalPayload = { ...formData.general } + // If user enabled LoRA, include the object; otherwise send explicit null + generalPayload.lora_config = loraEnabled ? 
formData.lora : null + + // If the user selected a file but hasn't uploaded it yet, upload it now + let trainingFileName = formData.trainingDataset?.uploadedName || formData.trainingDataset?.name + let trainingFileId = formData.trainingDataset?.id + if (formData.trainingDataset && formData.trainingDataset.file) { + try { + setIsSubmitting(true) + const uploadResp = await finetuningApi.uploadFile(formData.trainingDataset.file, 'fine-tune', (progressEvent) => { + // we could wire progress to UI if desired + }) + trainingFileName = uploadResp.data?.filename || trainingFileName || formData.trainingDataset.file.name + trainingFileId = uploadResp.data?.id || trainingFileId + } catch (err) { + console.error('Error uploading training file before job creation:', err) + setErrors(prev => ({ ...prev, trainingDataset: 'Failed to upload training file: ' + (err.message || 'Unknown') })) + setIsSubmitting(false) + return + } + } + + const jobPayload = { + model: formData.baseModel, + // Use uploaded filename/id (if available) + training_file: trainingFileName, + training_file_id: trainingFileId, + General: generalPayload, + Dataset: { + max_length: formData.dataset.max_length, + query_max_len: formData.dataset.query_max_len, + passage_max_len: formData.dataset.passage_max_len, + padding: formData.dataset.padding_side + }, + Training: { + epochs: formData.training.epochs, + batch_size: formData.openai_params.batch_size, + gradient_accumulation_steps: formData.training.gradient_accumulation_steps + } + } + + // Call the actual API + const response = await finetuningApi.createJob(jobPayload) + // Create job object from response const newJob = { - id: jobId, - name: jobId, - status: 'pending', + id: response.data?.id || response.data?.fine_tuning_job_id || Date.now().toString(), + name: response.data?.id || response.data?.fine_tuning_job_id || `ft-${formData.baseModel}`, + status: response.data?.status || 'pending', model: formData.baseModel, dataset: formData.trainingDataset?.suffixedName || formData.trainingDataset?.name || 'Unknown', progress: '0%', - createdDate: new Date().toISOString(), + createdDate: response.data?.created_at || new Date().toISOString(), // Include all configuration sections openai_params: formData.openai_params, general: formData.general, dataset_config: formData.dataset, training: formData.training, - lora: formData.lora, - training_file_id: formData.trainingDataset?.id + training_file: jobPayload.training_file } - // Simulate API call delay - await new Promise(resolve => setTimeout(resolve, 1000)) - onJobCreated(newJob) handleClose() } catch (error) { @@ -300,14 +362,10 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { accelerate_mode: 'DDP', hpu_execution_mode: 'lazy', num_training_workers: 1 - }, - lora: { - r: 8, - lora_alpha: 32, - lora_dropout: 0.1, - task_type: 'CAUSAL_LM' } }) + setLoraEnabled(false) + setFormData(prev => ({ ...prev, lora: { r: 8, lora_alpha: 32, lora_dropout: 0.1, task_type: 'CAUSAL_LM' } })) setErrors({}) setIsSubmitting(false) onClose() @@ -488,18 +546,31 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { fullWidth size="medium" /> + + LoRA Configuration + setLoraEnabled(e.target.checked)} + /> + } + label="Enable LoRA" + /> + handleConfigChange('lora', 'r', parseInt(e.target.value))} - error={!!errors.lora_r} - inputProps={{ min: 1, max: 128, step: 1 }} - size="medium" + type="number" + value={formData.lora.r} + onChange={(e) => handleConfigChange('lora', 'r', parseInt(e.target.value))} + error={!!errors.lora_r} + 
inputProps={{ min: 1, max: 128, step: 1 }} + size="medium" fullWidth + disabled={!loraEnabled} /> @@ -512,7 +583,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { inputProps={{ min: 1, max: 256, step: 1 }} size="medium" fullWidth - sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + disabled={!loraEnabled} /> @@ -525,7 +596,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { inputProps={{ min: 0, max: 1, step: 0.01 }} size="medium" fullWidth - sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + disabled={!loraEnabled} /> @@ -685,7 +756,14 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { size="large" sx={{ minWidth: 140 }} > - {isSubmitting ? 'Creating...' : 'Create Job'} + {isSubmitting ? ( + <> + + Creating... + + ) : ( + 'Create Job' + )} diff --git a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx index e551fab..7c4ff2c 100644 --- a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx +++ b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx @@ -18,31 +18,63 @@ import { Typography, IconButton, Menu, - MenuItem + MenuItem, + Dialog, + DialogTitle, + DialogContent, + DialogActions, + List, + ListItem, + ListItemText } from '@mui/material' import { useTheme } from '@mui/material/styles' // icons -import { IconDots, IconEye, IconTrash, IconDownload } from '@tabler/icons-react' - -// utils - simple date formatting helper -const formatDistanceToNow = (date) => { - const now = new Date() - const diff = now - new Date(date) - const days = Math.floor(diff / (1000 * 60 * 60 * 24)) - const hours = Math.floor(diff / (1000 * 60 * 60)) - const minutes = Math.floor(diff / (1000 * 60)) - - if (days > 0) return `${days} day${days > 1 ? 's' : ''} ago` - if (hours > 0) return `${hours} hour${hours > 1 ? 's' : ''} ago` - if (minutes > 0) return `${minutes} minute${minutes > 1 ? 's' : ''} ago` - return 'Just now' +import { IconDots, IconEye, IconTrash, IconDownload, IconPlayerStop, IconCheckbox } from '@tabler/icons-react' + +// API +import finetuningApi from '@/api/finetuning' + +// utils - format created date as 'MonthName DayOrdinal, Year' e.g. 'September 4th, 2025' +const formatDate = (date) => { + if (!date) return 'Unknown' + let dt + try { + if (typeof date === 'number') { + dt = date < 1e12 ? new Date(date * 1000) : new Date(date) + } else if (typeof date === 'string' && /^\d+$/.test(date)) { + const n = parseInt(date, 10) + dt = n < 1e12 ? 
new Date(n * 1000) : new Date(n) + } else { + dt = new Date(date) + } + if (isNaN(dt.getTime())) return 'Unknown' + + const month = dt.toLocaleString('default', { month: 'long' }) + const day = dt.getDate() + const year = dt.getFullYear() + + const ordinal = (n) => { + const s = ["th", "st", "nd", "rd"] + const v = n % 100 + return s[(v - 20) % 10] || s[v] || s[0] + } + + return `${month} ${day}${ordinal(day)}, ${year}` + } catch (e) { + return 'Unknown' + } } -const FinetuningJobsTable = ({ data, isLoading }) => { +const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null }) => { const theme = useTheme() const [anchorEl, setAnchorEl] = useState(null) const [selectedJob, setSelectedJob] = useState(null) + const [actionLoading, setActionLoading] = useState(false) + const [detailsOpen, setDetailsOpen] = useState(false) + const [detailsData, setDetailsData] = useState(null) + const [checkpointsOpen, setCheckpointsOpen] = useState(false) + const [checkpointsData, setCheckpointsData] = useState(null) const handleMenuClick = (event, job) => { setAnchorEl(event.currentTarget) @@ -54,6 +86,60 @@ const FinetuningJobsTable = ({ data, isLoading }) => { setSelectedJob(null) } + const handleCancelJob = async () => { + if (!selectedJob) return + + setActionLoading(true) + try { + await finetuningApi.cancelJob(selectedJob.id) + handleMenuClose() + if (onRefresh) onRefresh() + } catch (error) { + console.error('Error canceling job:', error) + alert('Failed to cancel job: ' + (error.message || 'Unknown error')) + } finally { + setActionLoading(false) + } + } + + const handleViewCheckpoints = async (jobArg = null) => { + const jobToUse = jobArg || selectedJob + if (!jobToUse) return + + // ensure selectedJob is set for downstream operations + setSelectedJob(jobToUse) + + setActionLoading(true) + try { + const response = await finetuningApi.listCheckpoints(jobToUse.id) + setCheckpointsData(response.data) + setCheckpointsOpen(true) + handleMenuClose() + } catch (error) { + console.error('Error fetching checkpoints:', error) + alert('Failed to fetch checkpoints: ' + (error.message || 'Unknown error')) + } finally { + setActionLoading(false) + } + } + + const handleViewDetails = async () => { + if (!selectedJob) return + + setActionLoading(true) + try { + const response = await finetuningApi.getJob(selectedJob.id) + setDetailsData(response.data) + setDetailsOpen(true) + handleMenuClose() + } catch (error) { + console.error('Error fetching job details:', error) + alert('Failed to fetch job details: ' + (error.message || 'Unknown error')) + } finally { + setActionLoading(false) + } + } + const getStatusColor = (status) => { switch (status?.toLowerCase()) { case 'completed': @@ -101,14 +187,15 @@ const FinetuningJobsTable = ({ data, isLoading }) => { - + Job Name Status Model Dataset Progress - Created Date + Checkpoints Actions + Created Date @@ -150,12 +237,13 @@ const FinetuningJobsTable = ({ data, isLoading }) => { - - {job.createdDate - ? formatDistanceToNow(job.createdDate) - : 'Unknown' - } - + { + + + {job.createdDate ? 
formatDate(job.createdDate) : 'Unknown'} + + ))} @@ -176,36 +269,86 @@ const FinetuningJobsTable = ({ data, isLoading }) => { onClose={handleMenuClose} anchorOrigin={{ vertical: 'bottom', horizontal: 'left' }} > - { /* TODO: View job details */ handleMenuClose() }}> + View Details + {/* View Checkpoints removed from Actions menu: use the Checkpoints column button to open the modal */} { /* TODO: Download model */ handleMenuClose() }} - disabled={selectedJob?.status !== 'completed'} + onClick={handleCancelJob} + disabled={actionLoading || selectedJob?.status === 'completed' || selectedJob?.status === 'cancelled' || selectedJob?.status === 'failed'} > - - Download Model + + Cancel Job { /* TODO: Delete job */ handleMenuClose() }} - sx={{ color: 'error.main' }} + onClick={async () => { + if (!selectedJob) return + if (!window.confirm('Are you sure you want to delete this job?')) return + setActionLoading(true) + try { + await finetuningApi.deleteJob(selectedJob.id) + handleMenuClose() + if (onRefresh) onRefresh() + } catch (error) { + console.error('Error deleting job:', error) + alert('Failed to delete job: ' + (error.message || 'Unknown error')) + } finally { + setActionLoading(false) + } + }} + disabled={actionLoading} > Delete Job + + {/* Details Dialog */} + setDetailsOpen(false)} maxWidth="md" fullWidth> + Job Details + + {detailsData ? ( +
{JSON.stringify(detailsData, null, 2)}
+ ) : ( + No details available + )} +
+ + + +
+ + {/* Checkpoints Dialog */} + setCheckpointsOpen(false)} maxWidth="md" fullWidth> + Checkpoints + + {checkpointsData && Array.isArray(checkpointsData) && checkpointsData.length > 0 ? ( + + {checkpointsData.map((cp) => ( + + + + ))} + + ) : ( + No checkpoints available + )} + + + + + ) } FinetuningJobsTable.propTypes = { data: PropTypes.array.isRequired, - isLoading: PropTypes.bool + isLoading: PropTypes.bool, + onRefresh: PropTypes.func } -FinetuningJobsTable.defaultProps = { - isLoading: false -} +// default props handled via function default parameters export default FinetuningJobsTable \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/views/finetuning/index.jsx b/studio-frontend/packages/ui/src/views/finetuning/index.jsx index 6219ff5..16e5594 100644 --- a/studio-frontend/packages/ui/src/views/finetuning/index.jsx +++ b/studio-frontend/packages/ui/src/views/finetuning/index.jsx @@ -63,7 +63,30 @@ const Finetuning = () => { try { setLoading(true) const response = await getAllJobsApi.request() - setJobs(response || []) + // Normalize server objects (TypeORM entities or external API objects) + const normalizeJob = (j) => { + if (!j) return null + const id = j.id || j.job_id || j.fine_tuning_job_id || String(Date.now()) + const name = j.name || id + const status = j.status || j.state || 'pending' + const model = j.model || 'N/A' + const dataset = j.dataset || j.training_file || j.trainingFile || 'N/A' + const progress = typeof j.progress === 'number' ? `${j.progress}%` : (j.progress || '0%') + const createdDate = j.createdDate || j.created_at || j.createdAt || new Date().toISOString() + return { + ...j, + id, + name, + status, + model, + dataset, + progress, + createdDate + } + } + + const jobsData = Array.isArray(response) ? 
response.map(normalizeJob).filter(Boolean) : [] + setJobs(jobsData) setLoading(false) } catch (error) { console.error('Error loading fine-tuning jobs:', error) @@ -74,7 +97,14 @@ const Finetuning = () => { } const handleCreateJob = () => { - setJobModalOpen(true) + try { + if (document.activeElement instanceof HTMLElement) { + document.activeElement.blur() + } + } catch (e) { + // ignore in non-browser environments + } + setTimeout(() => setJobModalOpen(true), 0) } const handleJobCreated = (newJob) => { @@ -83,7 +113,12 @@ const Finetuning = () => { } const filterJobs = (jobs) => { - return jobs.filter((job) => job.name.toLowerCase().includes(search.toLowerCase())) + if (!search || search.trim() === '') return jobs + const q = search.toLowerCase() + return jobs.filter((job) => { + const name = (job?.name || job?.id || '').toString().toLowerCase() + return name.includes(q) + }) } return ( @@ -149,6 +184,7 @@ const Finetuning = () => { )} From 697ee32182b88a38034f0d64bdff8015b64a64b8 Mon Sep 17 00:00:00 2001 From: wwanarif Date: Wed, 15 Oct 2025 09:31:27 +0000 Subject: [PATCH 08/23] updates on the db and ft job view Signed-off-by: wwanarif --- .../src/database/entities/FineTuningJob.ts | 23 +--- .../1760424809635-AddFineTuningTables.ts | 9 +- .../1760424809635-AddFineTuningTables.ts | 9 +- .../server/src/services/finetuning/index.ts | 37 ++++-- .../views/finetuning/FinetuningJobModal.jsx | 2 +- .../views/finetuning/FinetuningJobsTable.jsx | 62 +++++++---- .../ui/src/views/finetuning/index.jsx | 105 +++++++++++++++++- 7 files changed, 173 insertions(+), 74 deletions(-) diff --git a/studio-frontend/packages/server/src/database/entities/FineTuningJob.ts b/studio-frontend/packages/server/src/database/entities/FineTuningJob.ts index 342cd06..ba74cf9 100644 --- a/studio-frontend/packages/server/src/database/entities/FineTuningJob.ts +++ b/studio-frontend/packages/server/src/database/entities/FineTuningJob.ts @@ -1,13 +1,10 @@ -import { Entity, Column, PrimaryColumn, CreateDateColumn, UpdateDateColumn } from 'typeorm' +import { Entity, Column, PrimaryColumn, CreateDateColumn } from 'typeorm' @Entity('fine_tuning_job') export class FineTuningJob { @PrimaryColumn() id!: string - @Column({ nullable: true }) - name?: string - @Column({ nullable: true }) model?: string @@ -20,12 +17,6 @@ export class FineTuningJob { @Column({ nullable: true }) training_file?: string - @Column({ nullable: true }) - training_file_id?: string - - @Column({ type: 'text', nullable: true }) - lora_config?: string - @Column({ type: 'text', nullable: true }) hyperparameters?: string @@ -35,21 +26,9 @@ export class FineTuningJob { @Column({ type: 'text', nullable: true }) error?: string - @Column({ nullable: true, type: 'int' }) - progress?: number - @Column({ nullable: true, type: 'int' }) trained_tokens?: number - @Column({ nullable: true, type: 'datetime' }) - estimated_finish?: Date - - @Column({ nullable: true, type: 'datetime' }) - finishedDate?: Date - @CreateDateColumn({ type: 'datetime' }) createdDate!: Date - - @UpdateDateColumn({ type: 'datetime' }) - updatedDate!: Date } diff --git a/studio-frontend/packages/server/src/database/migrations/mysql/1760424809635-AddFineTuningTables.ts b/studio-frontend/packages/server/src/database/migrations/mysql/1760424809635-AddFineTuningTables.ts index 1b9bb8b..67381d4 100644 --- a/studio-frontend/packages/server/src/database/migrations/mysql/1760424809635-AddFineTuningTables.ts +++ b/studio-frontend/packages/server/src/database/migrations/mysql/1760424809635-AddFineTuningTables.ts 
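
A note on the slimmed-down entity above: with name, progress, training_file_id, lora_config, and the extra date columns gone, a row is keyed purely by the external job id and everything else is nullable. For orientation, a minimal sketch of how such an entity round-trips through TypeORM; the appDataSource handle and the saveJobStatus helper are illustrative names, not part of this patch:

import { DataSource } from 'typeorm'
import { FineTuningJob } from './entities/FineTuningJob' // path is illustrative

// Hypothetical upsert helper: save() issues an INSERT when the primary key
// is new and an UPDATE when a row with that id already exists.
export async function saveJobStatus(appDataSource: DataSource, id: string, status: string): Promise<FineTuningJob> {
    const repo = appDataSource.getRepository(FineTuningJob)
    const job = (await repo.findOneBy({ id })) ?? repo.create({ id })
    job.status = status
    return repo.save(job)
}

The MySQL and SQLite migrations below mirror the same column list, so a fresh install and the entity definition stay in sync.
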
@@ -5,22 +5,15 @@ export class AddFineTuningTables1760424809635 implements MigrationInterface { await queryRunner.query( `CREATE TABLE IF NOT EXISTS fine_tuning_job ( id varchar(255) PRIMARY KEY NOT NULL, - name varchar(255), model varchar(255), task varchar(255), status varchar(255), training_file varchar(255), - training_file_id varchar(255), - lora_config longtext, hyperparameters longtext, result_files longtext, error longtext, - progress int, trained_tokens int, - estimated_finish datetime, - finishedDate datetime, - createdDate datetime NOT NULL DEFAULT CURRENT_TIMESTAMP, - updatedDate datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP + createdDate datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ) ENGINE=InnoDB; ` ) diff --git a/studio-frontend/packages/server/src/database/migrations/sqlite/1760424809635-AddFineTuningTables.ts b/studio-frontend/packages/server/src/database/migrations/sqlite/1760424809635-AddFineTuningTables.ts index 15288cb..83e91d6 100644 --- a/studio-frontend/packages/server/src/database/migrations/sqlite/1760424809635-AddFineTuningTables.ts +++ b/studio-frontend/packages/server/src/database/migrations/sqlite/1760424809635-AddFineTuningTables.ts @@ -5,22 +5,15 @@ export class AddFineTuningTables1760424809635 implements MigrationInterface { await queryRunner.query( `CREATE TABLE IF NOT EXISTS "fine_tuning_job" ( "id" varchar PRIMARY KEY NOT NULL, - "name" varchar, "model" varchar, "task" varchar, "status" varchar, "training_file" varchar, - "training_file_id" varchar, - "lora_config" text, "hyperparameters" text, "result_files" text, "error" text, - "progress" integer, "trained_tokens" integer, - "estimated_finish" datetime, - "finishedDate" datetime, - "createdDate" datetime NOT NULL DEFAULT (datetime('now')), - "updatedDate" datetime NOT NULL DEFAULT (datetime('now')) + "createdDate" datetime NOT NULL DEFAULT (datetime('now')) );` ) diff --git a/studio-frontend/packages/server/src/services/finetuning/index.ts b/studio-frontend/packages/server/src/services/finetuning/index.ts index 34910d7..4ef8dae 100644 --- a/studio-frontend/packages/server/src/services/finetuning/index.ts +++ b/studio-frontend/packages/server/src/services/finetuning/index.ts @@ -108,6 +108,21 @@ const persistJobToDb = async (jobData: any) => { if (!id) return // Build entity object mapping common fields; fall back to stringifying objects + // Extract task robustly: prefer explicit jobData.task, then jobData.General.task (object or JSON string) + let taskVal: any = jobData.task || undefined + try { + if (!taskVal && jobData.General) { + if (typeof jobData.General === 'string') { + const parsed = JSON.parse(jobData.General) + taskVal = parsed?.task || taskVal + } else if (typeof jobData.General === 'object') { + taskVal = jobData.General?.task || taskVal + } + } + } catch (e) { + // ignore parse errors + } + const entity: any = { id: String(id), name: jobData.name || jobData.id || undefined, @@ -115,20 +130,11 @@ const persistJobToDb = async (jobData: any) => { status: jobData.status || jobData.state || undefined, training_file: jobData.training_file || jobData.trainingFile || undefined, training_file_id: jobData.training_file_id || undefined, - task: jobData.General?.task || jobData.task || undefined, + task: taskVal || undefined, progress: typeof jobData.progress === 'number' ? jobData.progress : undefined, trained_tokens: typeof jobData.trained_tokens === 'number' ? 
jobData.trained_tokens : undefined } - if (jobData.General) { - try { - entity.lora_config = typeof jobData.General.lora_config === 'object' - ? JSON.stringify(jobData.General.lora_config) - : jobData.General.lora_config ? String(jobData.General.lora_config) : undefined - } catch (e) { - // ignore - } - } if (jobData.hyperparameters) { try { @@ -364,6 +370,17 @@ const createFineTuningJob = async (jobConfig: { // Send the sanitized payload const resp = await attemptPost(sanitizedPayload, 'final') const respData = resp.data + // If the external service didn't echo back the task, preserve task from our sanitized payload + try { + const payloadTask = sanitizedPayload?.General?.task || sanitizedPayload?.task + if (payloadTask && !respData.task) { + // attach task so persistJobToDb stores it + try { respData.task = payloadTask } catch (e) { /* ignore */ } + } + } catch (e) { + // ignore + } + // Persist to local DB (best-effort) try { await persistJobToDb(respData) diff --git a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx index fb5f859..07e1c4b 100644 --- a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx +++ b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx @@ -294,9 +294,9 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { // Create job object from response const newJob = { id: response.data?.id || response.data?.fine_tuning_job_id || Date.now().toString(), - name: response.data?.id || response.data?.fine_tuning_job_id || `ft-${formData.baseModel}`, status: response.data?.status || 'pending', model: formData.baseModel, + task: formData.general?.task || formData.lora?.task_type || null, dataset: formData.trainingDataset?.suffixedName || formData.trainingDataset?.name || 'Unknown', progress: '0%', createdDate: response.data?.created_at || new Date().toISOString(), diff --git a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx index 7c4ff2c..83fb09d 100644 --- a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx +++ b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx @@ -143,6 +143,7 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null }) => { const getStatusColor = (status) => { switch (status?.toLowerCase()) { case 'completed': + case 'succeeded': return 'success' case 'running': return 'primary' @@ -184,15 +185,17 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null }) => { } return ( - + <> + +
- Job Name + Job ID Status Model + Task Dataset - Progress Checkpoints Actions Created Date @@ -203,39 +206,49 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null }) => { - {job.name} + {job.id} - + {/* Status with blinking indicator when running; show Chip only for other statuses */} + {String(job.status).toLowerCase() === 'running' ? ( + + + {job.status} + + ) : ( + + )} {job.model || 'N/A'} + + + {job.task || job.task_type || job.taskType || 'N/A'} + + {job.dataset || 'N/A'} - - - - - {job.progress || '0%'} - - - + {/* Progress column removed per request */} - + + ) } diff --git a/studio-frontend/packages/ui/src/views/finetuning/index.jsx b/studio-frontend/packages/ui/src/views/finetuning/index.jsx index 16e5594..fdefd27 100644 --- a/studio-frontend/packages/ui/src/views/finetuning/index.jsx +++ b/studio-frontend/packages/ui/src/views/finetuning/index.jsx @@ -1,4 +1,4 @@ -import { useEffect, useState } from 'react' +import { useEffect, useState, useRef } from 'react' import { useNavigate } from 'react-router-dom' // material-ui @@ -52,11 +52,114 @@ const Finetuning = () => { getAllJobsApi = useApi(finetuningApi.getAllJobs) } + const pollingRef = useRef(null) + useEffect(() => { // Load fine-tuning jobs loadJobs() }, []) + // Polling: when there are running jobs, poll backend until all jobs are completed/failed + useEffect(() => { + // helper to clear existing interval + const stopPolling = () => { + if (pollingRef.current) { + clearInterval(pollingRef.current) + pollingRef.current = null + } + } + + // Only start polling if user is authenticated + if (!keycloak?.authenticated) { + stopPolling() + return + } + + const hasRunning = (jobs || []).some(j => (j?.status || '').toString().toLowerCase() === 'running') + if (!hasRunning) { + stopPolling() + return + } + + // If already polling, keep it + if (pollingRef.current) return + + // Start polling every 5 seconds โ€” for each running job call the retrieve endpoint + console.debug('[finetuning] starting polling for running jobs') + pollingRef.current = setInterval(async () => { + console.debug('[finetuning] poll tick - checking running jobs') + try { + // find running jobs from current state + const runningJobs = (jobs || []).filter(j => (j?.status || '').toString().toLowerCase() === 'running') + if (runningJobs.length === 0) { + console.debug('[finetuning] no running jobs found, stopping polling') + stopPolling() + return + } + + // Retrieve updated details for each running job in parallel + const promises = runningJobs.map(j => { + // finetuningApi.getJob returns an axios promise; we want the response.data + console.debug('[finetuning] retrieving job:', j.id) + return finetuningApi.getJob(j.id).then(res => res.data).catch(err => { + console.error('Error retrieving job', j.id, err) + return null + }) + }) + + const updated = await Promise.all(promises) + + // normalize updated jobs and merge into current jobs list + const normalizeJob = (j) => { + if (!j) return null + const id = j.id || j.job_id || j.fine_tuning_job_id || String(Date.now()) + const name = j.name || id + const status = j.status || j.state || 'pending' + const model = j.model || 'N/A' + const dataset = j.dataset || j.training_file || j.trainingFile || 'N/A' + const progress = typeof j.progress === 'number' ? 
`${j.progress}%` : (j.progress || '0%') + const createdDate = j.createdDate || j.created_at || j.createdAt || new Date().toISOString() + return { + ...j, + id, + name, + status, + model, + dataset, + progress, + createdDate + } + } + + setJobs(prev => { + const updatedMap = {} + updated.forEach(u => { + if (!u) return + const n = normalizeJob(u) + if (n) updatedMap[n.id] = n + }) + + const newList = (prev || []).map(p => updatedMap[p.id] ? { ...p, ...updatedMap[p.id] } : p) + + // determine whether to stop polling based on the merged list + const stillRunningLocal = newList.some(j => (j?.status || '').toString().toLowerCase() === 'running') + if (!stillRunningLocal) { + console.debug('[finetuning] no running jobs remain after merge, stopping polling') + // stopPolling will clear the interval; call asynchronously to avoid interfering with state update + setTimeout(() => stopPolling(), 0) + } + + return newList + }) + } catch (err) { + console.error('Error while polling fine-tuning jobs (retrieve):', err) + } + }, 5000) + + // cleanup on unmount or dependency change + return () => stopPolling() + }, [jobs, keycloak?.authenticated]) + const loadJobs = async () => { if (!getAllJobsApi) return From 16f5911cb8a1ebc071f3d67fb46aa50f57d90995 Mon Sep 17 00:00:00 2001 From: wwanarif Date: Fri, 17 Oct 2025 08:34:01 +0000 Subject: [PATCH 09/23] added logs in ft job table and streamlined its format. updated ft job create modal Signed-off-by: wwanarif --- studio-frontend/.env.development | 2 +- .../src/controllers/finetuning/index.ts | 32 ++ .../server/src/routes/finetuning/index.ts | 3 + .../server/src/services/finetuning/index.ts | 88 +++- .../packages/ui/src/api/finetuning.js | 8 + .../views/finetuning/FinetuningJobModal.jsx | 160 +++++-- .../views/finetuning/FinetuningJobsTable.jsx | 424 ++++++++++++++---- .../ui/src/views/finetuning/index.jsx | 78 +++- 8 files changed, 638 insertions(+), 157 deletions(-) diff --git a/studio-frontend/.env.development b/studio-frontend/.env.development index e3f00ca..8ca9dd0 100644 --- a/studio-frontend/.env.development +++ b/studio-frontend/.env.development @@ -3,4 +3,4 @@ VITE_DISABLE_KEYCLOAK=true NODE_ENV=development VITE_HOST=0.0.0.0 VITE_PORT=8088 -FINETUNING_SERVICE_URL=http://${HOST_IP}:8015 \ No newline at end of file +HOST_IP= # Command to get your host ip: ip route get 1.1.1.1 | awk '{print $7}' \ No newline at end of file diff --git a/studio-frontend/packages/server/src/controllers/finetuning/index.ts b/studio-frontend/packages/server/src/controllers/finetuning/index.ts index 211c8c6..6562a53 100644 --- a/studio-frontend/packages/server/src/controllers/finetuning/index.ts +++ b/studio-frontend/packages/server/src/controllers/finetuning/index.ts @@ -137,6 +137,37 @@ const listFineTuningCheckpoints = async (req: Request, res: Response, next: Next } } +/** + * Fetch Ray/job logs for a fine-tuning job + * POST /api/v1/finetuning/jobs/logs + * body: { fine_tuning_job_id: string, ray_job_id?: string, tail?: number } + */ +const getFineTuningJobLogs = async (req: Request, res: Response, next: NextFunction) => { + try { + if (typeof req.body === 'undefined' || !req.body.fine_tuning_job_id) { + throw new InternalFlowiseError( + StatusCodes.BAD_REQUEST, + 'Error: finetuningController.getFineTuningJobLogs - fine_tuning_job_id not provided!' 
+ )
+ }
+
+ const fine_tuning_job_id = req.body.fine_tuning_job_id
+ const ray_job_id = req.body.ray_job_id
+
+ try {
+ const apiResponse = await finetuningService.getFineTuningJobLogs(fine_tuning_job_id, { ray_job_id })
+ // Service returns either { logs: string } or { logs: '', error: string }
+ return res.json(apiResponse)
+ } catch (err: any) {
+ // If the service throws, return a structured error payload instead of propagating a 500
+ const message = err?.message || String(err) || 'Unknown error fetching logs'
+ return res.json({ logs: '', error: `Error: ${message}` })
+ }
+ } catch (error) {
+ next(error)
+ }
+}
+
 /**
 * Debug: proxy an arbitrary job payload to the finetuning service and return raw response
 * POST /api/v1/finetuning/debug/proxy-job
@@ -163,5 +194,6 @@
 cancelFineTuningJob,
 deleteFineTuningJob,
 listFineTuningCheckpoints,
+ getFineTuningJobLogs,
 proxyJobDebug
 }
diff --git a/studio-frontend/packages/server/src/routes/finetuning/index.ts b/studio-frontend/packages/server/src/routes/finetuning/index.ts
index 1ee9fe8..b45565b 100644
--- a/studio-frontend/packages/server/src/routes/finetuning/index.ts
+++ b/studio-frontend/packages/server/src/routes/finetuning/index.ts
@@ -22,6 +22,9 @@ router.get('/jobs', finetuningController.listFineTuningJobs)
 // Retrieve a specific fine-tuning job
 router.post('/jobs/retrieve', finetuningController.retrieveFineTuningJob)
+// Fetch logs for a fine-tuning job
+router.post('/jobs/logs', finetuningController.getFineTuningJobLogs)
+
 // Cancel a fine-tuning job
 router.post('/jobs/cancel', finetuningController.cancelFineTuningJob)
 router.post('/jobs/delete', finetuningController.deleteFineTuningJob)
diff --git a/studio-frontend/packages/server/src/services/finetuning/index.ts b/studio-frontend/packages/server/src/services/finetuning/index.ts
index 4ef8dae..0df22e4 100644
--- a/studio-frontend/packages/server/src/services/finetuning/index.ts
+++ b/studio-frontend/packages/server/src/services/finetuning/index.ts
@@ -8,9 +8,9 @@ import { getRunningExpressApp } from '../../utils/getRunningExpressApp'
 import { FineTuningJob } from '../../database/entities/FineTuningJob'
 import { FineTuningCheckpoint } from '../../database/entities/FineTuningCheckpoint'
-// Get finetuning service URL from environment variable or use default
-// Note: use host network IP instead of localhost so containerized server can reach the finetuning service
-const FINETUNING_SERVICE_URL = process.env.FINETUNING_SERVICE_URL || 'undefined'
+// Derive finetuning service base URL from HOST_IP (default port 8015) if not explicitly provided.
+const FINETUNING_SERVICE_URL = process.env.HOST_IP ? `http://${process.env.HOST_IP}:8015` : 'undefined'
+console.debug('finetuningService - FINETUNING_SERVICE_URL', FINETUNING_SERVICE_URL)
 // Create an axios client with keep-alive to reduce connection churn
 const agentOptions = { keepAlive: true, maxSockets: 20 }
@@ -624,6 +624,87 @@ const proxyJobDebug = async (payload: any) => {
 }
 }
+/**
+ * Get logs for a fine-tuning job by querying the Ray head node HTTP API.
+ * It will call: http://<ray_host>/api/jobs/<submission_id>/logs
+ * Environment: set HOST_IP to the Ray head host; the dashboard is assumed at "<HOST_IP>:8265".
+ */
+const getFineTuningJobLogs = async (
+ fineTuningJobId: string,
+ options: { ray_job_id?: string } = {}
+) => {
+ try {
+ // Determine Ray dashboard host (host:port). We only use HOST_IP to derive the Ray dashboard address.
+ const rayHost = process.env.HOST_IP ? 
`${process.env.HOST_IP}:8265` : 'undefined' + + // If caller provided an explicit ray_job_id, use it. Otherwise attempt to discover the Ray submission id + let submissionId: string | undefined = options.ray_job_id + + // Query Ray /api/jobs/ and select entries where entrypoint contains the FT id (jq-like) + const listUrl = `http://${rayHost}/api/jobs/` + console.debug('finetuningService.getFineTuningJobLogs - listUrl:', listUrl) + try { + const listResp = await axios.get(listUrl, { timeout: 20000 }) + // Debug: log status and length of Ray /api/jobs/ output; full dump only when explicitly enabled + try { + const raw = listResp.data + const len = typeof raw === 'string' ? raw.length : JSON.stringify(raw).length + console.debug('finetuningService.getFineTuningJobLogs - Ray /api/jobs/ status=', listResp.status, 'len=', len) + if (String(process.env.RAY_DUMP_JOBS).toLowerCase() === 'true') { + try { + const pretty = typeof raw === 'string' ? raw : JSON.stringify(raw, null, 2) + console.debug('finetuningService.getFineTuningJobLogs - Ray /api/jobs/ FULL DUMP:\n' + pretty) + } catch (e) { + try { console.debug('finetuningService.getFineTuningJobLogs - failed to stringify full Ray jobs list', String(e)) } catch (ignore) {} + } + } + } catch (logErr) { + try { console.debug('finetuningService.getFineTuningJobLogs - failed to inspect Ray jobs list', String(logErr)) } catch (ignore) {} + } + const jobsList = Array.isArray(listResp.data) ? listResp.data : [] + // Apply strict filter: entrypoint contains the exact FT id + const match = jobsList.find((j: any) => { + try { + const entrypoint = j?.entrypoint || '' + return String(entrypoint).includes(String(fineTuningJobId)) + } catch (e) { + return false + } + }) + if (match) { + submissionId = match.submission_id || match.job_id + } + } catch (e) { + try { console.error('finetuningService.getFineTuningJobLogs - failed to list Ray jobs', String(e)) } catch (err) {} + } + + // Construct logs URL with optional tail and fetch logs + const url = `http://${rayHost}/api/jobs/${encodeURIComponent(String(submissionId))}/logs` + const resp = await axios.get(url, { timeout: 30000 }) + // Normalize logs response so newlines are preserved and objects/arrays are readable + try { + const rawLogs = resp.data + if (typeof rawLogs === 'string') { + // string likely contains proper newlines + return { logs: rawLogs } + } + if (Array.isArray(rawLogs)) { + return { logs: rawLogs.join('\n') } + } + // object -> pretty-print with indentation to preserve newlines + return { logs: JSON.stringify(rawLogs, null, 2) } + } catch (e) { + // fallback to safe stringify + return { logs: JSON.stringify(resp.data, null, 2) } + } + } catch (error: any) { + // Provide helpful error details and return a structured error instead of throwing + const msg = `Error fetching logs: ${getErrorMessage(error)}` + try { (globalThis as any).console?.error && (globalThis as any).console.error('finetuningService.getFineTuningJobLogs -', String(error)) } catch (e) {} + return { logs: '', error: msg } + } +} + export default { uploadTrainingFile, createFineTuningJob, @@ -632,5 +713,6 @@ export default { cancelFineTuningJob, listFineTuningCheckpoints, deleteFineTuningJob, + getFineTuningJobLogs, proxyJobDebug } diff --git a/studio-frontend/packages/ui/src/api/finetuning.js b/studio-frontend/packages/ui/src/api/finetuning.js index ad3352a..c8363c8 100644 --- a/studio-frontend/packages/ui/src/api/finetuning.js +++ b/studio-frontend/packages/ui/src/api/finetuning.js @@ -64,6 +64,14 @@ const finetuningApi = { 
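
The service-side log retrieval above boils down to two Ray dashboard calls: list the jobs, pick the entry whose entrypoint mentions the fine-tuning id, then fetch that submission's logs. A condensed sketch of the same flow with the debug logging and env plumbing stripped out (host, endpoints, and the entrypoint heuristic are as in the patch; error handling is reduced to a fallback):

import axios from 'axios'

// Resolve the Ray submission id for a fine-tuning job, then fetch its logs.
async function fetchRayLogs(rayHost: string, ftJobId: string): Promise<string> {
    const { data: jobs } = await axios.get(`http://${rayHost}/api/jobs/`, { timeout: 20000 })
    const match = (Array.isArray(jobs) ? jobs : []).find(
        (j: any) => String(j?.entrypoint || '').includes(ftJobId)
    )
    if (!match) return '' // no Ray job whose entrypoint references this fine-tuning id
    const submissionId = match.submission_id || match.job_id
    const resp = await axios.get(`http://${rayHost}/api/jobs/${encodeURIComponent(String(submissionId))}/logs`, { timeout: 30000 })
    // Ray may answer with a plain string, an array of lines, or an object
    const raw = resp.data
    if (typeof raw === 'string') return raw
    if (Array.isArray(raw)) return raw.join('\n')
    return JSON.stringify(raw, null, 2)
}

The client-side wrapper in the hunk below simply POSTs the job id to the new /finetuning/jobs/logs route.
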
}) }, + // Get logs for a fine-tuning job + getJobLogs: (fineTuningJobId, opts = {}) => { + return client.post('/finetuning/jobs/logs', { + fine_tuning_job_id: fineTuningJobId, + ray_job_id: opts.ray_job_id + }) + }, + // Legacy compatibility methods deleteJob: (jobId) => { // Call the backend delete endpoint which will cancel remote job (best-effort) and remove local DB records diff --git a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx index 07e1c4b..473c708 100644 --- a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx +++ b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx @@ -92,6 +92,9 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { const [isSubmitting, setIsSubmitting] = useState(false) const [loraEnabled, setLoraEnabled] = useState(false) + const [datasetEnabled, setDatasetEnabled] = useState(true) + const [generalEnabled, setGeneralEnabled] = useState(true) + const [trainingEnabled, setTrainingEnabled] = useState(true) const baseModels = [ @@ -191,30 +194,52 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { newErrors.trainingDataset = 'Training dataset is required' } - // OpenAI parameters validation - if (formData.openai_params.learning_rate_multiplier <= 0) { - newErrors.learning_rate_multiplier = 'Learning rate multiplier must be greater than 0' - } + // OpenAI parameters validation (only when training enabled) + if (trainingEnabled) { + if (formData.openai_params.learning_rate_multiplier <= 0) { + newErrors.learning_rate_multiplier = 'Learning rate multiplier must be greater than 0' + } - if (formData.openai_params.batch_size <= 0) { - newErrors.batch_size = 'Batch size must be greater than 0' - } + if (formData.openai_params.batch_size <= 0) { + newErrors.batch_size = 'Batch size must be greater than 0' + } - if (formData.openai_params.n_epochs <= 0) { - newErrors.n_epochs = 'Number of epochs must be greater than 0' + if (formData.openai_params.n_epochs <= 0) { + newErrors.n_epochs = 'Number of epochs must be greater than 0' + } } - // Training parameters validation - if (formData.training.learning_rate <= 0) { - newErrors.learning_rate = 'Learning rate must be greater than 0' + // Training parameters validation (only when enabled) + if (trainingEnabled) { + if (formData.training.learning_rate <= 0) { + newErrors.learning_rate = 'Learning rate must be greater than 0' + } + + if (formData.training.epochs <= 0) { + newErrors.epochs = 'Epochs must be greater than 0' + } + + if (formData.training.logging_steps <= 0) { + newErrors.logging_steps = 'Logging steps must be greater than 0' + } } - if (formData.training.epochs <= 0) { - newErrors.epochs = 'Epochs must be greater than 0' + // General validation (only when enabled) + if (generalEnabled) { + if (!formData.general.output_dir) { + newErrors.output_dir = 'Output directory is required' + } } - if (formData.training.logging_steps <= 0) { - newErrors.logging_steps = 'Logging steps must be greater than 0' + // Dataset validation (only when enabled) + if (datasetEnabled) { + if (!formData.dataset) { + newErrors.dataset = 'Dataset configuration is required' + } else { + if (formData.dataset.max_length <= 0) { + newErrors.dataset_max_length = 'Max length must be greater than 0' + } + } } // LoRA parameters validation (only when enabled) @@ -269,19 +294,38 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { } } + // Build payload and 
only include sections that are enabled const jobPayload = { model: formData.baseModel, // Use uploaded filename/id (if available) training_file: trainingFileName, - training_file_id: trainingFileId, - General: generalPayload, - Dataset: { + training_file_id: trainingFileId + } + + if (generalEnabled) { + // If user enabled LoRA, include the object; otherwise send explicit null inside General + const gen = { ...formData.general } + gen.lora_config = loraEnabled ? formData.lora : null + jobPayload.General = gen + // set top-level task for DB compatibility + jobPayload.task = gen.task || 'instruction_tuning' + } else { + // General disabled: ensure DB task column is set to default + jobPayload.task = 'instruction_tuning' + } + + if (datasetEnabled) { + jobPayload.Dataset = { max_length: formData.dataset.max_length, + // fallback keys if some are undefined query_max_len: formData.dataset.query_max_len, passage_max_len: formData.dataset.passage_max_len, padding: formData.dataset.padding_side - }, - Training: { + } + } + + if (trainingEnabled) { + jobPayload.Training = { epochs: formData.training.epochs, batch_size: formData.openai_params.batch_size, gradient_accumulation_steps: formData.training.gradient_accumulation_steps @@ -296,18 +340,27 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { id: response.data?.id || response.data?.fine_tuning_job_id || Date.now().toString(), status: response.data?.status || 'pending', model: formData.baseModel, - task: formData.general?.task || formData.lora?.task_type || null, + task: jobPayload.task || (loraEnabled ? formData.lora?.task_type : 'instruction_tuning'), dataset: formData.trainingDataset?.suffixedName || formData.trainingDataset?.name || 'Unknown', progress: '0%', createdDate: response.data?.created_at || new Date().toISOString(), - // Include all configuration sections - openai_params: formData.openai_params, - general: formData.general, - dataset_config: formData.dataset, - training: formData.training, training_file: jobPayload.training_file } + // Mirror payload sections in the newJob object for UI + if (trainingEnabled) { + newJob.openai_params = formData.openai_params + newJob.training = formData.training + } + + if (generalEnabled) { + newJob.general = formData.general + } + + if (datasetEnabled) { + newJob.dataset_config = formData.dataset + } + onJobCreated(newJob) handleClose() } catch (error) { @@ -396,7 +449,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { - + {/* Top Left Quadrant: Model Configuration and Dataset Configuration */} @@ -404,7 +457,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { {/* Model Configuration */} - Model Configuration + Model Configuration* Base Model @@ -429,9 +482,12 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { {/* Dataset Configuration */} - - Dataset Configuration - + + + Dataset Configuration + + setDatasetEnabled(e.target.checked)} />} label="Enable" /> + @@ -444,6 +500,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { size="medium" fullWidth sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + disabled={!datasetEnabled} /> @@ -456,6 +513,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { size="medium" fullWidth sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + disabled={!datasetEnabled} /> @@ -465,6 +523,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { value={formData.dataset.data_preprocess_type} onChange={(e) => handleConfigChange('dataset', 
'data_preprocess_type', e.target.value)} label="Preprocess Type" + disabled={!datasetEnabled} > Neural Chat General @@ -481,6 +540,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { size="medium" fullWidth sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + disabled={!datasetEnabled} /> @@ -509,9 +569,12 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { {/* Bottom Left Quadrant: General Configuration + LoRA */} - - General Configuration - + + + General Configuration + + setGeneralEnabled(e.target.checked)} />} label="Enable" /> + Task Type @@ -519,6 +582,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { value={formData.general.task} onChange={(e) => handleConfigChange('general', 'task', e.target.value)} label="Task Type" + disabled={!generalEnabled} > Instruction Tuning Pretraining @@ -534,6 +598,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { value={formData.general.report_to} onChange={(e) => handleConfigChange('general', 'report_to', e.target.value)} label="Report To" + disabled={!generalEnabled} > None TensorBoard @@ -545,6 +610,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { onChange={(e) => handleConfigChange('general', 'output_dir', e.target.value)} fullWidth size="medium" + disabled={!generalEnabled} /> LoRA Configuration @@ -607,10 +673,13 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { {/* Bottom Right Quadrant: Training Configuration + OpenAI */} - - - Training Configuration - + + + + Training Configuration + + setTrainingEnabled(e.target.checked)} />} label="Enable" /> + @@ -625,6 +694,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { size="medium" fullWidth sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + disabled={!trainingEnabled} /> @@ -638,6 +708,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { size="medium" fullWidth sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + disabled={!trainingEnabled} /> @@ -652,13 +723,15 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { size="medium" fullWidth sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + disabled={!trainingEnabled} /> Optimizer
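
The enable/disable switches above pair with the gating added to validateForm earlier in this patch: a disabled section is skipped by validation and omitted from the payload, so stale values in hidden fields cannot block submission. Reduced to its core, the pattern looks like this (field names come from the patch; the shape is a sketch, not the full validator):

// Only validate sections the user has enabled; disabled sections are
// omitted from the job payload, so their values must not block submit.
const newErrors: Record<string, string> = {}
if (trainingEnabled && formData.training.epochs <= 0) {
    newErrors.epochs = 'Epochs must be greater than 0'
}
if (generalEnabled && !formData.general.output_dir) {
    newErrors.output_dir = 'Output directory is required'
}
if (datasetEnabled && formData.dataset.max_length <= 0) {
    newErrors.dataset_max_length = 'Max length must be greater than 0'
}
setErrors(newErrors)
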
- - - Job ID - Status - Model - Task - Dataset - Checkpoints - Actions - Created Date - - - - {data.map((job) => ( - - - - {job.id} - - - - {/* Status with blinking indicator when running; show Chip only for other statuses */} - {String(job.status).toLowerCase() === 'running' ? ( - - - {job.status} - - ) : ( - - )} - - - - {job.model || 'N/A'} - - - - - {job.task || job.task_type || job.taskType || 'N/A'} - - - - - {job.dataset || 'N/A'} - - - {/* Progress column removed per request */} - -
+ + + + handleRequestSort('id')} > - View - - - - handleMenuClick(e, job)} + ID + + + Status + Model + Task + Dataset + Checkpoints + Logs + Actions + + handleRequestSort('createdDate')} > - - - - - - {job.createdDate ? formatDate(job.createdDate) : 'Unknown'} - - - - ))} - -
- + Created Date + +
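
The ID and Created Date headers above are wired to handleRequestSort, which implies order/orderBy state plus a comparator feeding visibleData; those live outside this hunk, so the following is a sketch of the usual MUI table-sorting pattern under that assumption (handleRequestSort, visibleData, and filterFunction are the names the rendered markup references; the string comparator is an assumption, and the real table may compare dates numerically):

// Assumed sorting state behind the sortable headers above.
const [order, setOrder] = useState('desc')
const [orderBy, setOrderBy] = useState('createdDate')

const handleRequestSort = (property) => {
    // Clicking the active column flips direction; a new column starts ascending.
    const isAsc = orderBy === property && order === 'asc'
    setOrder(isAsc ? 'desc' : 'asc')
    setOrderBy(property)
}

// Rows actually rendered: filtered by the parent-supplied predicate, then sorted.
const visibleData = [...(data || [])]
    .filter((job) => (filterFunction ? filterFunction(job) : true))
    .sort((a, b) => {
        const cmp = String(a?.[orderBy] ?? '').localeCompare(String(b?.[orderBy] ?? ''))
        return order === 'asc' ? cmp : -cmp
    })
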
+ + + + {visibleData.length === 0 ? ( + + + No fine-tuning jobs match the current filter + + + ) : ( + visibleData.map((job) => ( + + + + {job.id} + + + + {/* Status with blinking indicator when running; show Chip only for other statuses */} + {String(job.status).toLowerCase() === 'running' ? ( + + + {job.status} + + ) : ( + + )} + + + + {job.model || 'N/A'} + + + + + {job.task || job.task_type || job.taskType || 'N/A'} + + + + + {job.dataset || 'N/A'} + + + + + + + + handleViewLogs(job)} title="Logs"> + + + + + + handleMenuClick(e, job)} + > + + + + + + {job.createdDate ? formatDate(job.createdDate) : 'Unknown'} + + + + )) + )} + + + { anchorOrigin={{ vertical: 'bottom', horizontal: 'left' }} > - + View Details {/* View Checkpoints removed from Actions menu: use the Checkpoints column button to open the modal */} @@ -352,7 +569,23 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null }) => { - + + {/* Logs Dialog */} + setLogsOpen(false)} maxWidth="lg" fullWidth> + Job Logs + + {logsLoading ? ( + Loading logs... + ) : ( +
+
{logsData || 'No logs available'}
+
+ )} +
+ + + +
) } @@ -360,7 +593,12 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null }) => { FinetuningJobsTable.propTypes = { data: PropTypes.array.isRequired, isLoading: PropTypes.bool, - onRefresh: PropTypes.func + onRefresh: PropTypes.func, + filterFunction: PropTypes.func +} + +FinetuningJobsTable.defaultProps = { + filterFunction: null } // default props handled via function default parameters diff --git a/studio-frontend/packages/ui/src/views/finetuning/index.jsx b/studio-frontend/packages/ui/src/views/finetuning/index.jsx index fdefd27..b8b42d0 100644 --- a/studio-frontend/packages/ui/src/views/finetuning/index.jsx +++ b/studio-frontend/packages/ui/src/views/finetuning/index.jsx @@ -2,7 +2,7 @@ import { useEffect, useState, useRef } from 'react' import { useNavigate } from 'react-router-dom' // material-ui -import { Box, Skeleton, Stack, ToggleButton, ToggleButtonGroup, Typography } from '@mui/material' +import { Box, Skeleton, Stack, ToggleButton, ToggleButtonGroup, Typography, Input } from '@mui/material' import { useTheme } from '@mui/material/styles' // project imports @@ -25,7 +25,7 @@ import finetuningApi from '@/api/finetuning' import useApi from '@/hooks/useApi' // icons -import { IconPlus, IconLayoutGrid, IconList } from '@tabler/icons-react' +import { IconPlus, IconLayoutGrid, IconList, IconSearch } from '@tabler/icons-react' //keycloak import { useKeycloak } from '../../KeycloakContext' @@ -215,13 +215,21 @@ const Finetuning = () => { setJobModalOpen(false) } - const filterJobs = (jobs) => { - if (!search || search.trim() === '') return jobs + const onSearchChange = (event) => { + setSearch(event.target.value) + } + + // Predicate function used by FinetuningJobsTable to show/hide rows + const filterJobs = (job) => { + if (!search || search.trim() === '') return true const q = search.toLowerCase() - return jobs.filter((job) => { - const name = (job?.name || job?.id || '').toString().toLowerCase() - return name.includes(q) - }) + const id = (job?.id || '').toString().toLowerCase() + const name = (job?.name || '').toString().toLowerCase() + const model = (job?.model || '').toString().toLowerCase() + const dataset = (job?.dataset || job?.training_file || '').toString().toLowerCase() + const task = (job?.task || job?.task_type || job?.taskType || '').toString().toLowerCase() + const status = (job?.status || '').toString().toLowerCase() + return id.includes(q) || name.includes(q) || model.includes(q) || dataset.includes(q) || task.includes(q) || status.includes(q) } return ( @@ -241,16 +249,47 @@ const Finetuning = () => { > Fine-tuning Jobs - - } - sx={{ borderRadius: 2, height: 40 }} - > - Create New Job - - + + + } + sx={{ borderRadius: 2, height: 40 }} + > + Create New Job + + + + + + + } + type='search' + /> + {isLoading ? 
( @@ -285,7 +324,8 @@ const Finetuning = () => { ) : ( From 0b695123fc6dadfcb1b34a964aab4bea17650c8e Mon Sep 17 00:00:00 2001 From: wwanarif Date: Tue, 21 Oct 2025 08:23:19 +0000 Subject: [PATCH 10/23] integrate finetuning server in studio k8 namespace Signed-off-by: wwanarif --- .../manifests/studio-manifest.yaml | 61 ++++++++++++++++++- .../setup-genai-studio/studio-config.yaml | 3 +- studio-frontend/.env.development | 2 +- .../server/src/services/finetuning/index.ts | 6 +- 4 files changed, 65 insertions(+), 7 deletions(-) diff --git a/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml b/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml index 94d04c9..dc39230 100644 --- a/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml +++ b/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml @@ -434,6 +434,9 @@ spec: securityContext: {} image: ${REGISTRY}/studio-frontend:${TAG} imagePullPolicy: Always + envFrom: + - configMapRef: + name: studio-config env: - name: DATABASE_TYPE value: mysql @@ -611,4 +614,60 @@ spec: port: 8443 targetPort: 8443 selector: - app: keycloak \ No newline at end of file + app: keycloak + +--- +apiVersion: v1 +kind: Service +metadata: + name: finetuning-server + namespace: studio +spec: + type: ClusterIP + ports: + - name: api + protocol: TCP + port: 8015 + targetPort: 8015 + - name: dashboard + protocol: TCP + port: 8265 + targetPort: 8265 + selector: + app: finetuning-server + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: finetuning-server + namespace: studio + labels: + app: finetuning-server +spec: + replicas: 1 + selector: + matchLabels: + app: finetuning-server + template: + metadata: + labels: + app: finetuning-server + spec: + # Mirror --ipc=host from docker run + # hostIPC: true + containers: + - name: finetuning-server + image: opea/finetuning:latest + imagePullPolicy: IfNotPresent + ports: + - containerPort: 8015 + - containerPort: 8265 + env: + - name: http_proxy + value: ${HTTP_PROXY} + - name: https_proxy + value: ${HTTP_PROXY} + - name: NO_PROXY + value: ${NO_PROXY} + resources: {} \ No newline at end of file diff --git a/setup-scripts/setup-genai-studio/studio-config.yaml b/setup-scripts/setup-genai-studio/studio-config.yaml index af9d391..e876692 100644 --- a/setup-scripts/setup-genai-studio/studio-config.yaml +++ b/setup-scripts/setup-genai-studio/studio-config.yaml @@ -14,4 +14,5 @@ data: APP_BACKEND_DNS: "app-backend.$namespace.svc.cluster.local:8899" APP_CHATHISTORY_DNS: "chathistory-mongo.$namespace.svc.cluster.local:6012" PREPARE_DOC_REDIS_PREP_DNS: "opea-prepare-doc-redis-prep-0.$namespace.svc.cluster.local:6007" - STUDIO_BACKEND_DNS: "studio-backend.studio.svc.cluster.local:5000" \ No newline at end of file + STUDIO_BACKEND_DNS: "studio-backend.studio.svc.cluster.local:5000" + FINETUNING_HOST: "finetuning-server.studio.svc.cluster.local" \ No newline at end of file diff --git a/studio-frontend/.env.development b/studio-frontend/.env.development index 8ca9dd0..0a6398e 100644 --- a/studio-frontend/.env.development +++ b/studio-frontend/.env.development @@ -3,4 +3,4 @@ VITE_DISABLE_KEYCLOAK=true NODE_ENV=development VITE_HOST=0.0.0.0 VITE_PORT=8088 -HOST_IP= # Command to get your host ip: ip route get 1.1.1.1 | awk '{print $7}' \ No newline at end of file +FINETUNING_HOST= # Command to get your host ip: ip route get 1.1.1.1 | awk '{print $7}' \ No newline at end of file diff --git a/studio-frontend/packages/server/src/services/finetuning/index.ts 
b/studio-frontend/packages/server/src/services/finetuning/index.ts
index 0df22e4..89dcbc9 100644
--- a/studio-frontend/packages/server/src/services/finetuning/index.ts
+++ b/studio-frontend/packages/server/src/services/finetuning/index.ts
@@ -8,8 +8,7 @@ import { getRunningExpressApp } from '../../utils/getRunningExpressApp'
 import { FineTuningJob } from '../../database/entities/FineTuningJob'
 import { FineTuningCheckpoint } from '../../database/entities/FineTuningCheckpoint'
-// Derive finetuning service base URL from HOST_IP (default port 8015) if not explicitly provided.
-const FINETUNING_SERVICE_URL = process.env.HOST_IP ? `http://${process.env.HOST_IP}:8015` : 'undefined'
+const FINETUNING_SERVICE_URL = process.env.FINETUNING_HOST ? `http://${process.env.FINETUNING_HOST}:8015` : 'undefined'
 console.debug('finetuningService - FINETUNING_SERVICE_URL', FINETUNING_SERVICE_URL)
 // Create an axios client with keep-alive to reduce connection churn
@@ -634,8 +633,7 @@ const getFineTuningJobLogs = async (
 options: { ray_job_id?: string } = {}
 ) => {
 try {
- // Determine Ray dashboard host (host:port). We only use HOST_IP to derive the Ray dashboard address.
- const rayHost = process.env.HOST_IP ? `${process.env.HOST_IP}:8265` : 'undefined'
+ const rayHost = process.env.FINETUNING_HOST ? `${process.env.FINETUNING_HOST}:8265` : 'undefined'

From 05e5cf3569c3384b90bb3b952656baf84ab7452d Mon Sep 17 00:00:00 2001
From: wwanarif
Date: Fri, 24 Oct 2025 03:54:43 +0000
Subject: [PATCH 11/23] further improvements to ft job table and modal

Signed-off-by: wwanarif
---
 .../src/controllers/finetuning/index.ts | 14 +--
 .../server/src/services/finetuning/index.ts | 105 +-----------------
 .../packages/ui/src/api/finetuning.js | 2 -
 .../views/finetuning/FinetuningJobModal.jsx | 82 +++++++++-----
 .../views/finetuning/FinetuningJobsTable.jsx | 93 +++++++++++-----
 5 files changed, 125 insertions(+), 171 deletions(-)

diff --git a/studio-frontend/packages/server/src/controllers/finetuning/index.ts b/studio-frontend/packages/server/src/controllers/finetuning/index.ts
index 6562a53..798dac7 100644
--- a/studio-frontend/packages/server/src/controllers/finetuning/index.ts
+++ b/studio-frontend/packages/server/src/controllers/finetuning/index.ts
@@ -29,13 +29,13 @@
 */
 const createFineTuningJob = async (req: Request, res: Response, next: NextFunction) => {
 try {
- const hasFile = !!req.body?.training_file || !!(req.body as any).training_file_id
- if (!req.body || !hasFile || !req.body.model) {
- throw new InternalFlowiseError(
- StatusCodes.BAD_REQUEST,
- 'Error: finetuningController.createFineTuningJob - model and training_file (or training_file_id) are required!'
- )
- }
+ const hasFile = !!req.body?.training_file
+ if (!req.body || !hasFile || !req.body.model) {
+ throw new InternalFlowiseError(
+ StatusCodes.BAD_REQUEST,
+ 'Error: finetuningController.createFineTuningJob - model and training_file are required!' 
+ ) + } const apiResponse = await finetuningService.createFineTuningJob(req.body) return res.json(apiResponse) diff --git a/studio-frontend/packages/server/src/services/finetuning/index.ts b/studio-frontend/packages/server/src/services/finetuning/index.ts index 89dcbc9..2acb1c3 100644 --- a/studio-frontend/packages/server/src/services/finetuning/index.ts +++ b/studio-frontend/packages/server/src/services/finetuning/index.ts @@ -58,32 +58,6 @@ const uploadTrainingFile = async (file: Express.Multer.File, purpose: string = ' // ignore logging errors } - // If the finetuning service returned an id and a filename, store mappings for both - try { - const returnedId = response?.data?.id || response?.data?.file_id || response?.data?.name || undefined - const returnedFilenameRaw = response?.data?.filename || response?.data?.name || undefined - if (returnedId && returnedFilenameRaw) { - // store both raw and decoded filename keys - let decodedFilename = returnedFilenameRaw - try { - decodedFilename = decodeURIComponent(returnedFilenameRaw) - } catch (e) { - // ignore decode errors - } - - const entry = { id: returnedId, rawFilename: returnedFilenameRaw } - uploadedFileIdMap.set(returnedFilenameRaw, entry) - if (decodedFilename !== returnedFilenameRaw) { - uploadedFileIdMap.set(decodedFilename, entry) - } - - // eslint-disable-next-line no-console - console.debug('finetuningService.uploadTrainingFile - stored mapping', decodedFilename, '<->', returnedFilenameRaw, '->', returnedId) - } - } catch (e) { - // ignore mapping errors - } - return response.data } catch (error: any) { throw new InternalFlowiseError( @@ -128,7 +102,6 @@ const persistJobToDb = async (jobData: any) => { model: jobData.model || undefined, status: jobData.status || jobData.state || undefined, training_file: jobData.training_file || jobData.trainingFile || undefined, - training_file_id: jobData.training_file_id || undefined, task: taskVal || undefined, progress: typeof jobData.progress === 'number' ? jobData.progress : undefined, trained_tokens: typeof jobData.trained_tokens === 'number' ? jobData.trained_tokens : undefined @@ -228,39 +201,9 @@ const createFineTuningJob = async (jobConfig: { // the external service may expect the raw (possibly URL-encoded) filename. 
const forwardedJobConfig = { ...jobConfig } - // Debug: log the jobConfig being forwarded to the external finetuning service - try { - // eslint-disable-next-line no-console - console.debug('finetuningService.createFineTuningJob - initial jobConfig:', forwardedJobConfig) - } catch (logErr) { - // ignore - } - // Sanitize the payload: remove undefined values and empty nested objects + // (Removed verbose initial jobConfig logging to reduce noise) const sanitizedPayload = JSON.parse(JSON.stringify(forwardedJobConfig)) - - // // Fix lora_config: must be explicitly null for rerank/embedding tasks, or omitted for instruction tuning - // if (sanitizedPayload?.General) { - // if (Object.prototype.hasOwnProperty.call(sanitizedPayload.General, 'lora_config')) { - // const task = sanitizedPayload.General.task - // if (task === 'rerank' || task === 'embedding') { - // // For rerank/embedding tasks, lora_config must be explicitly null - // sanitizedPayload.General.lora_config = null - // // eslint-disable-next-line no-console - // console.debug('finetuningService.createFineTuningJob - setting General.lora_config to null for task:', task) - // } else { - // // For instruction tuning or other tasks, remove lora_config - // // eslint-disable-next-line no-console - // console.debug('finetuningService.createFineTuningJob - removing General.lora_config for instruction tuning') - // delete sanitizedPayload.General.lora_config - // } - // } else if (sanitizedPayload.General.task === 'rerank' || sanitizedPayload.General.task === 'embedding') { - // // If lora_config is missing for rerank/embedding, add it as null - // sanitizedPayload.General.lora_config = null - // // eslint-disable-next-line no-console - // console.debug('finetuningService.createFineTuningJob - adding lora_config=null for task:', sanitizedPayload.General.task) - // } - // } - + // Remove empty nested objects that may confuse the server if (sanitizedPayload.General && Object.keys(sanitizedPayload.General).length === 0) { delete sanitizedPayload.General @@ -272,27 +215,6 @@ const createFineTuningJob = async (jobConfig: { delete sanitizedPayload.Training } - // // For embedding/rerank tasks, only send training_file, model, and General (as per documentation examples) - // // Additional Dataset/Training params may cause 500 errors - // const task = sanitizedPayload.General?.task - // if (task === 'embedding' || task === 'rerank') { - // // Create minimal payload for embedding/rerank - // const minimalPayload: any = { - // training_file: sanitizedPayload.training_file, - // model: sanitizedPayload.model, - // General: sanitizedPayload.General - // } - // // Only include Dataset/Training if they have non-default values - // // eslint-disable-next-line no-console - // console.debug('finetuningService.createFineTuningJob - using minimal payload for', task, 'task') - // Object.assign(sanitizedPayload, minimalPayload) - // // Remove Dataset and Training for embedding/rerank to match documentation - // delete (sanitizedPayload as any).Dataset - // delete (sanitizedPayload as any).Training - // } - - // Use the stored raw filename from upload if available - // The upload response returns the exact filename as stored on the finetuning service if (sanitizedPayload.training_file && typeof sanitizedPayload.training_file === 'string') { const originalFilename = sanitizedPayload.training_file @@ -314,23 +236,8 @@ const createFineTuningJob = async (jobConfig: { if (stored && stored.rawFilename) { sanitizedPayload.training_file = stored.rawFilename - // 
eslint-disable-next-line no-console - console.debug('finetuningService.createFineTuningJob - using stored raw filename from upload:', stored.rawFilename) - } else { - // No stored mapping, try to use the original filename as-is - // The upload service may have stored it with the encoded name - sanitizedPayload.training_file = originalFilename - // eslint-disable-next-line no-console - console.debug('finetuningService.createFineTuningJob - no stored mapping found, using filename as-is:', originalFilename) } } - - // Remove training_file_id - the API doesn't accept it, only training_file is required - if ((sanitizedPayload as any).training_file_id) { - // eslint-disable-next-line no-console - console.debug('finetuningService.createFineTuningJob - removing training_file_id from payload') - delete (sanitizedPayload as any).training_file_id - } // Try a sequence of attempts to accommodate naming/encoding/id differences. const attemptPost = async (payload: any, label = 'attempt') => { @@ -358,14 +265,6 @@ const createFineTuningJob = async (jobConfig: { } } - // Log the final sanitized payload - try { - // eslint-disable-next-line no-console - console.debug('finetuningService.createFineTuningJob - final sanitized payload:', JSON.stringify(sanitizedPayload, null, 2)) - } catch (e) { - // ignore - } - // Send the sanitized payload const resp = await attemptPost(sanitizedPayload, 'final') const respData = resp.data diff --git a/studio-frontend/packages/ui/src/api/finetuning.js b/studio-frontend/packages/ui/src/api/finetuning.js index c8363c8..c5636a1 100644 --- a/studio-frontend/packages/ui/src/api/finetuning.js +++ b/studio-frontend/packages/ui/src/api/finetuning.js @@ -17,8 +17,6 @@ const finetuningApi = { createJob: (jobData) => { const payload = { training_file: jobData.training_file, - // forward training_file_id when available (server will prefer id) - ...(jobData.training_file_id ? { training_file_id: jobData.training_file_id } : {}), model: jobData.model } diff --git a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx index 473c708..39fd4ea 100644 --- a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx +++ b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx @@ -22,6 +22,7 @@ import { CircularProgress, Grid } from '@mui/material' +import Autocomplete from '@mui/material/Autocomplete' import { useTheme } from '@mui/material/styles' // icons @@ -39,6 +40,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { const [formData, setFormData] = useState({ baseModel: '', trainingDataset: null, + hf_token: '', // OpenAI standard parameters openai_params: { n_epochs: 3, @@ -276,16 +278,12 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { generalPayload.lora_config = loraEnabled ? 
formData.lora : null // If the user selected a file but hasn't uploaded it yet, upload it now - let trainingFileName = formData.trainingDataset?.uploadedName || formData.trainingDataset?.name - let trainingFileId = formData.trainingDataset?.id + let trainingFileName = formData.trainingDataset?.uploadedName || null if (formData.trainingDataset && formData.trainingDataset.file) { try { setIsSubmitting(true) - const uploadResp = await finetuningApi.uploadFile(formData.trainingDataset.file, 'fine-tune', (progressEvent) => { - // we could wire progress to UI if desired - }) - trainingFileName = uploadResp.data?.filename || trainingFileName || formData.trainingDataset.file.name - trainingFileId = uploadResp.data?.id || trainingFileId + const uploadResp = await finetuningApi.uploadFile(formData.trainingDataset.file, 'fine-tune', () => {}) + trainingFileName = uploadResp.data?.filename || null } catch (err) { console.error('Error uploading training file before job creation:', err) setErrors(prev => ({ ...prev, trainingDataset: 'Failed to upload training file: ' + (err.message || 'Unknown') })) @@ -297,21 +295,26 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { // Build payload and only include sections that are enabled const jobPayload = { model: formData.baseModel, - // Use uploaded filename/id (if available) - training_file: trainingFileName, - training_file_id: trainingFileId + training_file: trainingFileName } if (generalEnabled) { // If user enabled LoRA, include the object; otherwise send explicit null inside General const gen = { ...formData.general } gen.lora_config = loraEnabled ? formData.lora : null + // Ensure config exists and place hf_token if provided + gen.config = gen.config || {} + if (formData.hf_token) { + gen.config.token = formData.hf_token + } jobPayload.General = gen - // set top-level task for DB compatibility jobPayload.task = gen.task || 'instruction_tuning' } else { - // General disabled: ensure DB task column is set to default jobPayload.task = 'instruction_tuning' + // If HF token was provided while General is disabled, create minimal General with config.token + if (formData.hf_token) { + jobPayload.General = { config: { token: formData.hf_token } } + } } if (datasetEnabled) { @@ -355,6 +358,9 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { if (generalEnabled) { newJob.general = formData.general + if (formData.hf_token) { + newJob.general = { ...newJob.general, config: { ...(newJob.general.config || {}), token: formData.hf_token } } + } } if (datasetEnabled) { @@ -417,6 +423,8 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { num_training_workers: 1 } }) + // reset token as well + setFormData(prev => ({ ...prev, hf_token: '' })) setLoraEnabled(false) setFormData(prev => ({ ...prev, lora: { r: 8, lora_alpha: 32, lora_dropout: 0.1, task_type: 'CAUSAL_LM' } })) setErrors({}) @@ -459,25 +467,38 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { Model Configuration* - - Base Model - - {errors.baseModel && ( - - {errors.baseModel} - + handleInputChange('baseModel', newValue || '')} + onInputChange={(event, newInputValue) => handleInputChange('baseModel', newInputValue)} + renderInput={(params) => ( + )} - + /> + {errors.baseModel && ( + + {errors.baseModel} + + )} + handleInputChange('hf_token', e.target.value)} + fullWidth + size="medium" + sx={{ mt: 2 }} + /> {/* Dataset Configuration */} @@ -612,6 +633,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { size="medium" 
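
Putting the pieces of this patch together, a sketch of the request body the modal now produces for a LoRA-enabled run with an HF token supplied (field names and LoRA defaults come from the patch; all concrete values are illustrative):

// Illustrative jobPayload: General, Dataset, and Training enabled, LoRA on, HF token set.
const jobPayload = {
    model: 'meta-llama/Llama-2-7b-chat-hf', // assumed example model id
    training_file: 'train.jsonl',           // filename echoed back by the upload endpoint
    task: 'instruction_tuning',
    General: {
        task: 'instruction_tuning',
        lora_config: { r: 8, lora_alpha: 32, lora_dropout: 0.1, task_type: 'CAUSAL_LM' },
        config: { token: 'hf_...' }         // hf_token lands under General.config.token
    },
    Dataset: {
        max_length: 512,                    // illustrative values
        query_max_len: 128,
        passage_max_len: 128,
        padding: 'max_length'
    },
    Training: { epochs: 3, batch_size: 8, gradient_accumulation_steps: 1 }
}

With General disabled, only the minimal { config: { token } } wrapper is sent, and task falls back to 'instruction_tuning' for the local DB column.
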
disabled={!generalEnabled} /> + LoRA Configuration { + if (!logsOpen || !selectedJob) return + + const fetchLogs = async () => { + try { + const response = await finetuningApi.getJobLogs(selectedJob.id) + const body = response.data + // Support two shapes: { logs: string } or raw string + if (body && typeof body === 'object' && 'logs' in body) { + // If the service provided an error message, prefer showing that when logs are empty + const logsStr = body.logs || '' + if (!logsStr && body.error) { + setLogsData(`Error: ${body.error}`) + } else { + setLogsData(normalizeLogs(logsStr)) + } + } else if (typeof body === 'string') { + setLogsData(normalizeLogs(body)) + } else { + setLogsData(JSON.stringify(body, null, 2)) + } + } catch (error) { + console.error('Error auto-refreshing logs:', error) + } + } + + // Initial fetch when dialog opens + fetchLogs() + + // Set up interval for auto-refresh every 5 seconds + const intervalId = setInterval(fetchLogs, 5000) + + return () => clearInterval(intervalId) + }, [logsOpen, selectedJob]) + // When logs dialog opens or logsData changes, scroll to bottom useEffect(() => { if (!logsOpen) return @@ -195,34 +231,17 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null, filter // ensure selectedJob is set for downstream operations setSelectedJob(jobToUse) - + + // Clear any existing logs data and show loading setLogsLoading(true) - try { - // call backend API to fetch logs (may return { logs: string }) - const response = await finetuningApi.getJobLogs(jobToUse.id) - const body = response.data - // Support two shapes: { logs: string } or raw string - if (body && typeof body === 'object' && 'logs' in body) { - // If the service provided an error message, prefer showing that when logs are empty - const logsStr = body.logs || '' - if (!logsStr && body.error) { - setLogsData(`Error: ${body.error}`) - } else { - setLogsData(normalizeLogs(logsStr)) - } - } else if (typeof body === 'string') { - setLogsData(normalizeLogs(body)) - } else { - setLogsData(JSON.stringify(body, null, 2)) - } - setLogsOpen(true) - handleMenuClose() - } catch (error) { - console.error('Error fetching logs:', error) - alert('Failed to fetch logs: ' + (error?.message || 'Unknown error')) - } finally { - setLogsLoading(false) - } + + // Open the dialog - the auto-refresh effect will handle fetching logs + setLogsOpen(true) + // Close the menu but keep selectedJob set so auto-refresh can use it + setAnchorEl(null) + + // Stop loading indicator after a brief moment as auto-refresh takes over + setTimeout(() => setLogsLoading(false), 500) } // Normalize logs string: @@ -571,7 +590,16 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null, filter {/* Logs Dialog */} - setLogsOpen(false)} maxWidth="lg" fullWidth> + { + setLogsOpen(false) + // clear selected job when dialog closes to avoid stale selection + setSelectedJob(null) + }} + maxWidth="lg" + fullWidth + > Job Logs {logsLoading ? 
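
The logs dialog change above replaces the one-shot fetch with a 5-second poll whose lifetime is tied to `logsOpen` and `selectedJob`. The same pattern as a generic hook sketch (the hook name and signature are assumptions; the component keeps the effect inline):

```typescript
import { useEffect } from 'react'

// Illustrative polling hook: fetch immediately, then every `intervalMs`
// while `active` is true; the interval is cleared on close/unmount.
// Pass a stable `fetcher` (e.g. wrapped in useCallback) so re-renders
// do not reset the timer.
function usePolling(active: boolean, fetcher: () => Promise<void>, intervalMs = 5000) {
    useEffect(() => {
        if (!active) return
        let cancelled = false
        const run = () => {
            // swallow per-tick errors so one failed poll does not stop the loop
            fetcher().catch((e) => { if (!cancelled) console.error('poll failed:', e) })
        }
        run()
        const id = setInterval(run, intervalMs)
        return () => {
            cancelled = true
            clearInterval(id)
        }
    }, [active, fetcher, intervalMs])
}
```
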
( @@ -583,7 +611,14 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null, filter )} - + From 7e5b1ae0fa887742fa09dcea7c4c84bcd2243403 Mon Sep 17 00:00:00 2001 From: wwanarif Date: Tue, 28 Oct 2025 09:32:04 +0000 Subject: [PATCH 12/23] initial push for ft job outputs downloading Signed-off-by: wwanarif --- .../manifests/studio-manifest.yaml | 31 ++- studio-frontend/docker-compose.dev.yml | 26 ++- .../src/controllers/finetuning/index.ts | 68 ++++--- .../server/src/database/entities/index.ts | 4 +- studio-frontend/packages/server/src/index.ts | 7 +- .../server/src/routes/finetuning/index.ts | 7 +- .../server/src/services/finetuning/index.ts | 148 +++++++++++--- .../src/utils/webSocketDownloadManager.ts | 0 .../server/src/ws/finetuningDownload.ts | 79 ++++++++ .../packages/ui/src/api/finetuning.js | 100 +++++++++- .../src/hooks/useFineTuningDownloadSocket.js | 0 .../ui/src/hooks/useWebSocketDownload.js | 0 .../views/finetuning/FinetuningJobsTable.jsx | 181 ++++++++++++------ studio-frontend/packages/ui/vite.config.js | 3 +- 14 files changed, 515 insertions(+), 139 deletions(-) create mode 100644 studio-frontend/packages/server/src/utils/webSocketDownloadManager.ts create mode 100644 studio-frontend/packages/server/src/ws/finetuningDownload.ts create mode 100644 studio-frontend/packages/ui/src/hooks/useFineTuningDownloadSocket.js create mode 100644 studio-frontend/packages/ui/src/hooks/useWebSocketDownload.js diff --git a/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml b/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml index dc39230..7746830 100644 --- a/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml +++ b/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml @@ -397,6 +397,18 @@ spec: serviceAccountName: studio-backend-sa --- apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: shared-finetuning-output + namespace: studio +spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 1Gi +--- +apiVersion: v1 kind: Service metadata: name: studio-frontend @@ -469,6 +481,8 @@ spec: - name: ssh-key-volume mountPath: /root/.ssh readOnly: true + - name: shared-output-volume + mountPath: /tmp/finetuning/output volumes: - name: tmp emptyDir: {} @@ -479,6 +493,9 @@ spec: - key: studio-id_rsa.pub path: id_rsa.pub mode: 0644 + - name: shared-output-volume + persistentVolumeClaim: + claimName: shared-finetuning-output --- apiVersion: apps/v1 kind: Deployment @@ -654,11 +671,10 @@ spec: labels: app: finetuning-server spec: - # Mirror --ipc=host from docker run - # hostIPC: true containers: - name: finetuning-server - image: opea/finetuning:latest + # image: opea/finetuning:latest + image: ${REGISTRY}/finetuning:${TAG} imagePullPolicy: IfNotPresent ports: - containerPort: 8015 @@ -670,4 +686,11 @@ spec: value: ${HTTP_PROXY} - name: NO_PROXY value: ${NO_PROXY} - resources: {} \ No newline at end of file + resources: {} + volumeMounts: + - name: shared-output-volume + mountPath: /home/user/comps/finetuning/src/output + volumes: + - name: shared-output-volume + persistentVolumeClaim: + claimName: shared-finetuning-output \ No newline at end of file diff --git a/studio-frontend/docker-compose.dev.yml b/studio-frontend/docker-compose.dev.yml index ff473e3..3624dcb 100644 --- a/studio-frontend/docker-compose.dev.yml +++ b/studio-frontend/docker-compose.dev.yml @@ -1,15 +1,27 @@ version: '3.8' services: + finetuning-server: + image: opea/finetuning:latest + container_name: finetuning-server + user: "0:0" + ipc: 
host + ports: + - "8015:8015" + - "8265:8265" + environment: + - http_proxy=${http_proxy} + - https_proxy=${https_proxy} + - no_proxy=${no_proxy} + volumes: + - finetune-output:/home/user/comps/finetuning/src/output + restart: unless-stopped + studio-frontend: image: studio-frontend:latest build: context: . dockerfile: Dockerfile - args: - - http_proxy=${http_proxy} - - https_proxy=${https_proxy} - - no_proxy=${no_proxy} container_name: studio-frontend-dev ports: - "3000:3000" @@ -19,7 +31,8 @@ services: - node_modules:/usr/src/node_modules - pnpm_store:/usr/src/.pnpm-store - /usr/src/packages/ui/build - command: ["sh", "-c", "cp /usr/src/.env.development /usr/src/packages/ui/.env; cp /usr/src/.env.development /usr/src/packages/server/.env; pnpm install; pnpm dev"] + - finetune-output:/tmp/finetuning/output + command: ["sh", "-c", "pnpm install && pnpm dev"] environment: - http_proxy=${http_proxy} - https_proxy=${https_proxy} @@ -30,4 +43,5 @@ services: volumes: node_modules: - pnpm_store: \ No newline at end of file + pnpm_store: + finetune-output: \ No newline at end of file diff --git a/studio-frontend/packages/server/src/controllers/finetuning/index.ts b/studio-frontend/packages/server/src/controllers/finetuning/index.ts index 798dac7..add0c5a 100644 --- a/studio-frontend/packages/server/src/controllers/finetuning/index.ts +++ b/studio-frontend/packages/server/src/controllers/finetuning/index.ts @@ -117,25 +117,7 @@ const deleteFineTuningJob = async (req: Request, res: Response, next: NextFuncti } } -/** - * List checkpoints of a fine-tuning job - * POST /api/v1/finetuning/jobs/checkpoints - */ -const listFineTuningCheckpoints = async (req: Request, res: Response, next: NextFunction) => { - try { - if (typeof req.body === 'undefined' || !req.body.fine_tuning_job_id) { - throw new InternalFlowiseError( - StatusCodes.BAD_REQUEST, - 'Error: finetuningController.listFineTuningCheckpoints - fine_tuning_job_id not provided!' - ) - } - const apiResponse = await finetuningService.listFineTuningCheckpoints(req.body.fine_tuning_job_id) - return res.json(apiResponse) - } catch (error) { - next(error) - } -} /** * Fetch Ray/job logs for a fine-tuning job @@ -168,24 +150,55 @@ const getFineTuningJobLogs = async (req: Request, res: Response, next: NextFunct } } + /** - * Debug: proxy an arbitrary job payload to the finetuning service and return raw response - * POST /api/v1/finetuning/debug/proxy-job + * Download fine-tuning job output as a zip file + * GET /api/v1/finetuning/download-ft/:jobId */ -const proxyJobDebug = async (req: Request, res: Response, next: NextFunction) => { +const downloadFineTuningOutput = async (req: Request, res: Response, next: NextFunction) => { try { - if (typeof req.body === 'undefined') { - throw new InternalFlowiseError(StatusCodes.BAD_REQUEST, 'Error: finetuningController.proxyJobDebug - body is required') + const { jobId } = req.params + + if (!jobId) { + throw new InternalFlowiseError( + StatusCodes.BAD_REQUEST, + 'Error: finetuningController.downloadFineTuningOutput - jobId is required!' 
+ ) + } + + // Get the zip file path (creates if needed, but returns immediately if already exists) + const filePath = await finetuningService.downloadFineTuningOutput(jobId) + if (!filePath) { + throw new InternalFlowiseError( + StatusCodes.NOT_FOUND, + `Error: finetuningController.downloadFineTuningOutput - output not found for job: ${jobId}` + ) } - const apiResponse = await finetuningService.proxyJobDebug(req.body) - // Return the raw response object from the finetuning service - return res.status(apiResponse.status).send(apiResponse.body) + // Set response headers for file download + const fileName = `${jobId}-output.zip` + res.setHeader('Content-Type', 'application/zip') + res.setHeader('Content-Disposition', `attachment; filename="${fileName}"`) + + // Stream the file + const fs = require('fs') + const fileStream = fs.createReadStream(filePath) + fileStream.on('error', (err: any) => { + console.error('Error streaming fine-tuning output file:', err) + if (!res.headersSent) { + res.status(StatusCodes.INTERNAL_SERVER_ERROR).json({ + error: 'Error streaming fine-tuning output file' + }) + } + }) + fileStream.pipe(res) } catch (error) { next(error) } } + + export default { uploadTrainingFile, createFineTuningJob, @@ -193,7 +206,6 @@ export default { retrieveFineTuningJob, cancelFineTuningJob, deleteFineTuningJob, - listFineTuningCheckpoints, getFineTuningJobLogs, - proxyJobDebug + downloadFineTuningOutput } diff --git a/studio-frontend/packages/server/src/database/entities/index.ts b/studio-frontend/packages/server/src/database/entities/index.ts index caa35dc..ba8e1fa 100644 --- a/studio-frontend/packages/server/src/database/entities/index.ts +++ b/studio-frontend/packages/server/src/database/entities/index.ts @@ -12,7 +12,6 @@ import { UpsertHistory } from './UpsertHistory' import { ApiKey } from './ApiKey' import { CustomTemplate } from './CustomTemplate' import { FineTuningJob } from './FineTuningJob' -import { FineTuningCheckpoint } from './FineTuningCheckpoint' export const entities = { ChatFlow, @@ -28,6 +27,5 @@ export const entities = { UpsertHistory, ApiKey, CustomTemplate, - FineTuningJob, - FineTuningCheckpoint + FineTuningJob } diff --git a/studio-frontend/packages/server/src/index.ts b/studio-frontend/packages/server/src/index.ts index 5b3aa21..b75858f 100644 --- a/studio-frontend/packages/server/src/index.ts +++ b/studio-frontend/packages/server/src/index.ts @@ -22,6 +22,7 @@ import flowiseApiV1Router from './routes' import errorHandlerMiddleware from './middlewares/errors' import { SSEStreamer } from './utils/SSEStreamer' import { validateAPIKey } from './utils/validateKey' +import { setupFineTuningDownloadHandlers } from './ws/finetuningDownload' declare global { namespace Express { @@ -141,7 +142,8 @@ export class App { '/api/v1/leads', '/api/v1/get-upload-file', '/api/v1/ip', - '/api/v1/ping' + '/api/v1/ping', + '/api/v1/finetuning/download-ft/' ] const URL_CASE_INSENSITIVE_REGEX: RegExp = /\/api\/v1\//i const URL_CASE_SENSITIVE_REGEX: RegExp = /\/api\/v1\// @@ -290,6 +292,9 @@ export async function start(): Promise { cors: getCorsOptions() }) + // Setup WebSocket handlers + setupFineTuningDownloadHandlers(io) + await serverApp.initDatabase() await serverApp.config(io) diff --git a/studio-frontend/packages/server/src/routes/finetuning/index.ts b/studio-frontend/packages/server/src/routes/finetuning/index.ts index b45565b..35c6114 100644 --- a/studio-frontend/packages/server/src/routes/finetuning/index.ts +++ 
b/studio-frontend/packages/server/src/routes/finetuning/index.ts @@ -13,9 +13,6 @@ router.post('/files', upload.single('file'), finetuningController.uploadTraining // Create fine-tuning job router.post('/jobs', finetuningController.createFineTuningJob) -// Debug: proxy an arbitrary job payload to the external finetuning service -router.post('/debug/proxy-job', finetuningController.proxyJobDebug) - // List all fine-tuning jobs router.get('/jobs', finetuningController.listFineTuningJobs) @@ -29,7 +26,7 @@ router.post('/jobs/logs', finetuningController.getFineTuningJobLogs) router.post('/jobs/cancel', finetuningController.cancelFineTuningJob) router.post('/jobs/delete', finetuningController.deleteFineTuningJob) -// List checkpoints of a fine-tuning job -router.post('/jobs/checkpoints', finetuningController.listFineTuningCheckpoints) +// Download fine-tuning job output +router.get('/download-ft/:jobId', finetuningController.downloadFineTuningOutput) export default router diff --git a/studio-frontend/packages/server/src/services/finetuning/index.ts b/studio-frontend/packages/server/src/services/finetuning/index.ts index 2acb1c3..3d1f554 100644 --- a/studio-frontend/packages/server/src/services/finetuning/index.ts +++ b/studio-frontend/packages/server/src/services/finetuning/index.ts @@ -1,12 +1,14 @@ import axios, { AxiosInstance } from 'axios' import http from 'http' import https from 'https' +import * as fs from 'fs' +import * as path from 'path' +import { execSync } from 'child_process' import { StatusCodes } from 'http-status-codes' import { InternalFlowiseError } from '../../errors/internalFlowiseError' import { getErrorMessage } from '../../errors/utils' import { getRunningExpressApp } from '../../utils/getRunningExpressApp' import { FineTuningJob } from '../../database/entities/FineTuningJob' -import { FineTuningCheckpoint } from '../../database/entities/FineTuningCheckpoint' const FINETUNING_SERVICE_URL = process.env.FINETUNING_HOST ? 
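
One refinement the download controller earlier in this patch leaves out is a `Content-Length` header: with it, browsers and the axios `onDownloadProgress` callback can show a real percentage instead of an indeterminate spinner. A hedged sketch, assuming the same `filePath` the controller resolves (the helper name is illustrative):

```typescript
import * as fs from 'fs'
import { Response } from 'express'

// Sketch only: stream a prepared archive with an explicit Content-Length.
function streamArchive(res: Response, filePath: string, fileName: string) {
    const { size } = fs.statSync(filePath)
    res.setHeader('Content-Type', 'application/zip')
    res.setHeader('Content-Disposition', `attachment; filename="${fileName}"`)
    res.setHeader('Content-Length', String(size))
    fs.createReadStream(filePath)
        .on('error', (err) => {
            console.error('Error streaming archive:', err)
            if (!res.headersSent) res.status(500).json({ error: 'Error streaming archive' })
        })
        .pipe(res)
}
```
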
`http://${process.env.FINETUNING_HOST}:8015` : 'undefined' console.debug('finetuningService - FINETUNING_SERVICE_URL', FINETUNING_SERVICE_URL) @@ -29,6 +31,73 @@ const axiosClient: AxiosInstance = axios.create({ // In-memory mapping: filename (raw and decoded) -> { id, rawFilename } const uploadedFileIdMap: Map = new Map() +/** + * Helper function to zip a fine-tuning job output directory + * Checks if zip already exists and is up-to-date before creating a new one + * @param outputDir - Full path to the output directory for the job + * @param jobId - ID of the fine-tuning job + * @returns Path to the zipped file or null if failed + */ +const ensureFineTuningOutputZip = async (outputDir: string, jobId: string): Promise => { + try { + // eslint-disable-next-line no-console + console.debug(`finetuningService.ensureFineTuningOutputZip - processing output for job: ${jobId}`) + + // Validate output directory exists + if (!fs.existsSync(outputDir)) { + // eslint-disable-next-line no-console + console.warn(`finetuningService.ensureFineTuningOutputZip - output directory not found: ${outputDir}`) + return null + } + + const zipFilePath = `${outputDir}.zip` + const outputStats = fs.statSync(outputDir) + + // Check if zip exists and is up-to-date + if (fs.existsSync(zipFilePath)) { + const zipStats = fs.statSync(zipFilePath) + // If zip is newer than the output directory, skip re-zipping + if (zipStats.mtimeMs > outputStats.mtimeMs) { + // eslint-disable-next-line no-console + console.debug(`finetuningService.ensureFineTuningOutputZip - zip already up-to-date: ${zipFilePath}`) + return zipFilePath + } + // Remove outdated zip + try { + fs.unlinkSync(zipFilePath) + // eslint-disable-next-line no-console + console.debug(`finetuningService.ensureFineTuningOutputZip - removed outdated zip: ${zipFilePath}`) + } catch (e) { + // eslint-disable-next-line no-console + console.warn(`finetuningService.ensureFineTuningOutputZip - failed to remove outdated zip: ${e}`) + } + } + + // Create zip file using tar (more efficient than node zip libraries) + // eslint-disable-next-line no-console + console.debug(`finetuningService.ensureFineTuningOutputZip - starting to zip output for job ${jobId}`) + try { + // Use tar to create a compressed archive + const parentDir = path.dirname(outputDir) + const dirName = path.basename(outputDir) + const cmd = `cd "${parentDir}" && tar -czf "${path.basename(zipFilePath)}" "${dirName}"` + execSync(cmd, { stdio: 'pipe', timeout: 300000 }) // 5 minute timeout + + // eslint-disable-next-line no-console + console.debug(`finetuningService.ensureFineTuningOutputZip - zip created successfully for job ${jobId}: ${zipFilePath}`) + return zipFilePath + } catch (execErr: any) { + // eslint-disable-next-line no-console + console.error(`finetuningService.ensureFineTuningOutputZip - tar failed for job ${jobId}: ${execErr?.message || execErr}`) + return null + } + } catch (error: any) { + // eslint-disable-next-line no-console + console.error(`finetuningService.ensureFineTuningOutputZip - error: ${error?.message || error}`) + return null + } +} + /** * Upload a training file to the finetuning service */ @@ -468,10 +537,7 @@ const deleteFineTuningJob = async (fineTuningJobId: string) => { try { const appServer = getRunningExpressApp() const repo = appServer.AppDataSource.getRepository(FineTuningJob) - const checkpointRepo = appServer.AppDataSource.getRepository(FineTuningCheckpoint) - // delete checkpoints first - await checkpointRepo.delete({ fine_tuning_job_id: String(fineTuningJobId) }) // 
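
Worth flagging in `ensureFineTuningOutputZip`: `tar -czf` produces a gzip-compressed tar archive, so the `.zip`-named file is not a true ZIP and strict unzip tools may reject it (it extracts fine with `tar -xzf`). If a real ZIP is preferred, a library such as `archiver` could replace the shell-out; a sketch under the assumption that `archiver` is added as a dependency, which this patch does not do:

```typescript
import * as fs from 'fs'
import * as path from 'path'
import archiver from 'archiver'

// Hypothetical alternative: emit a genuine ZIP instead of a gzipped tar.
function zipDirectory(outputDir: string, zipFilePath: string): Promise<string> {
    return new Promise((resolve, reject) => {
        const output = fs.createWriteStream(zipFilePath)
        const archive = archiver('zip', { zlib: { level: 9 } })
        output.on('close', () => resolve(zipFilePath))
        archive.on('error', reject)
        archive.pipe(output)
        // keep the job directory name as the top-level folder in the archive
        archive.directory(outputDir, path.basename(outputDir))
        archive.finalize()
    })
}
```
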
delete job await repo.delete({ id: String(fineTuningJobId) }) } catch (e) { @@ -491,37 +557,64 @@ const deleteFineTuningJob = async (fineTuningJobId: string) => { } /** - * List checkpoints of a fine-tuning job + * Download fine-tuning job output as a zip file + * Creates zip if needed, or returns existing zip immediately + * @param jobId - ID of the fine-tuning job + * @returns Path to the zipped file or null if not found */ -const listFineTuningCheckpoints = async (fineTuningJobId: string) => { +const downloadFineTuningOutput = async (jobId: string): Promise => { try { - const response = await axiosClient.post('/v1/finetune/list_checkpoints', { - fine_tuning_job_id: fineTuningJobId - }) - return response.data + if (!jobId) { + throw new InternalFlowiseError(StatusCodes.BAD_REQUEST, 'Job ID is required') + } + + const OUTPUT_BASE_DIR = '/tmp/finetuning/output' + const jobOutputDir = path.join(OUTPUT_BASE_DIR, jobId) + + // eslint-disable-next-line no-console + console.debug(`finetuningService.downloadFineTuningOutput - checking for output: ${jobOutputDir}`) + + // Verify job output directory exists + if (!fs.existsSync(jobOutputDir)) { + // eslint-disable-next-line no-console + console.warn(`finetuningService.downloadFineTuningOutput - output directory not found: ${jobOutputDir}`) + throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Fine-tuning job output not found for job: ${jobId}`) + } + + // Security check: ensure path is within the expected directory + const resolvedJobDir = path.resolve(jobOutputDir) + const resolvedBaseDir = path.resolve(OUTPUT_BASE_DIR) + if (!resolvedJobDir.startsWith(resolvedBaseDir)) { + // eslint-disable-next-line no-console + console.error(`finetuningService.downloadFineTuningOutput - path traversal attempt: ${jobOutputDir}`) + throw new InternalFlowiseError(StatusCodes.FORBIDDEN, 'Invalid job output path') + } + + // Ensure the output is zipped (returns immediately if zip is up-to-date) + const finalZipPath = await ensureFineTuningOutputZip(jobOutputDir, jobId) + if (!finalZipPath) { + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Failed to create zip for job ${jobId}` + ) + } + + // eslint-disable-next-line no-console + console.debug(`finetuningService.downloadFineTuningOutput - file ready for download: ${finalZipPath}`) + return finalZipPath } catch (error: any) { + if (error instanceof InternalFlowiseError) { + throw error + } + // eslint-disable-next-line no-console + console.error(`finetuningService.downloadFineTuningOutput - error: ${error?.message || error}`) throw new InternalFlowiseError( StatusCodes.INTERNAL_SERVER_ERROR, - `Error: finetuningService.listFineTuningCheckpoints - ${getErrorMessage(error)}` + `Error: finetuningService.downloadFineTuningOutput - ${getErrorMessage(error)}` ) } } -/** - * Debug helper: forward any payload to the external finetuning job endpoint and return raw status/body - */ -const proxyJobDebug = async (payload: any) => { - try { - const resp = await axiosClient.post('/v1/fine_tuning/jobs', payload) - return { status: resp.status, body: resp.data } - } catch (error: any) { - // Return the status and response data (stringify if needed) - const status = error?.response?.status || 500 - const body = error?.response?.data || (error?.message || 'Unknown error') - return { status, body } - } -} - /** * Get logs for a fine-tuning job by querying the Ray head node HTTP API. 
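
The containment guard above relies on `resolvedJobDir.startsWith(resolvedBaseDir)`. That idiom has a known sharp edge: a bare string prefix also matches sibling paths such as `/tmp/finetuning/output2`, so a `jobId` like `../output2/x` resolves to a path that still passes the check. `path.relative` avoids both pitfalls; a minimal sketch:

```typescript
import * as path from 'path'

// Sketch: robust "is target inside base" test. Unlike a startsWith()
// prefix check, this rejects siblings such as /tmp/finetuning/output2.
function isInside(baseDir: string, target: string): boolean {
    const rel = path.relative(path.resolve(baseDir), path.resolve(target))
    return rel !== '' && !rel.startsWith('..') && !path.isAbsolute(rel)
}

// e.g. isInside('/tmp/finetuning/output', path.join('/tmp/finetuning/output', jobId))
```
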
* It will call: http:///api/jobs//logs @@ -608,8 +701,7 @@ export default { listFineTuningJobs, retrieveFineTuningJob, cancelFineTuningJob, - listFineTuningCheckpoints, deleteFineTuningJob, getFineTuningJobLogs, - proxyJobDebug + downloadFineTuningOutput } diff --git a/studio-frontend/packages/server/src/utils/webSocketDownloadManager.ts b/studio-frontend/packages/server/src/utils/webSocketDownloadManager.ts new file mode 100644 index 0000000..e69de29 diff --git a/studio-frontend/packages/server/src/ws/finetuningDownload.ts b/studio-frontend/packages/server/src/ws/finetuningDownload.ts new file mode 100644 index 0000000..f07ae53 --- /dev/null +++ b/studio-frontend/packages/server/src/ws/finetuningDownload.ts @@ -0,0 +1,79 @@ +import { Server, Socket } from 'socket.io' +import finetuningService from '../services/finetuning' +import logger from '../utils/logger' + +/** + * Setup WebSocket handlers for fine-tuning output downloads + * This allows non-blocking, asynchronous zip creation and download + */ +export const setupFineTuningDownloadHandlers = (io: Server) => { + logger.info('[WS] Setting up fine-tuning download handlers') + + io.on('connection', (socket: Socket) => { + logger.info(`[WS] Client connected - Socket ID: ${socket.id}`) + + /** + * Handle fine-tuning output download request + * Client sends: { jobId: string } + * Server emits progress updates and final download URL + */ + socket.on('download-finetuning-output', async (data: { jobId: string }) => { + try { + const { jobId } = data + + if (!jobId) { + socket.emit('download-finetuning-error', { + jobId: null, + error: 'Job ID is required' + }) + return + } + + logger.info(`[WS] Starting download preparation for job: ${jobId}`) + + // Emit starting status + socket.emit('download-finetuning-progress', { + jobId, + status: 'starting', + message: 'Preparing download...' 
+ }) + + // Call the service to prepare the zip file + // This may take time, so we do it asynchronously + const zipFilePath = await finetuningService.downloadFineTuningOutput(jobId) + + if (!zipFilePath) { + socket.emit('download-finetuning-error', { + jobId, + error: 'Failed to create output archive' + }) + return + } + + logger.info(`[WS] Download ready for job: ${jobId}`) + + // Emit completion with download URL + socket.emit('download-finetuning-complete', { + jobId, + downloadUrl: `/api/v1/finetuning/download-ft/${jobId}`, + fileName: `${jobId}-output.zip` + }) + + } catch (error: any) { + const errorMessage = error?.message || String(error) || 'Unknown error' + logger.error(`[WS] Error preparing download: ${errorMessage}`) + + socket.emit('download-finetuning-error', { + jobId: data?.jobId || null, + error: errorMessage + }) + } + }) + + socket.on('disconnect', (reason) => { + logger.info(`[WS] Client disconnected - Socket ID: ${socket.id}, Reason: ${reason}`) + }) + + logger.debug(`[WS] Fine-tuning download handlers attached to socket ${socket.id}`) + }) +} diff --git a/studio-frontend/packages/ui/src/api/finetuning.js b/studio-frontend/packages/ui/src/api/finetuning.js index c5636a1..1bae6fe 100644 --- a/studio-frontend/packages/ui/src/api/finetuning.js +++ b/studio-frontend/packages/ui/src/api/finetuning.js @@ -1,4 +1,12 @@ import client from './client' +import { io } from 'socket.io-client' +import { baseURL as apiBaseURL } from '@/store/constant' + +// Get the base URL for WebSocket connection +const getSocketUrl = () => { + // Use the base URL from constants (without /api/v1) + return apiBaseURL || window.location.origin +} const finetuningApi = { // Upload training file @@ -55,13 +63,6 @@ const finetuningApi = { }) }, - // List checkpoints of a fine-tuning job - listCheckpoints: (fineTuningJobId) => { - return client.post('/finetuning/jobs/checkpoints', { - fine_tuning_job_id: fineTuningJobId - }) - }, - // Get logs for a fine-tuning job getJobLogs: (fineTuningJobId, opts = {}) => { return client.post('/finetuning/jobs/logs', { @@ -70,6 +71,91 @@ const finetuningApi = { }) }, + // Download fine-tuning job output as a zip file + // This returns a blob that can be saved as a file + // Accepts optional `onDownloadProgress` callback (progress event) and `signal` (AbortSignal) + downloadFinetuningOutput: (jobId, onDownloadProgress = undefined, signal = undefined) => { + const cfg = { + responseType: 'blob', + // allow long-running / large downloads + timeout: 0, + maxContentLength: Infinity, + maxBodyLength: Infinity + } + if (typeof onDownloadProgress === 'function') cfg.onDownloadProgress = onDownloadProgress + if (signal) cfg.signal = signal + return client.get(`/finetuning/download-ft/${encodeURIComponent(jobId)}`, cfg) + }, + + /** + * Download fine-tuning output using WebSocket for async zip preparation + * @param {string} jobId - The fine-tuning job ID + * @param {Object} callbacks - Callback functions { onProgress, onComplete, onError } + * @returns {Function} Cleanup function to disconnect socket + */ + downloadFinetuningOutputWS: (jobId, callbacks = {}) => { + const { onProgress, onComplete, onError } = callbacks + + // Get socket URL + const socketUrl = getSocketUrl() + console.log('[WS] Connecting to:', socketUrl) + + // Create socket connection + const socket = io(socketUrl, { + transports: ['websocket', 'polling'], + reconnection: false, + timeout: 10000 + }) + + // Handle connection + socket.on('connect', () => { + console.log('[WS] Connected to server for download, 
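
Callers of the blob variant above still have to hand the response to the browser themselves. A typical usage sketch with the standard object-URL dance (the `saveBlob` helper is illustrative, not part of the patch):

```typescript
// Turn the axios blob response from downloadFinetuningOutput into a
// user-visible download.
function saveBlob(blob: Blob, fileName: string) {
    const url = URL.createObjectURL(blob)
    const a = document.createElement('a')
    a.href = url
    a.download = fileName
    document.body.appendChild(a)
    a.click()
    a.remove()
    URL.revokeObjectURL(url)
}

// const resp = await finetuningApi.downloadFinetuningOutput(jobId,
//     (e) => console.log('received', e.loaded, 'bytes'))
// saveBlob(resp.data, `${jobId}-output.zip`)
```
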
socket ID:', socket.id) + // Request download preparation + socket.emit('download-finetuning-output', { jobId }) + }) + + // Handle progress updates + socket.on('download-finetuning-progress', (data) => { + console.log('[WS] Download progress:', data) + if (onProgress) onProgress(data) + }) + + // Handle completion + socket.on('download-finetuning-complete', (data) => { + console.log('[WS] Download ready:', data) + if (onComplete) onComplete(data) + // Disconnect after completion + socket.disconnect() + }) + + // Handle errors + socket.on('download-finetuning-error', (data) => { + console.error('[WS] Download error:', data) + if (onError) onError(data) + socket.disconnect() + }) + + // Handle connection errors + socket.on('connect_error', (error) => { + console.error('[WS] Connection error:', error.message, error) + if (onError) onError({ error: `WebSocket connection failed: ${error.message}` }) + socket.disconnect() + }) + + // Handle disconnect + socket.on('disconnect', (reason) => { + console.log('[WS] Disconnected:', reason) + }) + + // Return cleanup function + return () => { + if (socket.connected) { + console.log('[WS] Manually disconnecting socket') + socket.disconnect() + } + } + }, + // Legacy compatibility methods deleteJob: (jobId) => { // Call the backend delete endpoint which will cancel remote job (best-effort) and remove local DB records diff --git a/studio-frontend/packages/ui/src/hooks/useFineTuningDownloadSocket.js b/studio-frontend/packages/ui/src/hooks/useFineTuningDownloadSocket.js new file mode 100644 index 0000000..e69de29 diff --git a/studio-frontend/packages/ui/src/hooks/useWebSocketDownload.js b/studio-frontend/packages/ui/src/hooks/useWebSocketDownload.js new file mode 100644 index 0000000..e69de29 diff --git a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx index dbf5538..eae60d8 100644 --- a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx +++ b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx @@ -19,21 +19,20 @@ import { TableSortLabel, Typography, IconButton, + Tooltip, Menu, MenuItem, Dialog, DialogTitle, DialogContent, - DialogActions, - List, - ListItem, - ListItemText + DialogActions } from '@mui/material' import { useTheme, styled } from '@mui/material/styles' import { tableCellClasses } from '@mui/material/TableCell' +import { CircularProgress } from '@mui/material' // icons -import { IconDots, IconEye, IconTrash, IconDownload, IconPlayerStop, IconCheckbox } from '@tabler/icons-react' +import { IconDots, IconEye, IconTrash, IconDownload, IconPlayerStop } from '@tabler/icons-react' // API import finetuningApi from '@/api/finetuning' @@ -116,10 +115,12 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null, filter const [anchorEl, setAnchorEl] = useState(null) const [selectedJob, setSelectedJob] = useState(null) const [actionLoading, setActionLoading] = useState(false) + // Track multiple concurrent downloads: { [jobId]: { progress: number } } + const [downloadingJobs, setDownloadingJobs] = useState({}) + const [downloadDialogOpen, setDownloadDialogOpen] = useState(false) + const [downloadProgress, setDownloadProgress] = useState(0) const [detailsOpen, setDetailsOpen] = useState(false) const [detailsData, setDetailsData] = useState(null) - const [checkpointsOpen, setCheckpointsOpen] = useState(false) - const [checkpointsData, setCheckpointsData] = useState(null) const [logsOpen, 
setLogsOpen] = useState(false) const [logsData, setLogsData] = useState('') const [logsLoading, setLogsLoading] = useState(false) @@ -204,25 +205,72 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null, filter } } - const handleViewCheckpoints = async (jobArg = null) => { - const jobToUse = jobArg || selectedJob - if (!jobToUse) return + const handleDownloadFinetuningOutput = async (job) => { + if (!job) { + alert('Job is required') + return + } - // ensure selectedJob is set for downstream operations - setSelectedJob(jobToUse) + const id = String(job.id) + setDownloadProgress(0) + // mark this job as preparing; show dialog (user can close dialog without cancelling) + setDownloadingJobs((prev) => ({ ...(prev || {}), [id]: { progress: 0 } })) + setDownloadDialogOpen(true) + + // Use WebSocket-based download for non-blocking zip creation + const cleanup = finetuningApi.downloadFinetuningOutputWS(job.id, { + onProgress: (data) => { + console.log('Download progress:', data) + // Update UI to show preparation is in progress + setDownloadingJobs((prev) => ({ + ...(prev || {}), + [id]: { progress: 0, status: data.status, message: data.message } + })) + }, + onComplete: async (data) => { + console.log('Download complete:', data) + + // File is ready - trigger native browser download + // No authentication needed (endpoint is whitelisted) + const downloadUrl = data.downloadUrl || `/api/v1/finetuning/download-ft/${job.id}` + const fileName = data.fileName || `${job.id}-output.zip` + + console.log('Starting native browser download:', downloadUrl) + + // Use window.location.href to trigger native browser download + // Browser will show download in download manager with progress bar + window.location.href = downloadUrl + + // Mark this job finished and close dialog + setDownloadingJobs((prev) => ({ ...(prev || {}), [id]: { progress: 100 } })) + setDownloadProgress(100) + setTimeout(() => { + setDownloadingJobs((prev) => { + const copy = { ...(prev || {}) } + delete copy[id] + return copy + }) + setDownloadDialogOpen(false) + }, 800) + }, + onError: (data) => { + console.error('Download preparation error:', data) + alert('Failed to prepare download: ' + (data.error || 'Unknown error')) + // Clear downloading state + setDownloadingJobs((prev) => { + const copy = { ...(prev || {}) } + delete copy[id] + return copy + }) + setDownloadProgress(0) + setActionLoading(false) + setDownloadDialogOpen(false) + } + }) - setActionLoading(true) - try { - const response = await finetuningApi.listCheckpoints(jobToUse.id) - setCheckpointsData(response.data) - setCheckpointsOpen(true) - handleMenuClose() - } catch (error) { - console.error('Error fetching checkpoints:', error) - alert('Failed to fetch checkpoints: ' + (error.message || 'Unknown error')) - } finally { - setActionLoading(false) - } + // Store cleanup function to allow cancellation if needed + // (optional enhancement: you could add a cancel button to call this) + window._ftDownloadCleanup = cleanup } const handleViewLogs = async (jobArg = null) => { @@ -350,6 +398,13 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null, filter return progress || 0 } + // Only allow downloads when job status indicates completion/success + const isDownloadableStatus = (status) => { + if (!status) return false + const s = String(status).toLowerCase() + return s === 'succeeded' + } + if (isLoading) { return ( @@ -410,7 +465,7 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null, filter Model Task Dataset - 
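
A caveat in `handleDownloadFinetuningOutput` above: the cleanup function is stashed in a single `window._ftDownloadCleanup` slot, so when two downloads run concurrently (which the `downloadingJobs` map explicitly supports) the second overwrites the first job's disconnect handle. Keeping one cleanup per job id avoids that; a sketch:

```typescript
// Illustrative: one WS cleanup per job id instead of a single global slot,
// so concurrent downloads can be cancelled individually.
const downloadCleanups = new Map<string, () => void>()

function registerDownload(jobId: string, cleanup: () => void) {
    // drop any previous socket for the same job before replacing it
    downloadCleanups.get(jobId)?.()
    downloadCleanups.set(jobId, cleanup)
}

function cancelDownload(jobId: string) {
    downloadCleanups.get(jobId)?.()
    downloadCleanups.delete(jobId)
}
```
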
Checkpoints + Output Logs Actions @@ -427,7 +482,7 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null, filter {visibleData.length === 0 ? ( - + No fine-tuning jobs match the current filter @@ -478,17 +533,38 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null, filter {job.dataset || 'N/A'} - - + + {(() => { + const jid = String(job.id) + const isPreparing = Boolean(downloadingJobs && downloadingJobs[jid]) + return ( + + + handleDownloadFinetuningOutput(job)} + disabled={ + actionLoading || + isPreparing || + !isDownloadableStatus(job.status) + } + title={isPreparing ? 'Preparing download' : 'Download fine-tuning output'} + > + {isPreparing ? ( + + ) : ( + + )} + + + + ) + })()} - handleViewLogs(job)} title="Logs"> + handleViewLogs(job)} title="View Logs"> @@ -522,7 +598,6 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null, filter View Details - {/* View Checkpoints removed from Actions menu: use the Checkpoints column button to open the modal */}
+ {/* Preparing Download Dialog */} + setDownloadDialogOpen(false)} maxWidth="xs" fullWidth> + Preparing download + + + The server is preparing the job output for download. This may take a few moments for large outputs. + {downloadProgress > 0 ? ` (${downloadProgress}%)` : ''} + + 0 ? 'determinate' : 'indeterminate'} value={downloadProgress} /> + + + + + + {/* Details Dialog */} setDetailsOpen(false)} maxWidth="md" fullWidth> Job Details @@ -568,27 +658,6 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null, filter - {/* Checkpoints Dialog */} - setCheckpointsOpen(false)} maxWidth="md" fullWidth> - Checkpoints - - {checkpointsData && Array.isArray(checkpointsData) && checkpointsData.length > 0 ? ( - - {checkpointsData.map((cp) => ( - - - - ))} - - ) : ( - No checkpoints available - )} - - - - - - {/* Logs Dialog */} { }, '/socket.io': { target: `http://${serverHost}:${serverPort}`, - changeOrigin: true + changeOrigin: true, + ws: true } } } From b0402a386eb91daeee2dc60bfee85ab6316ed456 Mon Sep 17 00:00:00 2001 From: wwanarif Date: Wed, 29 Oct 2025 09:38:51 +0000 Subject: [PATCH 13/23] enable ws connections for ft status and output downloading Signed-off-by: wwanarif --- studio-frontend/packages/server/src/index.ts | 2 + .../server/src/services/finetuning/index.ts | 93 +++++-- .../server/src/ws/finetuningDownload.ts | 165 ++++++++++--- .../server/src/ws/finetuningStatus.ts | 226 ++++++++++++++++++ .../packages/ui/src/api/finetuning.js | 173 +++++++++++--- .../src/ui-component/table/FlowListTable.jsx | 10 +- .../views/finetuning/FinetuningJobsTable.jsx | 92 ++++++- .../ui/src/views/finetuning/index.jsx | 151 +++++------- studio-frontend/packages/ui/vite.config.js | 22 +- 9 files changed, 721 insertions(+), 213 deletions(-) create mode 100644 studio-frontend/packages/server/src/ws/finetuningStatus.ts diff --git a/studio-frontend/packages/server/src/index.ts b/studio-frontend/packages/server/src/index.ts index b75858f..06b236d 100644 --- a/studio-frontend/packages/server/src/index.ts +++ b/studio-frontend/packages/server/src/index.ts @@ -23,6 +23,7 @@ import errorHandlerMiddleware from './middlewares/errors' import { SSEStreamer } from './utils/SSEStreamer' import { validateAPIKey } from './utils/validateKey' import { setupFineTuningDownloadHandlers } from './ws/finetuningDownload' +import { setupFineTuningStatusHandlers } from './ws/finetuningStatus' declare global { namespace Express { @@ -294,6 +295,7 @@ export async function start(): Promise { // Setup WebSocket handlers setupFineTuningDownloadHandlers(io) + setupFineTuningStatusHandlers(io) await serverApp.initDatabase() await serverApp.config(io) diff --git a/studio-frontend/packages/server/src/services/finetuning/index.ts b/studio-frontend/packages/server/src/services/finetuning/index.ts index 3d1f554..0fed1c8 100644 --- a/studio-frontend/packages/server/src/services/finetuning/index.ts +++ b/studio-frontend/packages/server/src/services/finetuning/index.ts @@ -3,12 +3,16 @@ import http from 'http' import https from 'https' import * as fs from 'fs' import * as path from 'path' -import { execSync } from 'child_process' +import { exec } from 'child_process' +import { promisify } from 'util' import { StatusCodes } from 'http-status-codes' import { InternalFlowiseError } from '../../errors/internalFlowiseError' import { getErrorMessage } from '../../errors/utils' import { getRunningExpressApp } from '../../utils/getRunningExpressApp' import { FineTuningJob } from '../../database/entities/FineTuningJob' +import logger 
from '../../utils/logger' + +const execAsync = promisify(exec) const FINETUNING_SERVICE_URL = process.env.FINETUNING_HOST ? `http://${process.env.FINETUNING_HOST}:8015` : 'undefined' console.debug('finetuningService - FINETUNING_SERVICE_URL', FINETUNING_SERVICE_URL) @@ -77,11 +81,13 @@ const ensureFineTuningOutputZip = async (outputDir: string, jobId: string): Prom // eslint-disable-next-line no-console console.debug(`finetuningService.ensureFineTuningOutputZip - starting to zip output for job ${jobId}`) try { - // Use tar to create a compressed archive const parentDir = path.dirname(outputDir) const dirName = path.basename(outputDir) const cmd = `cd "${parentDir}" && tar -czf "${path.basename(zipFilePath)}" "${dirName}"` - execSync(cmd, { stdio: 'pipe', timeout: 300000 }) // 5 minute timeout + await execAsync(cmd, { + maxBuffer: 1024 * 1024 * 100, // 100MB buffer for large outputs + timeout: 600000 // 10 minute timeout + }) // eslint-disable-next-line no-console console.debug(`finetuningService.ensureFineTuningOutputZip - zip created successfully for job ${jobId}: ${zipFilePath}`) @@ -150,7 +156,6 @@ const persistJobToDb = async (jobData: any) => { if (!id) return // Build entity object mapping common fields; fall back to stringifying objects - // Extract task robustly: prefer explicit jobData.task, then jobData.General.task (object or JSON string) let taskVal: any = jobData.task || undefined try { if (!taskVal && jobData.General) { @@ -222,7 +227,7 @@ const persistJobToDb = async (jobData: any) => { } } -// Helper: update specific fields for a job in the DB (best-effort) +// Helper: update specific fields for a job in the DB const updateJobInDb = async (jobId: string, updates: Partial) => { try { if (!jobId) return @@ -266,8 +271,7 @@ const createFineTuningJob = async (jobConfig: { } }) => { try { - // Work with the jobConfig as-provided by the UI. Do not decode training_file automatically; - // the external service may expect the raw (possibly URL-encoded) filename. + // Work with the jobConfig as-provided by the UI. 
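
On the `execSync` to `promisify(exec)` switch above: besides keeping the event loop free while tar runs, the promisified form rejects with an error whose `killed` flag is set when the `timeout` expires, so callers can distinguish a slow archive from a failed command. A small sketch (the wrapper name is illustrative):

```typescript
import { exec } from 'child_process'
import { promisify } from 'util'

const execAsync = promisify(exec)

// Illustrative wrapper: report whether a failure was the timeout firing
// (child killed, SIGTERM by default) or the command itself erroring out.
async function runWithTimeout(cmd: string, timeoutMs: number) {
    try {
        const { stdout } = await execAsync(cmd, { timeout: timeoutMs, maxBuffer: 1024 * 1024 * 100 })
        return { ok: true as const, stdout }
    } catch (err: any) {
        if (err?.killed) return { ok: false as const, reason: 'timeout' as const }
        return { ok: false as const, reason: 'failed' as const, detail: err?.message }
    }
}
```
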
const forwardedJobConfig = { ...jobConfig } // (Removed verbose initial jobConfig logging to reduce noise) @@ -348,7 +352,7 @@ const createFineTuningJob = async (jobConfig: { // ignore } - // Persist to local DB (best-effort) + // Persist to local DB try { await persistJobToDb(respData) } catch (e) { @@ -429,15 +433,11 @@ const retrieveFineTuningJob = async (fineTuningJobId: string) => { for (let attempt = 1; attempt <= maxAttempts; attempt++) { try { - // Log attempt for easier correlation in logs - // eslint-disable-next-line no-console - console.debug(`finetuningService.retrieveFineTuningJob - attempt ${attempt} for job ${fineTuningJobId}`) - const response = await axiosClient.post('/v1/fine_tuning/jobs/retrieve', { fine_tuning_job_id: fineTuningJobId }) const respData = response.data - // Persist/update DB with latest status (best-effort) + // Persist/update DB with latest status try { await persistJobToDb(respData) } catch (e) { @@ -446,9 +446,6 @@ const retrieveFineTuningJob = async (fineTuningJobId: string) => { return respData } catch (error: any) { const msg = getErrorMessage(error) - // eslint-disable-next-line no-console - console.warn(`finetuningService.retrieveFineTuningJob - attempt ${attempt} failed: ${msg}`) - const isTransient = msg && ( msg.toLowerCase().includes('socket hang up') || msg.toLowerCase().includes('econnreset') || @@ -459,17 +456,20 @@ const retrieveFineTuningJob = async (fineTuningJobId: string) => { if (attempt < maxAttempts && isTransient) { const delay = baseDelayMs * Math.pow(2, attempt - 1) - // eslint-disable-next-line no-console - console.debug(`finetuningService.retrieveFineTuningJob - retrying in ${delay}ms`) + // back off and retry silently // eslint-disable-next-line no-await-in-loop await sleep(delay) continue } + // Only log a concise warning when this is the final attempt + if (attempt === maxAttempts) { + logger.warn(`finetuningService.retrieveFineTuningJob - final attempt ${attempt} failed for job ${fineTuningJobId}: ${msg}`) + } + // Final failure: log details and throw try { - // eslint-disable-next-line no-console - console.error('finetuningService.retrieveFineTuningJob - error details:', { + logger.error('finetuningService.retrieveFineTuningJob - error details:', { message: error?.message, status: error?.response?.status, responseData: error?.response?.data @@ -499,7 +499,7 @@ const cancelFineTuningJob = async (fineTuningJobId: string) => { const response = await axiosClient.post('/v1/fine_tuning/jobs/cancel', { fine_tuning_job_id: fineTuningJobId }) - // Best-effort: update local DB to reflect cancelled status + // Update local DB to reflect cancelled status try { await updateJobInDb(fineTuningJobId, { status: 'cancelled', finishedDate: new Date() }) } catch (e) { @@ -516,11 +516,11 @@ const cancelFineTuningJob = async (fineTuningJobId: string) => { /** * Delete a fine-tuning job locally and attempt to cancel it remotely. - * This will cancel the external job (best-effort) and remove DB records for the job and checkpoints. + * This will cancel the external job and remove DB records for the job and checkpoints. 
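
The retry loop in `retrieveFineTuningJob` encodes a reusable policy: retry only transient transport failures (socket hang up, ECONNRESET, timeouts), back off exponentially, and log just once on the final attempt. The same policy extracted as a generic sketch (the helper is not in the patch; the predicate mirrors the substrings the service checks):

```typescript
const sleep = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms))

const isTransient = (msg: string) =>
    ['socket hang up', 'econnreset', 'timeout', 'network error']
        .some((s) => msg.toLowerCase().includes(s))

// Illustrative generic form of the retry loop above.
async function withRetry<T>(fn: () => Promise<T>, maxAttempts = 3, baseDelayMs = 500): Promise<T> {
    for (let attempt = 1; ; attempt++) {
        try {
            return await fn()
        } catch (err: any) {
            const msg = String(err?.message || err)
            if (attempt >= maxAttempts || !isTransient(msg)) throw err
            await sleep(baseDelayMs * Math.pow(2, attempt - 1))
        }
    }
}
```
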
*/ const deleteFineTuningJob = async (fineTuningJobId: string) => { try { - // Attempt to cancel external job (best-effort) + // Attempt to cancel external job try { await axiosClient.post('/v1/fine_tuning/jobs/cancel', { fine_tuning_job_id: fineTuningJobId @@ -533,7 +533,7 @@ const deleteFineTuningJob = async (fineTuningJobId: string) => { } catch (logErr) {} } - // Remove local DB records (best-effort) + // Remove local DB records try { const appServer = getRunningExpressApp() const repo = appServer.AppDataSource.getRepository(FineTuningJob) @@ -547,6 +547,51 @@ const deleteFineTuningJob = async (fineTuningJobId: string) => { } catch (logErr) {} } + // Attempt to remove any output files/directories for this job under /tmp/finetuning/output + try { + const OUTPUT_BASE_DIR = '/tmp/finetuning/output' + const jobOutputDir = path.join(OUTPUT_BASE_DIR, String(fineTuningJobId)) + const resolvedJobDir = path.resolve(jobOutputDir) + const resolvedBaseDir = path.resolve(OUTPUT_BASE_DIR) + + // Safety: ensure the resolved path is within the expected base directory + if (resolvedJobDir.startsWith(resolvedBaseDir)) { + // Remove directory recursively if it exists + if (fs.existsSync(resolvedJobDir)) { + try { + // Use fs.rmSync when available; fallback to recursive unlink if necessary + if (typeof fs.rmSync === 'function') { + fs.rmSync(resolvedJobDir, { recursive: true, force: true }) + } else { + // older Node versions: remove files inside then rmdir + const rimraf = require('rimraf') + rimraf.sync(resolvedJobDir) + } + // eslint-disable-next-line no-console + console.debug(`finetuningService.deleteFineTuningJob - removed output dir: ${resolvedJobDir}`) + } catch (rmErr) { + try { console.warn('finetuningService.deleteFineTuningJob - failed to remove output dir', rmErr) } catch (ignore) {} + } + } + + // Also remove zip file if present + const zipPath = `${resolvedJobDir}.zip` + if (fs.existsSync(zipPath)) { + try { + fs.unlinkSync(zipPath) + // eslint-disable-next-line no-console + console.debug(`finetuningService.deleteFineTuningJob - removed zip: ${zipPath}`) + } catch (zipErr) { + try { console.warn('finetuningService.deleteFineTuningJob - failed to remove zip file', zipErr) } catch (ignore) {} + } + } + } else { + try { console.warn('finetuningService.deleteFineTuningJob - output path outside base dir, skipping removal:', resolvedJobDir) } catch (ignore) {} + } + } catch (e) { + try { console.warn('finetuningService.deleteFineTuningJob - error while removing output files', e) } catch (ignore) {} + } + return { success: true } } catch (error: any) { throw new InternalFlowiseError( diff --git a/studio-frontend/packages/server/src/ws/finetuningDownload.ts b/studio-frontend/packages/server/src/ws/finetuningDownload.ts index f07ae53..484d2e8 100644 --- a/studio-frontend/packages/server/src/ws/finetuningDownload.ts +++ b/studio-frontend/packages/server/src/ws/finetuningDownload.ts @@ -2,24 +2,66 @@ import { Server, Socket } from 'socket.io' import finetuningService from '../services/finetuning' import logger from '../utils/logger' +// Declare timer globals so this file compiles regardless of lib settings +declare function setTimeout(cb: (...args: any[]) => void, ms?: number): any +declare function clearTimeout(id: any): void + /** * Setup WebSocket handlers for fine-tuning output downloads * This allows non-blocking, asynchronous zip creation and download */ export const setupFineTuningDownloadHandlers = (io: Server) => { - logger.info('[WS] Setting up fine-tuning download handlers') - - 
io.on('connection', (socket: Socket) => { - logger.info(`[WS] Client connected - Socket ID: ${socket.id}`) - - /** - * Handle fine-tuning output download request - * Client sends: { jobId: string } - * Server emits progress updates and final download URL - */ + + logger.info('[WS Download] Setting up fine-tuning download namespace: /finetuning-download') + + // Create a dedicated namespace so download sockets don't mix with other WS handlers + const nsp = io.of('/finetuning-download') + + /** + * In-memory tracking of ongoing download tasks so multiple sockets can + * subscribe to the same job and reconnect (page refresh) without losing state. + * + * Map, + * downloadUrl?: string, + * fileName?: string, + * error?: string, + * timeoutHandle?: any + * }> + */ + const downloadTasks = new Map() + + // Grace period to keep completed task info for late reconnects (ms) + const COMPLETED_TASK_RETENTION_MS = 60 * 1000 // 60s + + nsp.on('connection', (socket: Socket) => { + logger.info(`[WS Download] Client connected - Socket ID: ${socket.id}`) + + const attachSubscriber = (jobId: string) => { + let task = downloadTasks.get(jobId) + if (!task) { + task = { + status: 'starting', + subscribers: new Set(), + downloadUrl: null, + fileName: null, + error: null, + timeoutHandle: null + } + downloadTasks.set(jobId, task) + } + + task.subscribers.add(socket) + return task + } + + // Handle fine-tuning output download request + // Client sends: { jobId: string } socket.on('download-finetuning-output', async (data: { jobId: string }) => { try { const { jobId } = data + logger.info(`[WS Download] Download requested - Socket ID: ${socket.id}, Job ID: ${jobId}`) if (!jobId) { socket.emit('download-finetuning-error', { @@ -29,40 +71,85 @@ export const setupFineTuningDownloadHandlers = (io: Server) => { return } - logger.info(`[WS] Starting download preparation for job: ${jobId}`) - - // Emit starting status - socket.emit('download-finetuning-progress', { - jobId, - status: 'starting', - message: 'Preparing download...' - }) - - // Call the service to prepare the zip file - // This may take time, so we do it asynchronously - const zipFilePath = await finetuningService.downloadFineTuningOutput(jobId) + // Attach this socket as a subscriber for this job + const task = attachSubscriber(jobId) - if (!zipFilePath) { - socket.emit('download-finetuning-error', { + // If task already completed, reply immediately with complete event + if (task.status === 'complete') { + socket.emit('download-finetuning-complete', { jobId, - error: 'Failed to create output archive' + downloadUrl: task.downloadUrl, + fileName: task.fileName }) return } - logger.info(`[WS] Download ready for job: ${jobId}`) - - // Emit completion with download URL - socket.emit('download-finetuning-complete', { + // Emit current progress state to the newly connected socket + socket.emit('download-finetuning-progress', { jobId, - downloadUrl: `/api/v1/finetuning/download-ft/${jobId}`, - fileName: `${jobId}-output.zip` + status: task.status, + message: task.status === 'starting' ? 'Preparing download...' 
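
The `downloadTasks` map above implements classic in-flight coalescing: the first subscriber starts the zip job, and later subscribers (including reconnects after a page refresh) attach to the same task instead of spawning a second tar process, with completed entries retained for a grace period. The core of the pattern in isolation (a sketch; the real handler additionally tracks subscriber sockets and emits socket events):

```typescript
// Illustrative: coalesce concurrent requests for the same key onto one promise.
const inFlight = new Map<string, Promise<string | null>>()

function prepareOnce(jobId: string, prepare: (id: string) => Promise<string | null>) {
    let p = inFlight.get(jobId)
    if (!p) {
        p = prepare(jobId).finally(() => {
            // retain briefly so late subscribers still see the result,
            // then drop the entry (the handler above keeps it for 60s)
            setTimeout(() => inFlight.delete(jobId), 60_000)
        })
        inFlight.set(jobId, p)
    }
    return p
}
```
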
: 'Creating zip archive (this may take a few minutes)' }) + // If task is already zipping or starting and has a running promise, do nothing else + if (task.promise) { + // existing background work will notify subscribers when done + return + } + + // Kick off the async preparation and store the promise so others can join + task.status = 'zipping' + task.promise = (async () => { + try { + // Call the service to prepare the zip file (returns path) + const zipFilePath = await finetuningService.downloadFineTuningOutput(jobId) + + if (!zipFilePath) { + task.status = 'error' + task.error = 'Failed to create output archive' + // Notify all subscribers + task.subscribers.forEach((s: Socket) => { + s.emit('download-finetuning-error', { jobId, error: task.error }) + }) + return + } + + task.status = 'complete' + task.downloadUrl = `/api/v1/finetuning/download-ft/${jobId}` + task.fileName = `${jobId}-output.zip` + + logger.info(`[WS Download] Download ready for job: ${jobId}`) + + // Emit completion to all current subscribers + task.subscribers.forEach((s: Socket) => { + s.emit('download-finetuning-complete', { + jobId, + downloadUrl: task.downloadUrl, + fileName: task.fileName + }) + }) + + // Schedule cleanup of the completed task after retention period + task.timeoutHandle = setTimeout(() => { + downloadTasks.delete(jobId) + }, COMPLETED_TASK_RETENTION_MS) + + } catch (error: any) { + task.status = 'error' + task.error = error?.message || String(error) + logger.error(`[WS Download] Error preparing download for job ${jobId}: ${task.error}`) + task.subscribers.forEach((s: Socket) => { + s.emit('download-finetuning-error', { jobId, error: task.error }) + }) + // cleanup soon + task.timeoutHandle = setTimeout(() => { + downloadTasks.delete(jobId) + }, 5000) + } + })() } catch (error: any) { const errorMessage = error?.message || String(error) || 'Unknown error' - logger.error(`[WS] Error preparing download: ${errorMessage}`) - + logger.error(`[WS Download] Handler error: ${errorMessage}`) socket.emit('download-finetuning-error', { jobId: data?.jobId || null, error: errorMessage @@ -70,10 +157,16 @@ export const setupFineTuningDownloadHandlers = (io: Server) => { } }) - socket.on('disconnect', (reason) => { - logger.info(`[WS] Client disconnected - Socket ID: ${socket.id}, Reason: ${reason}`) + socket.on('disconnect', (reason: any) => { + logger.info(`[WS Download] Client disconnected - Socket ID: ${socket.id}, Reason: ${reason}`) + // Remove this socket from all task subscriber lists + downloadTasks.forEach((task, jobId) => { + if (task.subscribers && task.subscribers.has(socket)) { + task.subscribers.delete(socket) + } + }) }) - logger.debug(`[WS] Fine-tuning download handlers attached to socket ${socket.id}`) + logger.debug(`[WS Download] Fine-tuning download handlers attached to socket ${socket.id}`) }) } diff --git a/studio-frontend/packages/server/src/ws/finetuningStatus.ts b/studio-frontend/packages/server/src/ws/finetuningStatus.ts new file mode 100644 index 0000000..3a78788 --- /dev/null +++ b/studio-frontend/packages/server/src/ws/finetuningStatus.ts @@ -0,0 +1,226 @@ +import { Server, Socket } from 'socket.io' +import finetuningService from '../services/finetuning' +import logger from '../utils/logger' + +// Declare timer globals so this file compiles regardless of lib settings +declare function setInterval(cb: (...args: any[]) => void, ms?: number): any +declare function clearInterval(id: any): void +declare function setTimeout(cb: (...args: any[]) => void, ms?: number): any + +// Store 
active job subscriptions: jobId -> Set of socket IDs +const jobSubscriptions = new Map>() + +// Background monitoring state +let monitoringInterval: any | null = null +const POLLING_INTERVAL = 5000 // 5 seconds - backend polls Ray API + +/** + * Setup WebSocket handlers for fine-tuning job status monitoring + * Clients can subscribe to specific job updates and receive real-time status changes + */ +export const setupFineTuningStatusHandlers = (io: Server) => { + io.on('connection', (socket: Socket) => { + logger.info(`[WS Status] Client connected - Socket ID: ${socket.id}`) + + /** + * Subscribe to job status updates + * Client sends: { jobIds: string[] } + * Server will emit 'job-status-update' events for these jobs + */ + socket.on('subscribe-job-status', (data: { jobIds: string[] }) => { + try { + const { jobIds } = data + + if (!Array.isArray(jobIds) || jobIds.length === 0) { + return + } + + // Add this socket to each job's subscription set + jobIds.forEach(jobId => { + if (!jobSubscriptions.has(jobId)) { + jobSubscriptions.set(jobId, new Set()) + } + jobSubscriptions.get(jobId)!.add(socket.id) + }) + + // Start background monitoring if not already running + startBackgroundMonitoring(io) + + // Send immediate acknowledgment + socket.emit('subscription-confirmed', { + jobIds, + message: 'Subscribed to job updates' + }) + + } catch (error: any) { + socket.emit('subscription-error', { + error: error?.message || 'Failed to subscribe' + }) + } + }) + + /** + * Unsubscribe from job status updates + * Client sends: { jobIds: string[] } + */ + socket.on('unsubscribe-job-status', (data: { jobIds: string[] }) => { + try { + const { jobIds } = data + + if (!Array.isArray(jobIds)) return + + jobIds.forEach(jobId => { + const subscribers = jobSubscriptions.get(jobId) + if (subscribers) { + subscribers.delete(socket.id) + if (subscribers.size === 0) { + jobSubscriptions.delete(jobId) + } + } + }) + + // Stop monitoring if no more subscriptions + if (jobSubscriptions.size === 0) { + stopBackgroundMonitoring() + } + + } catch (error: any) { + // Silent error handling + } + }) + + /** + * Handle client disconnect - clean up subscriptions + */ + socket.on('disconnect', (reason: any) => { + logger.info(`[WS Status] Client disconnected - Socket ID: ${socket.id}`) + + // Remove this socket from all job subscriptions + let removedCount = 0 + jobSubscriptions.forEach((subscribers, jobId) => { + if (subscribers.has(socket.id)) { + subscribers.delete(socket.id) + removedCount++ + if (subscribers.size === 0) { + jobSubscriptions.delete(jobId) + } + } + }) + + // Stop monitoring if no more subscriptions + if (jobSubscriptions.size === 0) { + stopBackgroundMonitoring() + } + }) + }) +} + +/** + * Start background monitoring of subscribed jobs + * Polls the fine-tuning service and emits updates via WebSocket + */ +function startBackgroundMonitoring(io: Server) { + // Already running + if (monitoringInterval) return + + // Poll immediately, then at regular intervals + checkJobStatuses(io) + + // Use global.setInterval to satisfy TypeScript without depending on DOM lib + // store as any to avoid NodeJS type issues in this repository's tsconfig + monitoringInterval = (setInterval(() => { + checkJobStatuses(io) + }, POLLING_INTERVAL) as unknown) as any +} + +/** + * Stop background monitoring + */ +function stopBackgroundMonitoring() { + if (!monitoringInterval) return + + clearInterval(monitoringInterval as any) + monitoringInterval = null +} + +/** + * Check status of all subscribed jobs and emit updates + */ 
+async function checkJobStatuses(io: Server) { + const jobIds = Array.from(jobSubscriptions.keys()) + + if (jobIds.length === 0) { + stopBackgroundMonitoring() + return + } + + // Keep routine checks quiet - debug level only + logger.debug(`[WS Status] Checking ${jobIds.length} subscribed jobs`) + + // Retrieve all subscribed jobs in parallel (non-blocking) + const promises = jobIds.map(async (jobId) => { + try { + const jobData = await finetuningService.retrieveFineTuningJob(jobId) + return { jobId, jobData, error: null } + } catch (error: any) { + logger.error(`[WS Status] Error retrieving job ${jobId}: ${error?.message || error}`) + return { jobId, jobData: null, error: error?.message || 'Failed to retrieve job' } + } + }) + + const results = await Promise.allSettled(promises) + + // Emit updates to subscribed clients + results.forEach((result) => { + if (result.status === 'rejected') { + logger.error(`[WS Status] Promise rejected: ${result.reason}`) + return + } + + const { jobId, jobData, error } = result.value + + // Get subscribers for this job + const subscribers = jobSubscriptions.get(jobId) + if (!subscribers || subscribers.size === 0) return + + if (error || !jobData) { + // Emit error to subscribers + subscribers.forEach(socketId => { + io.to(socketId).emit('job-status-error', { + jobId, + error: error || 'No data returned' + }) + }) + return + } + + // Normalize job data + const normalizedJob = { + id: jobData.id || jobData.job_id || jobData.fine_tuning_job_id || jobId, + name: jobData.name || jobData.id || jobId, + status: jobData.status || jobData.state || 'unknown', + model: jobData.model || 'N/A', + dataset: jobData.dataset || jobData.training_file || jobData.trainingFile || 'N/A', + createdDate: jobData.createdDate || jobData.created_at || jobData.createdAt || new Date().toISOString(), + // Include all original data + ...jobData + } + + // Emit update to all subscribers + subscribers.forEach(socketId => { + io.to(socketId).emit('job-status-update', normalizedJob) + }) + + // If job is no longer running, automatically unsubscribe after a delay + const finalStatuses = ['succeeded', 'completed', 'failed', 'cancelled', 'canceled'] + if (finalStatuses.includes((normalizedJob.status || '').toLowerCase())) { + // Delay cleanup slightly to allow any final events to be delivered + setTimeout(() => { + const subs = jobSubscriptions.get(jobId) + if (subs) { + jobSubscriptions.delete(jobId) + } + }, 10000) // Keep sending updates for 10 more seconds, then clean up + } + }) +} diff --git a/studio-frontend/packages/ui/src/api/finetuning.js b/studio-frontend/packages/ui/src/api/finetuning.js index 1bae6fe..b3f4e4a 100644 --- a/studio-frontend/packages/ui/src/api/finetuning.js +++ b/studio-frontend/packages/ui/src/api/finetuning.js @@ -8,6 +8,9 @@ const getSocketUrl = () => { return apiBaseURL || window.location.origin } +// Track active download sockets per jobId to avoid duplicate connections +const downloadSocketMap = new Map() + const finetuningApi = { // Upload training file uploadFile: (file, purpose = 'fine-tune', onUploadProgress) => { @@ -87,78 +90,192 @@ const finetuningApi = { return client.get(`/finetuning/download-ft/${encodeURIComponent(jobId)}`, cfg) }, - /** + /** * Download fine-tuning output using WebSocket for async zip preparation + * Each download gets its own dedicated WebSocket connection + * No timeout - waits indefinitely until zip is ready * @param {string} jobId - The fine-tuning job ID * @param {Object} callbacks - Callback functions { onProgress, onComplete, 
onError } - * @returns {Function} Cleanup function to disconnect socket + * @returns {Function} Cleanup function to disconnect this socket */ downloadFinetuningOutputWS: (jobId, callbacks = {}) => { const { onProgress, onComplete, onError } = callbacks - // Get socket URL + // Reuse existing socket for this jobId if present (even if not yet connected). + // This prevents duplicate sockets when React StrictMode mounts components twice. + const existingSocket = downloadSocketMap.get(jobId) + if (existingSocket) { + console.log(`[WS Download ${jobId}] Reusing existing socket (id: ${existingSocket.id || 'pending'})`) + // Attach provided callbacks to the existing socket + if (onProgress) existingSocket.on('download-finetuning-progress', onProgress) + if (onComplete) existingSocket.on('download-finetuning-complete', onComplete) + if (onError) existingSocket.on('download-finetuning-error', onError) + + // Return cleanup that detaches these listeners + return () => { + try { + if (onProgress) existingSocket.off('download-finetuning-progress', onProgress) + if (onComplete) existingSocket.off('download-finetuning-complete', onComplete) + if (onError) existingSocket.off('download-finetuning-error', onError) + } catch (e) {} + } + } + + // Connect specifically to the '/finetuning-download' namespace so server-side + // download handlers are isolated from status sockets. const socketUrl = getSocketUrl() - console.log('[WS] Connecting to:', socketUrl) - - // Create socket connection - const socket = io(socketUrl, { + + // Create dedicated socket for this download namespace + // Append the namespace to the URL so socket.io-client connects to it directly + const socket = io(`${socketUrl}/finetuning-download`, { transports: ['websocket', 'polling'], - reconnection: false, - timeout: 10000 + reconnection: true, + reconnectionDelay: 10000, + reconnectionAttempts: 5, + timeout: false }) + // Store socket for this job so future calls reuse it + try { downloadSocketMap.set(jobId, socket) } catch (e) {} + // Handle connection socket.on('connect', () => { - console.log('[WS] Connected to server for download, socket ID:', socket.id) + // Notify that the download socket is connected for this jobId + console.log(`[WS Download ${jobId}] Connected (socket id: ${socket.id})`) // Request download preparation socket.emit('download-finetuning-output', { jobId }) }) - // Handle progress updates - socket.on('download-finetuning-progress', (data) => { - console.log('[WS] Download progress:', data) - if (onProgress) onProgress(data) - }) - // Handle completion socket.on('download-finetuning-complete', (data) => { - console.log('[WS] Download ready:', data) + console.log(`[WS Download ${jobId}] Complete`) if (onComplete) onComplete(data) // Disconnect after completion - socket.disconnect() + try { socket.disconnect() } catch (e) {} + // remove from map + try { downloadSocketMap.delete(jobId) } catch (e) {} }) // Handle errors socket.on('download-finetuning-error', (data) => { - console.error('[WS] Download error:', data) + console.error(`[WS Download ${jobId}] Error:`, data) if (onError) onError(data) - socket.disconnect() + try { socket.disconnect() } catch (e) {} + try { downloadSocketMap.delete(jobId) } catch (e) {} }) // Handle connection errors socket.on('connect_error', (error) => { - console.error('[WS] Connection error:', error.message, error) - if (onError) onError({ error: `WebSocket connection failed: ${error.message}` }) - socket.disconnect() + console.error(`[WS Download ${jobId}] Connection error:`, 
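/*
 * The downloadSocketMap lookup above is a get-or-create guard. A minimal sketch of the
 * same idea (the helper name is hypothetical, not something this file defines):
 *
 *   const sockets = new Map()
 *   function getOrCreateSocket(jobId, create) {
 *     if (!sockets.has(jobId)) sockets.set(jobId, create(jobId))
 *     return sockets.get(jobId)
 *   }
 *
 * Without the guard, React 18 StrictMode mounts effects twice in development and
 * would open duplicate sockets for the same job.
 */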
error.message) + // Don't call onError for connection errors - let it retry }) // Handle disconnect socket.on('disconnect', (reason) => { - console.log('[WS] Disconnected:', reason) + console.log(`[WS Download ${jobId}] Disconnected:`, reason) + try { downloadSocketMap.delete(jobId) } catch (e) {} + }) + + // Return cleanup function + return () => { + try { + if (onProgress) socket.off('download-finetuning-progress', onProgress) + if (onComplete) socket.off('download-finetuning-complete', onComplete) + if (onError) socket.off('download-finetuning-error', onError) + } catch (e) {} + try { + if (socket && socket.connected) { + console.log(`[WS Download ${jobId}] Manually disconnecting`) + socket.disconnect() + } + } catch (e) {} + try { downloadSocketMap.delete(jobId) } catch (e) {} + } + }, + + /** + * Subscribe to real-time job status updates via WebSocket + * Creates a dedicated WebSocket connection per job ID + * @param {string} jobId - Single job ID to monitor + * @param {Object} callbacks - Callback functions { onUpdate, onError, onConnected } + * @returns {Function} Cleanup function to disconnect + */ + subscribeToJobStatus: (jobId, callbacks = {}) => { + const { onUpdate, onError, onConnected } = callbacks + + if (!jobId) { + if (onError) onError({ error: 'No job ID provided' }) + return () => {} + } + + const socketUrl = getSocketUrl() + const socket = io(socketUrl, { + transports: ['websocket', 'polling'], + reconnection: true, + reconnectionDelay: 1000, + reconnectionAttempts: Infinity, + timeout: false + }) + + const FINAL_STATUSES = ['succeeded', 'completed', 'failed', 'cancelled', 'canceled'] + + socket.on('connect', () => { + console.log(`[WS] Connected for job ${jobId}`) + socket.emit('subscribe-job-status', { jobIds: [jobId] }) + }) + + socket.on('subscription-confirmed', (data) => { + if (onConnected) onConnected(data) + }) + + socket.on('subscription-error', (data) => { + if (onError) onError(data) + }) + + socket.on('job-status-update', (jobData) => { + // Only process updates for this specific job + if (jobData.id === jobId) { + if (onUpdate) onUpdate(jobData) + + // Check if job reached final status + const status = (jobData.status || '').toString().toLowerCase() + if (FINAL_STATUSES.includes(status)) { + // Auto-disconnect after final status + setTimeout(() => { + if (socket && socket.connected) { + socket.disconnect() + } + }, 1000) + } + } + }) + + socket.on('job-status-error', (err) => { + if (err.jobId === jobId && onError) { + onError(err) + } + }) + + socket.on('disconnect', () => { + console.log(`[WS] Disconnected for job ${jobId}`) + }) + + socket.on('connect_error', (error) => { + console.error(`[WS] Connection error for job ${jobId}:`, error.message) }) // Return cleanup function return () => { - if (socket.connected) { - console.log('[WS] Manually disconnecting socket') + if (socket && socket.connected) { + socket.emit('unsubscribe-job-status', { jobIds: [jobId] }) socket.disconnect() } } }, - // Legacy compatibility methods + // Delete job API deleteJob: (jobId) => { - // Call the backend delete endpoint which will cancel remote job (best-effort) and remove local DB records + // Call the backend delete endpoint which will cancel remote job and remove local DB records return client.post('/finetuning/jobs/delete', { fine_tuning_job_id: jobId }) }, diff --git a/studio-frontend/packages/ui/src/ui-component/table/FlowListTable.jsx b/studio-frontend/packages/ui/src/ui-component/table/FlowListTable.jsx index 86db21d..28703c4 100644 --- 
a/studio-frontend/packages/ui/src/ui-component/table/FlowListTable.jsx +++ b/studio-frontend/packages/ui/src/ui-component/table/FlowListTable.jsx @@ -367,7 +367,7 @@ export const FlowListTable = ({ data, images, isLoading, filterFunction, updateF setDeployWebSocketForId(id, wsInstance); wsInstance.onopen = () => { - console.log('[WebSocket] Connected for click deployment monitoring', id); + console.log('[WS] Connected for click deployment monitoring', id); wsInstance.send(JSON.stringify({ hostname: deploymentConfig.hostname, username: deploymentConfig.username, @@ -378,7 +378,7 @@ export const FlowListTable = ({ data, images, isLoading, filterFunction, updateF wsInstance.onmessage = (event) => { let data; try { data = JSON.parse(event.data); } catch { return; } - console.log('[WebSocket] Click deployment message:', data); + console.log('[WS] Click deployment message:', data); if (data.status === 'Success') { setDeployStatusForId(id, ['Success', data.message]); @@ -431,19 +431,19 @@ export const FlowListTable = ({ data, images, isLoading, filterFunction, updateF }; wsInstance.onerror = (error) => { - console.error('[WebSocket] Click deployment error:', error); + console.error('[WS] Click deployment error:', error); setDeployStatusForId(id, ['Error', 'Connection error during deployment monitoring']); wsInstance.close(); setDeployWebSocketForId(id, null); }; wsInstance.onclose = (event) => { - console.log(`[WebSocket] Click deployment closed: code=${event.code}, reason='${event.reason}', wasClean=${event.wasClean}`); + console.log(`[WS] Click deployment closed: code=${event.code}, reason='${event.reason}', wasClean=${event.wasClean}`); setDeployWebSocketForId(id, null); // Check deployment status if abnormal closure if (event.code !== 1000 && event.code !== 1001) { - console.log('[WebSocket] Abnormal closure detected, checking deployment status...'); + console.log('[WS] Abnormal closure detected, checking deployment status...'); setTimeout(async () => { try { const response = await chatflowsApi.getSpecificChatflow(id); diff --git a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx index eae60d8..6cf8608 100644 --- a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx +++ b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx @@ -217,10 +217,20 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null, filter setDownloadingJobs((prev) => ({ ...(prev || {}), [id]: { progress: 0 } })) setDownloadDialogOpen(true) + // Persist pending download so we can recover on page refresh + try { + const pending = JSON.parse(sessionStorage.getItem('ft_pending_downloads') || '[]') + if (!pending.includes(id)) { + pending.push(id) + sessionStorage.setItem('ft_pending_downloads', JSON.stringify(pending)) + } + } catch (e) { + // ignore sessionStorage errors + } + // Use WebSocket-based download for non-blocking zip creation const cleanup = finetuningApi.downloadFinetuningOutputWS(job.id, { onProgress: (data) => { - console.log('Download progress:', data) // Update UI to show preparation is in progress setDownloadingJobs((prev) => ({ ...(prev || {}), @@ -228,19 +238,22 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null, filter })) }, onComplete: async (data) => { - console.log('Download complete:', data) - // File is ready - trigger native browser download // No authentication needed (endpoint is whitelisted) const downloadUrl = 
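/*
 * The sessionStorage bookkeeping in this handler repeats the same read-filter-write
 * sequence in several places. A small pair of helpers would capture it; this is a
 * sketch only, reusing the 'ft_pending_downloads' key from the code above:
 *
 *   const readPending = () => {
 *     try { return JSON.parse(sessionStorage.getItem('ft_pending_downloads') || '[]') }
 *     catch (e) { return [] }
 *   }
 *   const writePending = (ids) => {
 *     try { sessionStorage.setItem('ft_pending_downloads', JSON.stringify(ids)) }
 *     catch (e) {}
 *   }
 *
 * Adding an id becomes writePending([...new Set([...readPending(), id])]); removing
 * one becomes writePending(readPending().filter((x) => x !== id)).
 */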
data.downloadUrl || `/api/v1/finetuning/download-ft/${job.id}` - const fileName = data.fileName || `${job.id}-output.zip` - console.log('Starting native browser download:', downloadUrl) // Use window.location.href to trigger native browser download // Browser will show download in download manager with progress bar window.location.href = downloadUrl + // Remove from pending list + try { + const pending = JSON.parse(sessionStorage.getItem('ft_pending_downloads') || '[]') + const filtered = (pending || []).filter((x) => x !== id) + sessionStorage.setItem('ft_pending_downloads', JSON.stringify(filtered)) + } catch (e) {} + // Mark this job finished and close dialog setDownloadingJobs((prev) => ({ ...(prev || {}), [id]: { progress: 100 } })) setDownloadProgress(100) @@ -256,6 +269,13 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null, filter onError: (data) => { console.error('Download preparation error:', data) alert('Failed to prepare download: ' + (data.error || 'Unknown error')) + // Remove from pending list + try { + const pending = JSON.parse(sessionStorage.getItem('ft_pending_downloads') || '[]') + const filtered = (pending || []).filter((x) => x !== id) + sessionStorage.setItem('ft_pending_downloads', JSON.stringify(filtered)) + } catch (e) {} + // Clear downloading state setDownloadingJobs((prev) => { const copy = { ...(prev || {}) } @@ -375,6 +395,61 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null, filter } } + // On mount: re-establish any pending download WS connections saved in sessionStorage + useEffect(() => { + try { + const pending = JSON.parse(sessionStorage.getItem('ft_pending_downloads') || '[]') + if (Array.isArray(pending) && pending.length > 0) { + // For each pending job id, re-attach a download WS to get status + pending.forEach((jobId) => { + // avoid duplicate entries in state + if (!downloadingJobs || !downloadingJobs[jobId]) { + setDownloadingJobs((prev) => ({ ...(prev || {}), [jobId]: { progress: 0 } })) + } + finetuningApi.downloadFinetuningOutputWS(jobId, { + onProgress: (data) => { + setDownloadingJobs((prev) => ({ ...(prev || {}), [jobId]: { progress: 0, status: data.status, message: data.message } })) + setDownloadDialogOpen(true) + }, + onComplete: (data) => { + // Trigger native download + const downloadUrl = data.downloadUrl || `/api/v1/finetuning/download-ft/${jobId}` + window.location.href = downloadUrl + // cleanup pending + try { + const pending2 = JSON.parse(sessionStorage.getItem('ft_pending_downloads') || '[]') + const filtered = (pending2 || []).filter((x) => x !== jobId) + sessionStorage.setItem('ft_pending_downloads', JSON.stringify(filtered)) + } catch (e) {} + setDownloadingJobs((prev) => { + const copy = { ...(prev || {}) } + delete copy[jobId] + return copy + }) + setDownloadDialogOpen(false) + }, + onError: (data) => { + console.error('Recovered download preparation error:', data) + try { + const pending2 = JSON.parse(sessionStorage.getItem('ft_pending_downloads') || '[]') + const filtered = (pending2 || []).filter((x) => x !== jobId) + sessionStorage.setItem('ft_pending_downloads', JSON.stringify(filtered)) + } catch (e) {} + setDownloadingJobs((prev) => { + const copy = { ...(prev || {}) } + delete copy[jobId] + return copy + }) + setDownloadDialogOpen(false) + } + }) + }) + } + } catch (e) { + // ignore sessionStorage parse errors + } + }, []) + const getStatusColor = (status) => { switch (status?.toLowerCase()) { case 'completed': @@ -634,7 +709,6 @@ const FinetuningJobsTable = ({ 
data, isLoading = false, onRefresh = null, filter The server is preparing the job output for download. This may take a few moments for large outputs. - {downloadProgress > 0 ? ` (${downloadProgress}%)` : ''} 0 ? 'determinate' : 'indeterminate'} value={downloadProgress} /> @@ -701,10 +775,4 @@ FinetuningJobsTable.propTypes = { filterFunction: PropTypes.func } -FinetuningJobsTable.defaultProps = { - filterFunction: null -} - -// default props handled via function default parameters - export default FinetuningJobsTable \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/views/finetuning/index.jsx b/studio-frontend/packages/ui/src/views/finetuning/index.jsx index b8b42d0..617da61 100644 --- a/studio-frontend/packages/ui/src/views/finetuning/index.jsx +++ b/studio-frontend/packages/ui/src/views/finetuning/index.jsx @@ -52,113 +52,54 @@ const Finetuning = () => { getAllJobsApi = useApi(finetuningApi.getAllJobs) } - const pollingRef = useRef(null) - useEffect(() => { - // Load fine-tuning jobs loadJobs() - }, []) - - // Polling: when there are running jobs, poll backend until all jobs are completed/failed - useEffect(() => { - // helper to clear existing interval - const stopPolling = () => { - if (pollingRef.current) { - clearInterval(pollingRef.current) - pollingRef.current = null - } + + // Cleanup all WebSocket connections on unmount + return () => { + Object.keys(jobSocketsRef.current).forEach(jobId => { + if (jobSocketsRef.current[jobId]) { + jobSocketsRef.current[jobId]() + } + }) + jobSocketsRef.current = {} } + }, []) - // Only start polling if user is authenticated - if (!keycloak?.authenticated) { - stopPolling() - return - } + // Store cleanup functions for each job's WebSocket connection + const jobSocketsRef = useRef({}) - const hasRunning = (jobs || []).some(j => (j?.status || '').toString().toLowerCase() === 'running') - if (!hasRunning) { - stopPolling() + // Function to start monitoring a specific job + const startJobMonitoring = (jobId) => { + // Don't create duplicate connections + if (jobSocketsRef.current[jobId]) { return } - // If already polling, keep it - if (pollingRef.current) return - - // Start polling every 5 seconds โ€” for each running job call the retrieve endpoint - console.debug('[finetuning] starting polling for running jobs') - pollingRef.current = setInterval(async () => { - console.debug('[finetuning] poll tick - checking running jobs') - try { - // find running jobs from current state - const runningJobs = (jobs || []).filter(j => (j?.status || '').toString().toLowerCase() === 'running') - if (runningJobs.length === 0) { - console.debug('[finetuning] no running jobs found, stopping polling') - stopPolling() - return - } - - // Retrieve updated details for each running job in parallel - const promises = runningJobs.map(j => { - // finetuningApi.getJob returns an axios promise; we want the response.data - console.debug('[finetuning] retrieving job:', j.id) - return finetuningApi.getJob(j.id).then(res => res.data).catch(err => { - console.error('Error retrieving job', j.id, err) - return null - }) - }) - - const updated = await Promise.all(promises) - - // normalize updated jobs and merge into current jobs list - const normalizeJob = (j) => { - if (!j) return null - const id = j.id || j.job_id || j.fine_tuning_job_id || String(Date.now()) - const name = j.name || id - const status = j.status || j.state || 'pending' - const model = j.model || 'N/A' - const dataset = j.dataset || j.training_file || j.trainingFile || 'N/A' - const progress = 
typeof j.progress === 'number' ? `${j.progress}%` : (j.progress || '0%') - const createdDate = j.createdDate || j.created_at || j.createdAt || new Date().toISOString() - return { - ...j, - id, - name, - status, - model, - dataset, - progress, - createdDate - } - } - - setJobs(prev => { - const updatedMap = {} - updated.forEach(u => { - if (!u) return - const n = normalizeJob(u) - if (n) updatedMap[n.id] = n - }) - - const newList = (prev || []).map(p => updatedMap[p.id] ? { ...p, ...updatedMap[p.id] } : p) - - // determine whether to stop polling based on the merged list - const stillRunningLocal = newList.some(j => (j?.status || '').toString().toLowerCase() === 'running') - if (!stillRunningLocal) { - console.debug('[finetuning] no running jobs remain after merge, stopping polling') - // stopPolling will clear the interval; call asynchronously to avoid interfering with state update - setTimeout(() => stopPolling(), 0) - } - - return newList - }) - } catch (err) { - console.error('Error while polling fine-tuning jobs (retrieve):', err) + // Subscribe to this job's status updates + const cleanup = finetuningApi.subscribeToJobStatus(jobId, { + onUpdate: (jobData) => { + // Update the job in state + setJobs(prev => prev.map(j => + j.id === jobData.id ? { ...j, ...jobData } : j + )) + }, + onError: (error) => { + console.error(`[Job ${jobId}] WebSocket error:`, error) } - }, 5000) + }) + + // Store cleanup function + jobSocketsRef.current[jobId] = cleanup + } - // cleanup on unmount or dependency change - return () => stopPolling() - }, [jobs, keycloak?.authenticated]) + // Function to stop monitoring a specific job + const stopJobMonitoring = (jobId) => { + if (jobSocketsRef.current[jobId]) { + jobSocketsRef.current[jobId]() + delete jobSocketsRef.current[jobId] + } + } const loadJobs = async () => { if (!getAllJobsApi) return @@ -191,6 +132,15 @@ const Finetuning = () => { const jobsData = Array.isArray(response) ? response.map(normalizeJob).filter(Boolean) : [] setJobs(jobsData) setLoading(false) + + // Start monitoring any active jobs + const activeStatuses = ['pending', 'validating_files', 'running'] + jobsData.forEach(job => { + const status = (job?.status || '').toString().toLowerCase() + if (activeStatuses.includes(status)) { + startJobMonitoring(job.id) + } + }) } catch (error) { console.error('Error loading fine-tuning jobs:', error) setJobs([]) @@ -213,6 +163,13 @@ const Finetuning = () => { const handleJobCreated = (newJob) => { setJobs(prev => [...prev, newJob]) setJobModalOpen(false) + + // Start monitoring the new job if it's in an active state + const activeStatuses = ['pending', 'validating_files', 'running'] + const status = (newJob?.status || '').toString().toLowerCase() + if (activeStatuses.includes(status)) { + startJobMonitoring(newJob.id) + } } const onSearchChange = (event) => { diff --git a/studio-frontend/packages/ui/vite.config.js b/studio-frontend/packages/ui/vite.config.js index 3a2fae0..a2591b2 100644 --- a/studio-frontend/packages/ui/vite.config.js +++ b/studio-frontend/packages/ui/vite.config.js @@ -9,17 +9,17 @@ export default defineConfig(async ({ mode }) => { const serverEnv = dotenv.config({ processEnv: {}, path: '../server/.env' }).parsed const serverHost = serverEnv?.['HOST'] ?? 'localhost' const serverPort = parseInt(serverEnv?.['PORT'] ?? 
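/*
 * A note on the proxy entries below: '/socket.io' sets ws: true so the Vite dev
 * server upgrades WebSocket connections instead of proxying them as plain HTTP,
 * and secure: false skips TLS certificate verification for a self-signed upstream.
 * The minimal shape, with an assumed local target:
 *
 *   proxy = {
 *     '/api': { target: 'http://localhost:3000', changeOrigin: true, secure: false },
 *     '/socket.io': { target: 'http://localhost:3000', changeOrigin: true, ws: true, secure: false }
 *   }
 */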
'3000') - if (!Number.isNaN(serverPort) && serverPort > 0 && serverPort < 65535) { - proxy = { - '/api': { - target: `http://${serverHost}:${serverPort}`, - changeOrigin: true - }, - '/socket.io': { - target: `http://${serverHost}:${serverPort}`, - changeOrigin: true, - ws: true - } + proxy = { + '/api': { + target: `http://${serverHost}:${serverPort}`, + changeOrigin: true, + secure: false + }, + '/socket.io': { + target: `http://${serverHost}:${serverPort}`, + changeOrigin: true, + ws: true, + secure: false } } } From 66122be6724eaf1b969c6d35e43217b1b5bea9c9 Mon Sep 17 00:00:00 2001 From: wwanarif Date: Fri, 31 Oct 2025 08:06:17 +0000 Subject: [PATCH 14/23] updated the ft configs UI Signed-off-by: wwanarif --- .../views/finetuning/FinetuningJobModal.jsx | 1154 +++++++++++------ .../views/finetuning/FinetuningJobsTable.jsx | 44 +- 2 files changed, 755 insertions(+), 443 deletions(-) diff --git a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx index 39fd4ea..cb8bcc3 100644 --- a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx +++ b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx @@ -14,13 +14,12 @@ import { InputLabel, Select, MenuItem, - Typography, - Stack, Checkbox, FormControlLabel, + Typography, + Stack, IconButton, CircularProgress, - Grid } from '@mui/material' import Autocomplete from '@mui/material/Autocomplete' import { useTheme } from '@mui/material/styles' @@ -59,12 +58,25 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { }, dataset: { max_length: 512, + block_size: 512, validation_split_percentage: 5, padding_side: 'right', truncation_side: 'right', max_source_length: 384, + pad_to_max: false, + query_max_len: 128, + passage_max_len: 128, + train_group_size: 8, + query_instruction_for_retrieval: '', + passage_instruction_for_retrieval: '', + reasoning_dataset_keys: ['Question', 'Complex_CoT', 'Response'], + // raw input string to preserve trailing commas/spaces while editing + reasoning_dataset_keys_input: 'Question, Complex_CoT, Response', max_prompt_length: 512, data_preprocess_type: 'neural_chat', + data_preprocess_neural_chat: true, + padding: 'true', + truncation: true, mask_input: true, mask_response: true }, @@ -94,9 +106,6 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { const [isSubmitting, setIsSubmitting] = useState(false) const [loraEnabled, setLoraEnabled] = useState(false) - const [datasetEnabled, setDatasetEnabled] = useState(true) - const [generalEnabled, setGeneralEnabled] = useState(true) - const [trainingEnabled, setTrainingEnabled] = useState(true) const baseModels = [ @@ -196,52 +205,40 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { newErrors.trainingDataset = 'Training dataset is required' } - // OpenAI parameters validation (only when training enabled) - if (trainingEnabled) { - if (formData.openai_params.learning_rate_multiplier <= 0) { - newErrors.learning_rate_multiplier = 'Learning rate multiplier must be greater than 0' - } + // OpenAI parameters validation + if (formData.openai_params.learning_rate_multiplier <= 0) { + newErrors.learning_rate_multiplier = 'Learning rate multiplier must be greater than 0' + } - if (formData.openai_params.batch_size <= 0) { - newErrors.batch_size = 'Batch size must be greater than 0' - } + if (formData.openai_params.batch_size <= 0) { + newErrors.batch_size = 'Batch size must be greater than 0' + } - if 
(formData.openai_params.n_epochs <= 0) { - newErrors.n_epochs = 'Number of epochs must be greater than 0' - } + if (formData.openai_params.n_epochs <= 0) { + newErrors.n_epochs = 'Number of epochs must be greater than 0' } - // Training parameters validation (only when enabled) - if (trainingEnabled) { - if (formData.training.learning_rate <= 0) { - newErrors.learning_rate = 'Learning rate must be greater than 0' - } + // Training parameters validation + if (formData.training.learning_rate <= 0) { + newErrors.learning_rate = 'Learning rate must be greater than 0' + } - if (formData.training.epochs <= 0) { - newErrors.epochs = 'Epochs must be greater than 0' - } + if (formData.training.epochs <= 0) { + newErrors.epochs = 'Epochs must be greater than 0' + } - if (formData.training.logging_steps <= 0) { - newErrors.logging_steps = 'Logging steps must be greater than 0' - } + if (formData.training.logging_steps <= 0) { + newErrors.logging_steps = 'Logging steps must be greater than 0' } - // General validation (only when enabled) - if (generalEnabled) { - if (!formData.general.output_dir) { - newErrors.output_dir = 'Output directory is required' - } + // General validation + if (!formData.general.output_dir) { + newErrors.output_dir = 'Output directory is required' } - // Dataset validation (only when enabled) - if (datasetEnabled) { - if (!formData.dataset) { - newErrors.dataset = 'Dataset configuration is required' - } else { - if (formData.dataset.max_length <= 0) { - newErrors.dataset_max_length = 'Max length must be greater than 0' - } - } + // Dataset validation + if (formData.dataset.max_length <= 0) { + newErrors.dataset_max_length = 'Max length must be greater than 0' } // LoRA parameters validation (only when enabled) @@ -292,47 +289,51 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { } } - // Build payload and only include sections that are enabled + // Build payload const jobPayload = { model: formData.baseModel, training_file: trainingFileName } - if (generalEnabled) { - // If user enabled LoRA, include the object; otherwise send explicit null inside General - const gen = { ...formData.general } - gen.lora_config = loraEnabled ? formData.lora : null - // Ensure config exists and place hf_token if provided - gen.config = gen.config || {} - if (formData.hf_token) { - gen.config.token = formData.hf_token - } - jobPayload.General = gen - jobPayload.task = gen.task || 'instruction_tuning' - } else { - jobPayload.task = 'instruction_tuning' - // If HF token was provided while General is disabled, create minimal General with config.token - if (formData.hf_token) { - jobPayload.General = { config: { token: formData.hf_token } } - } + // General configuration with LoRA config + const gen = { ...formData.general } + gen.lora_config = loraEnabled ? 
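/*
 * For illustration, the payload assembled in this block ends up shaped roughly as
 * follows; the concrete values are invented examples, only the keys come from the
 * code here:
 *
 *   {
 *     model: 'example-org/example-base-model',
 *     training_file: 'train-abc123.jsonl',
 *     task: 'instruction_tuning',
 *     General: {
 *       output_dir: './output',
 *       lora_config: { r: 8, lora_alpha: 32, lora_dropout: 0.1, task_type: 'CAUSAL_LM' },   // or null
 *       config: { token: 'hf_...' }
 *     },
 *     Dataset: { max_length: 512, block_size: 512, padding_side: 'right', truncation: true },
 *     Training: { epochs: 1, batch_size: 2, learning_rate: 5e-5, optimizer: 'adamw_torch' }
 *   }
 */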
formData.lora : null + gen.config = gen.config || {} + if (formData.hf_token) { + gen.config.token = formData.hf_token } + jobPayload.General = gen + jobPayload.task = gen.task || 'instruction_tuning' - if (datasetEnabled) { - jobPayload.Dataset = { - max_length: formData.dataset.max_length, - // fallback keys if some are undefined - query_max_len: formData.dataset.query_max_len, - passage_max_len: formData.dataset.passage_max_len, - padding: formData.dataset.padding_side - } + // Dataset configuration + jobPayload.Dataset = { + max_length: formData.dataset.max_length, + block_size: formData.dataset.block_size, + max_source_length: formData.dataset.max_source_length, + padding_side: formData.dataset.padding_side, + truncation_side: formData.dataset.truncation_side, + padding: formData.dataset.padding, + truncation: formData.dataset.truncation, + mask_input: formData.dataset.mask_input, + mask_response: formData.dataset.mask_response, + query_max_len: formData.dataset.query_max_len, + passage_max_len: formData.dataset.passage_max_len, + train_group_size: formData.dataset.train_group_size, + query_instruction_for_retrieval: formData.dataset.query_instruction_for_retrieval, + passage_instruction_for_retrieval: formData.dataset.passage_instruction_for_retrieval, + pad_to_max: formData.dataset.pad_to_max, + data_preprocess_type: formData.dataset.data_preprocess_neural_chat ? 'neural_chat' : null } - if (trainingEnabled) { - jobPayload.Training = { - epochs: formData.training.epochs, - batch_size: formData.openai_params.batch_size, - gradient_accumulation_steps: formData.training.gradient_accumulation_steps - } + // Training configuration + jobPayload.Training = { + epochs: formData.training.epochs, + batch_size: formData.openai_params.batch_size, + gradient_accumulation_steps: formData.training.gradient_accumulation_steps, + learning_rate: formData.training.learning_rate, + optimizer: formData.training.optimizer, + device: formData.training.device, + mixed_precision: formData.training.mixed_precision } // Call the actual API @@ -343,28 +344,19 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { id: response.data?.id || response.data?.fine_tuning_job_id || Date.now().toString(), status: response.data?.status || 'pending', model: formData.baseModel, - task: jobPayload.task || (loraEnabled ? 
formData.lora?.task_type : 'instruction_tuning'), + task: jobPayload.task || 'instruction_tuning', dataset: formData.trainingDataset?.suffixedName || formData.trainingDataset?.name || 'Unknown', progress: '0%', createdDate: response.data?.created_at || new Date().toISOString(), - training_file: jobPayload.training_file - } - - // Mirror payload sections in the newJob object for UI - if (trainingEnabled) { - newJob.openai_params = formData.openai_params - newJob.training = formData.training - } - - if (generalEnabled) { - newJob.general = formData.general - if (formData.hf_token) { - newJob.general = { ...newJob.general, config: { ...(newJob.general.config || {}), token: formData.hf_token } } - } + training_file: jobPayload.training_file, + openai_params: formData.openai_params, + training: formData.training, + general: formData.general, + dataset_config: formData.dataset } - if (datasetEnabled) { - newJob.dataset_config = formData.dataset + if (formData.hf_token) { + newJob.general = { ...newJob.general, config: { ...(newJob.general.config || {}), token: formData.hf_token } } } onJobCreated(newJob) @@ -381,6 +373,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { setFormData({ baseModel: '', trainingDataset: null, + hf_token: '', // OpenAI standard parameters openai_params: { n_epochs: 3, @@ -399,12 +392,24 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { }, dataset: { max_length: 512, + block_size: 512, validation_split_percentage: 5, padding_side: 'right', truncation_side: 'right', max_source_length: 384, + pad_to_max: false, + query_max_len: 128, + passage_max_len: 128, + train_group_size: 8, + query_instruction_for_retrieval: '', + passage_instruction_for_retrieval: '', + reasoning_dataset_keys: ['Question', 'Complex_CoT', 'Response'], + reasoning_dataset_keys_input: 'Question, Complex_CoT, Response', max_prompt_length: 512, data_preprocess_type: 'neural_chat', + data_preprocess_neural_chat: true, + padding: 'true', + truncation: true, mask_input: true, mask_response: true }, @@ -421,12 +426,15 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { accelerate_mode: 'DDP', hpu_execution_mode: 'lazy', num_training_workers: 1 + }, + lora: { + r: 8, + lora_alpha: 32, + lora_dropout: 0.1, + task_type: 'CAUSAL_LM' } }) - // reset token as well - setFormData(prev => ({ ...prev, hf_token: '' })) setLoraEnabled(false) - setFormData(prev => ({ ...prev, lora: { r: 8, lora_alpha: 32, lora_dropout: 0.1, task_type: 'CAUSAL_LM' } })) setErrors({}) setIsSubmitting(false) onClose() @@ -440,33 +448,36 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { fullWidth PaperProps={{ sx: { + position: 'relative', borderRadius: 2, - maxHeight: '90vh', - height: '90vh' + maxHeight: '95vh', + height: '95vh' } }} > - - - - Create New Fine-tuning Job - - - - - + + + Create New Fine-tuning Job + + {/* Close button moved out of title: absolutely positioned within the dialog Paper */} + + + + - - {/* Top Left Quadrant: Model Configuration and Dataset Configuration */} - - - {/* Model Configuration */} - - - Model Configuration* - + + {/* Left Column: Model & Task Setup */} + + + {/* Base Model */} + { )} /> {errors.baseModel && ( - + {errors.baseModel} )} - handleInputChange('hf_token', e.target.value)} - fullWidth - size="medium" - sx={{ mt: 2 }} - /> + + {/* HF Token */} + handleInputChange('hf_token', e.target.value)} + fullWidth + size="medium" + sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + /> - {/* Dataset Configuration */} - - - - 
Dataset Configuration - - setDatasetEnabled(e.target.checked)} />} label="Enable" /> - - - - - handleConfigChange('dataset', 'max_length', parseInt(e.target.value))} - inputProps={{ min: 128, max: 4096, step: 1 }} - size="medium" - fullWidth - sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} - disabled={!datasetEnabled} - /> - - - handleConfigChange('dataset', 'max_source_length', parseInt(e.target.value))} - inputProps={{ min: 128, max: 2048, step: 1 }} - size="medium" - fullWidth - sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} - disabled={!datasetEnabled} - /> - - - - Preprocess Type - - - - - handleConfigChange('dataset', 'validation_split_percentage', parseInt(e.target.value))} - inputProps={{ min: 1, max: 50, step: 1 }} - size="medium" - fullWidth - sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} - disabled={!datasetEnabled} - /> - - - - + {/* Task Type */} + + Task Type + + + + {/* Inline Instruction Tuning config shown right under Task Type */} + {formData.general.task === 'instruction_tuning' && ( + + + {/* 2-column responsive CSS grid for short-value fields */} + + + handleConfigChange('dataset', 'max_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'block_size', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'max_source_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'max_prompt_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + + handleConfigChange('dataset', 'padding_side', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'truncation_side', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'padding', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'truncation', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'mask_input', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'mask_response', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'data_preprocess_neural_chat', e.target.checked)} + /> + } + label="Use neural_chat for data preprocess type" + size="small" + sx={{ mt: 0 }} + /> + + + )} + + {/* Reasoning task dataset config (mirrors instruction tuning controls) */} + {formData.general.task === 'reasoning' && ( + + + {/* Comma-separated keys field that maps to array */} + { + const raw = e.target.value + // update raw input so trailing separators are preserved while typing + handleConfigChange('dataset', 'reasoning_dataset_keys_input', raw) + // allow comma or whitespace as separators to derive the array + const arr = raw.split(/[,\s]+/).map(s => s.trim()).filter(Boolean) + handleConfigChange('dataset', 'reasoning_dataset_keys', arr) + }} + size="small" + fullWidth + /> + + {/* Numeric fields: inline+scroll on small screens, 3-column fluid layout on md+ (no scrollbar) */} + + + handleConfigChange('dataset', 'max_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'max_source_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'max_prompt_length', e.target.value)} + size="small" + sx={{ 
width: '100%' }} + fullWidth + /> + + + + {/* 2-column responsive CSS grid for short-value fields */} + + + handleConfigChange('dataset', 'padding_side', e.target.value)} + size="small" + sx={{ width: '100%', maxWidth: 240 }} + /> + + + + handleConfigChange('dataset', 'truncation_side', e.target.value)} + size="small" + sx={{ width: '100%', maxWidth: 240 }} + /> + + + + handleConfigChange('dataset', 'padding', e.target.value)} + size="small" + sx={{ width: '100%', maxWidth: 240 }} + /> + + + + handleConfigChange('dataset', 'truncation', e.target.value)} + size="small" + sx={{ width: '100%', maxWidth: 240 }} + /> + + + + handleConfigChange('dataset', 'mask_input', e.target.value)} + size="small" + sx={{ width: '100%', maxWidth: 240 }} + /> + + + + handleConfigChange('dataset', 'mask_response', e.target.value)} + size="small" + sx={{ width: '100%', maxWidth: 240 }} + /> + + + + + )} + + {/* Pretraining task dataset config: minimal fields (max_length, truncation, padding) */} + {formData.general.task === 'pretraining' && ( + + + + + handleConfigChange('dataset', 'max_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'block_size', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'padding', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'truncation', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + + + )} + + {/* Rerank task dataset config */} + {formData.general.task === 'rerank' && ( + + + + + handleConfigChange('dataset', 'max_length', e.target.value)} + size="small" + fullWidth + /> + + + handleConfigChange('dataset', 'train_group_size', e.target.value)} + size="small" + fullWidth + /> + + + + + )} + + {/* Embedding task dataset config */} + {formData.general.task === 'embedding' && ( + + + handleConfigChange('dataset', 'query_instruction_for_retrieval', e.target.value)} + size="small" + fullWidth + /> + + handleConfigChange('dataset', 'passage_instruction_for_retrieval', e.target.value)} + size="small" + fullWidth + /> + + + + handleConfigChange('dataset', 'query_max_len', e.target.value)} + size="small" + fullWidth + /> + + + handleConfigChange('dataset', 'passage_max_len', e.target.value)} + size="small" + fullWidth + /> + + + + handleConfigChange('dataset', 'padding', e.target.value)} + size="small" + fullWidth + /> + + handleConfigChange('dataset', 'train_group_size', e.target.value)} + size="small" + fullWidth + /> + + + )} + + {/* DPO task dataset config: max_length, max_prompt_length, pad_to_max */} + {formData.general.task === 'dpo' && ( + + + + + handleConfigChange('dataset', 'max_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'max_prompt_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'pad_to_max', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + + )} - - - {/* Top Right Quadrant: Training Dataset Upload */} - - - - Training Dataset * - - + + + {/* Right Column: Training Dataset & Training Parameters */} + + + {/* Training Dataset Upload */} + handleFileUpload('trainingDataset', file)} acceptedTypes={['.json', '.jsonl', '.csv']} @@ -584,217 +1013,96 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { error={errors.trainingDataset} /> - - - - {/* Bottom Left Quadrant: General Configuration + LoRA */} - - - 
- - General Configuration - - setGeneralEnabled(e.target.checked)} />} label="Enable" /> - - - - Task Type - - - - Report To - - - handleConfigChange('general', 'output_dir', e.target.value)} - fullWidth - size="medium" - disabled={!generalEnabled} - /> - - - LoRA Configuration - setLoraEnabled(e.target.checked)} - /> - } - label="Enable LoRA" - /> - - - - + + {/* Training Parameters */} + + + + handleConfigChange('lora', 'r', parseInt(e.target.value))} - error={!!errors.lora_r} - inputProps={{ min: 1, max: 128, step: 1 }} + value={formData.training.epochs} + onChange={(e) => handleConfigChange('training', 'epochs', parseInt(e.target.value))} + error={!!errors.epochs} + inputProps={{ min: 1, max: 50, step: 1 }} size="medium" fullWidth - disabled={!loraEnabled} + sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} /> - - + + handleConfigChange('lora', 'lora_alpha', parseInt(e.target.value))} - error={!!errors.lora_alpha} + value={formData.openai_params.batch_size} + onChange={(e) => handleOpenAIParamChange('batch_size', parseInt(e.target.value))} + error={!!errors.batch_size} inputProps={{ min: 1, max: 256, step: 1 }} size="medium" fullWidth - disabled={!loraEnabled} - /> - - - handleConfigChange('lora', 'lora_dropout', parseFloat(e.target.value))} - error={!!errors.lora_dropout} - inputProps={{ min: 0, max: 1, step: 0.01 }} - size="medium" - fullWidth - disabled={!loraEnabled} - /> - - - - - - - - {/* Bottom Right Quadrant: Training Configuration + OpenAI */} - - - - - Training Configuration - - setTrainingEnabled(e.target.checked)} />} label="Enable" /> - - - - - - handleConfigChange('training', 'epochs', parseInt(e.target.value))} - error={!!errors.epochs} - inputProps={{ min: 1, max: 50, step: 1 }} - size="medium" - fullWidth sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} - disabled={!trainingEnabled} /> - - - handleOpenAIParamChange('batch_size', parseInt(e.target.value))} - error={!!errors.batch_size} - inputProps={{ min: 1, max: 256, step: 1 }} - size="medium" - fullWidth - sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} - disabled={!trainingEnabled} - /> - - - - handleConfigChange('training', 'learning_rate', parseFloat(e.target.value))} - error={!!errors.learning_rate} - inputProps={{ min: 0.00001, max: 0.01, step: 0.00001 }} - size="medium" - fullWidth - sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} - disabled={!trainingEnabled} - /> - + + + + handleConfigChange('training', 'learning_rate', parseFloat(e.target.value))} + error={!!errors.learning_rate} + inputProps={{ min: 0.00001, max: 0.01, step: 0.00001 }} + size="medium" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + /> + + Optimizer - - - - - Device - - - - - - Mixed Precision - - - - - + + + + + Device + + + + + + Mixed Precision + + + + + { inputProps={{ min: 1, step: 1 }} size="medium" fullWidth - disabled={!trainingEnabled} + sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} /> - - - - handleOpenAIParamChange('learning_rate_multiplier', parseFloat(e.target.value))} - error={!!errors.learning_rate_multiplier} - inputProps={{ min: 0.02, max: 2, step: 0.01 }} - size="medium" - fullWidth - disabled={!trainingEnabled} - /> - - - handleOpenAIParamChange('prompt_loss_weight', parseFloat(e.target.value))} - inputProps={{ min: 0, max: 1, step: 0.01 }} - size="medium" - fullWidth - disabled={!trainingEnabled} - /> - - - - - - - + + + + handleOpenAIParamChange('learning_rate_multiplier', parseFloat(e.target.value))} + error={!!errors.learning_rate_multiplier} + inputProps={{ min: 0.02, max: 2, step: 
0.01 }} + size="medium" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + /> + + + handleOpenAIParamChange('prompt_loss_weight', parseFloat(e.target.value))} + inputProps={{ min: 0, max: 1, step: 0.01 }} + size="medium" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + /> + + + + + + + diff --git a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx index 6cf8608..7137603 100644 --- a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx +++ b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx @@ -571,27 +571,31 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null, filter
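The hunk below swaps the status cell for an immediately-invoked function expression,
since a JSX slot takes a single expression rather than statements. A reduced sketch of
the pattern, using hypothetical stand-in components rather than the real ones from
this file:

    {(() => {
        const s = String(job.status || '').toLowerCase()
        return s === 'running' || s === 'pending'
            ? <BlinkingStatus label={job.status} />   // animated indicator for active jobs
            : <StatusChip label={job.status} />       // static chip for terminal states
    })()}

Running and pending jobs get the animated treatment; every other status falls through
to a plain chip.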
{/* Status with blinking indicator when running; show Chip only for other statuses */} - {String(job.status).toLowerCase() === 'running' ? ( - - { + const s = String(job.status || '').toLowerCase() + return (s === 'running' || s === 'pending') ? ( + + + {job.status} + + ) : ( + - {job.status} - - ) : ( - - )} + ) + })()} + From 61df6c23a295fc56d7ea3e3ebd0a2fbfc6eb60d7 Mon Sep 17 00:00:00 2001 From: wwanarif Date: Wed, 26 Nov 2025 09:43:56 +0000 Subject: [PATCH 15/23] updated the ft training configs UI Signed-off-by: wwanarif --- .../src/views/finetuning/FileUploadArea.jsx | 19 +- .../views/finetuning/FinetuningJobModal.jsx | 326 +++++++++++++----- 2 files changed, 253 insertions(+), 92 deletions(-) diff --git a/studio-frontend/packages/ui/src/views/finetuning/FileUploadArea.jsx b/studio-frontend/packages/ui/src/views/finetuning/FileUploadArea.jsx index 493f1bb..e3c1d54 100644 --- a/studio-frontend/packages/ui/src/views/finetuning/FileUploadArea.jsx +++ b/studio-frontend/packages/ui/src/views/finetuning/FileUploadArea.jsx @@ -189,13 +189,19 @@ const FileUploadArea = ({ ? theme.palette.error.main : theme.palette.divider, borderRadius: 2, - p: 4, + p: 3, textAlign: 'center', cursor: 'pointer', transition: 'all 0.2s ease-in-out', backgroundColor: dragActive ? alpha(theme.palette.primary.main, 0.05) : 'transparent', + width: '100%', + minHeight: 150, + mx: 'auto', + display: 'flex', + alignItems: 'center', + justifyContent: 'center', '&:hover': { borderColor: theme.palette.primary.main, backgroundColor: alpha(theme.palette.primary.main, 0.02) @@ -207,18 +213,15 @@ const FileUploadArea = ({ onDragOver={handleDrag} onDrop={handleDrop} > - + - + - Drop your file here or click to browse + Drop your training dataset file here or click to browse - Supported formats: {acceptedTypes.join(', ')} - - - Maximum file size: {maxSizeMB}MB + Supported formats: {acceptedTypes.join(', ')} (Max filesize: {maxSizeMB}MB) diff --git a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx index cb8bcc3..e9c9b13 100644 --- a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx +++ b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx @@ -82,17 +82,28 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { }, training: { optimizer: 'adamw_torch', - epochs: 3, + device: 'cpu', + batch_size: 2, + epochs: 1, + max_train_steps: null, learning_rate: 5.0e-5, lr_scheduler: 'linear', weight_decay: 0.0, - device: 'cpu', + num_training_workers: 1, + accelerate_mode: 'DDP', mixed_precision: 'no', gradient_accumulation_steps: 1, logging_steps: 10, - accelerate_mode: 'DDP', - hpu_execution_mode: 'lazy', - num_training_workers: 1 + dpo_beta: 0.1 + , + // Embedding-specific training config (only used when task === 'embedding') + embedding_training_config: { + + temperature: 0.02, + sentence_pooling_method: 'cls', + normalized: true, + use_inbatch_neg: true + } }, lora: { r: 8, @@ -327,13 +338,26 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { // Training configuration jobPayload.Training = { - epochs: formData.training.epochs, - batch_size: formData.openai_params.batch_size, - gradient_accumulation_steps: formData.training.gradient_accumulation_steps, - learning_rate: formData.training.learning_rate, optimizer: formData.training.optimizer, device: formData.training.device, - mixed_precision: formData.training.mixed_precision + batch_size: formData.training.batch_size, + 
epochs: formData.training.epochs, + max_train_steps: formData.training.max_train_steps, + learning_rate: formData.training.learning_rate, + lr_scheduler: formData.training.lr_scheduler, + weight_decay: formData.training.weight_decay, + num_training_workers: formData.training.num_training_workers, + accelerate_mode: formData.training.accelerate_mode, + mixed_precision: formData.training.mixed_precision, + gradient_accumulation_steps: formData.training.gradient_accumulation_steps, + logging_steps: formData.training.logging_steps, + // embedding_training_config will be attached below only for embedding task + dpo_beta: formData.training.dpo_beta + } + + // If embedding task, attach embedding_training_config + if (jobPayload.task === 'embedding') { + jobPayload.Training.embedding_training_config = formData.training.embedding_training_config } // Call the actual API @@ -415,17 +439,26 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { }, training: { optimizer: 'adamw_torch', - epochs: 3, + device: 'cpu', + batch_size: 2, + epochs: 1, + max_train_steps: null, learning_rate: 5.0e-5, lr_scheduler: 'linear', weight_decay: 0.0, - device: 'cpu', + num_training_workers: 1, + accelerate_mode: 'DDP', mixed_precision: 'no', gradient_accumulation_steps: 1, logging_steps: 10, - accelerate_mode: 'DDP', - hpu_execution_mode: 'lazy', - num_training_workers: 1 + dpo_beta: 0.1 + , + embedding_training_config: { + temperature: 0.02, + sentence_pooling_method: 'cls', + normalized: true, + use_inbatch_neg: true + } }, lora: { r: 8, @@ -474,7 +507,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { {/* Left Column: Model & Task Setup */} - + {/* Base Model */} @@ -1002,10 +1035,10 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { {/* Right Column: Training Dataset & Training Parameters */} - + {/* Training Dataset Upload */} - + handleFileUpload('trainingDataset', file)} acceptedTypes={['.json', '.jsonl', '.csv']} @@ -1015,9 +1048,10 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { {/* Training Parameters */} - + - + {/* compact grid similar to task-type configs */} + { value={formData.training.epochs} onChange={(e) => handleConfigChange('training', 'epochs', parseInt(e.target.value))} error={!!errors.epochs} - inputProps={{ min: 1, max: 50, step: 1 }} - size="medium" + inputProps={{ min: 1, step: 1 }} + size="small" fullWidth - sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} /> handleOpenAIParamChange('batch_size', parseInt(e.target.value))} + value={formData.training.batch_size} + onChange={(e) => handleConfigChange('training', 'batch_size', parseInt(e.target.value))} error={!!errors.batch_size} inputProps={{ min: 1, max: 256, step: 1 }} - size="medium" + size="small" fullWidth - sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} /> - - handleConfigChange('training', 'learning_rate', parseFloat(e.target.value))} - error={!!errors.learning_rate} - inputProps={{ min: 0.00001, max: 0.01, step: 0.00001 }} - size="medium" - fullWidth - sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} - /> - - - Optimizer - - - + + + + handleConfigChange('training', 'learning_rate', parseFloat(e.target.value))} + error={!!errors.learning_rate} + inputProps={{ min: 0.00001, max: 0.01, step: 0.00001 }} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + handleConfigChange('training', 'max_train_steps', 
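/*
 * Empty input intentionally maps to null here so an unset max_train_steps survives
 * the round trip to the backend instead of becoming NaN. A reusable helper for the
 * same conversion (sketch only, with an explicit radix) could be:
 *
 *   const toIntOrNull = (v) => (v ? parseInt(v, 10) : null)
 */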
e.target.value ? parseInt(e.target.value) : null)} + inputProps={{ min: 1, step: 1 }} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + - + handleConfigChange('training', 'optimizer', e.target.value)} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + handleConfigChange('training', 'gradient_accumulation_steps', parseInt(e.target.value))} + inputProps={{ min: 1, step: 1 }} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + + + + Device - + Mixed Precision + + + Accelerate Mode + + + - - handleConfigChange('training', 'gradient_accumulation_steps', parseInt(e.target.value))} - inputProps={{ min: 1, step: 1 }} - size="medium" - fullWidth - sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} - /> - - + + handleOpenAIParamChange('learning_rate_multiplier', parseFloat(e.target.value))} - error={!!errors.learning_rate_multiplier} - inputProps={{ min: 0.02, max: 2, step: 0.01 }} - size="medium" + value={formData.training.weight_decay} + onChange={(e) => handleConfigChange('training', 'weight_decay', parseFloat(e.target.value))} + inputProps={{ min: 0, step: 0.01 }} + size="small" fullWidth - sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} /> handleOpenAIParamChange('prompt_loss_weight', parseFloat(e.target.value))} - inputProps={{ min: 0, max: 1, step: 0.01 }} - size="medium" + value={formData.training.logging_steps} + onChange={(e) => handleConfigChange('training', 'logging_steps', parseInt(e.target.value))} + error={!!errors.logging_steps} + inputProps={{ min: 1, step: 1 }} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + handleConfigChange('training', 'lr_scheduler', e.target.value)} + size="small" fullWidth - sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} /> + + + + {}} + InputProps={{ readOnly: true }} + inputProps={{ min: 1, step: 1, 'aria-readonly': true }} + disabled + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + handleConfigChange('training', 'dpo_beta', parseFloat(e.target.value))} + inputProps={{ min: 0, step: 0.01 }} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + + {formData.general.task === 'embedding' ? ( + + + + handleConfigChange('training', 'embedding_training_config', { + ...formData.training.embedding_training_config, + temperature: e.target.value === '' ? 
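/*
 * Reading these embedding knobs in the usual fine-tuning sense: temperature scales
 * the contrastive loss, sentence_pooling_method chooses how token states collapse
 * into one vector (for example 'cls' versus mean pooling), normalized L2-normalizes
 * the output embeddings, and use_inbatch_neg treats the other passages in a batch
 * as negatives. These are conventional meanings, not confirmed against the backend.
 */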
null : parseFloat(e.target.value) + })} + inputProps={{ step: 0.01 }} + size="small" + fullWidth + /> + + + handleConfigChange('training', 'embedding_training_config', { + ...formData.training.embedding_training_config, + sentence_pooling_method: e.target.value + })} + size="small" + fullWidth + /> + + + + handleConfigChange('training', 'embedding_training_config', { + ...formData.training.embedding_training_config, + normalized: e.target.checked + })} + /> + } + label="Normalized embeddings" + /> + + handleConfigChange('training', 'embedding_training_config', { + ...formData.training.embedding_training_config, + use_inbatch_neg: e.target.checked + })} + /> + } + label="Use in-batch negatives" + /> + + ) : null } From 722206d92cf50b50f6bd4146244017105cfbc67a Mon Sep 17 00:00:00 2001 From: wwanarif Date: Thu, 27 Nov 2025 10:04:09 +0000 Subject: [PATCH 16/23] fixes for depreciated components and hardfix the genaicomp images versions Signed-off-by: wwanarif --- app-frontend/Dockerfile | 4 +- .../helm-values/mysqldb.yaml | 13 +++++++ .../manifests/studio-manifest.yaml | 20 +++++----- .../playbooks/deploy-mysqldb.yml | 19 +--------- .../app/templates/app/app.manifest.yaml | 2 +- .../packages/ui/public/silent-check-sso.html | 16 ++++++++ .../packages/ui/src/views/debuglogs/index.jsx | 37 +++++++++++++------ .../packages/ui/src/views/tracer/index.jsx | 31 +++++++++++----- 8 files changed, 91 insertions(+), 51 deletions(-) create mode 100644 studio-frontend/packages/ui/public/silent-check-sso.html diff --git a/app-frontend/Dockerfile b/app-frontend/Dockerfile index 4f89930..f1f41c6 100644 --- a/app-frontend/Dockerfile +++ b/app-frontend/Dockerfile @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # Use node 20.11.1 as the base image -FROM node:20.11.1 as vite-app +FROM node:20.11.1 AS vite-app # Accept proxy build arguments ARG http_proxy @@ -18,7 +18,7 @@ COPY ./react /usr/app/react WORKDIR /usr/app/react -RUN ["npm", "install"] +RUN ["npm", "install", "--legacy-peer-deps"] RUN ["npm", "run", "build"] diff --git a/setup-scripts/setup-genai-studio/helm-values/mysqldb.yaml b/setup-scripts/setup-genai-studio/helm-values/mysqldb.yaml index 1a0e46b..9abe0c2 100644 --- a/setup-scripts/setup-genai-studio/helm-values/mysqldb.yaml +++ b/setup-scripts/setup-genai-studio/helm-values/mysqldb.yaml @@ -2,6 +2,8 @@ auth: rootPassword: root image: + registry: docker.io + repository: bitnamilegacy/mysql tag: "8.0" primary: @@ -14,6 +16,17 @@ primary: - ReadWriteOnce size: 8Gi +volumePermissions: + enabled: true + image: + registry: docker.io + repository: bitnamilegacy/os-shell + tag: "latest" + +global: + security: + allowInsecureImages: true + initdbScripts: 01-create-users-and-dbs.sql: | -- Create 'studio' user for '%' host diff --git a/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml b/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml index 7746830..27b57e4 100644 --- a/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml +++ b/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml @@ -347,17 +347,17 @@ spec: spec: containers: - name: studio-backend - image: ${REGISTRY}/studio-backend:${TAG} + image: "${REGISTRY}/studio-backend:${TAG}" imagePullPolicy: Always env: - name: APP_FRONTEND_IMAGE - value: ${REGISTRY}/app-frontend:${TAG} + value: "${REGISTRY}/app-frontend:${TAG}" - name: APP_BACKEND_IMAGE - value: ${REGISTRY}/app-backend:${TAG} + value: "${REGISTRY}/app-backend:${TAG}" - name: REGISTRY - value: ${REGISTRY} + value: "opea" - name: TAG - value: 
${TAG} + value: "1.4" - name: SBX_HTTP_PROXY value: ${HTTP_PROXY} - name: SBX_NO_PROXY @@ -403,7 +403,8 @@ metadata: namespace: studio spec: accessModes: - - ReadWriteMany + - ReadWriteOnce + storageClassName: local-path resources: requests: storage: 1Gi @@ -444,7 +445,7 @@ spec: containers: - name: studio-frontend securityContext: {} - image: ${REGISTRY}/studio-frontend:${TAG} + image: "${REGISTRY}/studio-frontend:${TAG}" imagePullPolicy: Always envFrom: - configMapRef: @@ -548,7 +549,7 @@ spec: runAsGroup: 0 containers: - name: keycloak - image: quay.io/keycloak/keycloak:latest + image: quay.io/keycloak/keycloak:26.2.5 volumeMounts: - name: app-tls mountPath: /etc/ssl @@ -673,8 +674,7 @@ spec: spec: containers: - name: finetuning-server - # image: opea/finetuning:latest - image: ${REGISTRY}/finetuning:${TAG} + image: opea/finetuning:latest imagePullPolicy: IfNotPresent ports: - containerPort: 8015 diff --git a/setup-scripts/setup-genai-studio/playbooks/deploy-mysqldb.yml b/setup-scripts/setup-genai-studio/playbooks/deploy-mysqldb.yml index 745438d..89cb0fe 100644 --- a/setup-scripts/setup-genai-studio/playbooks/deploy-mysqldb.yml +++ b/setup-scripts/setup-genai-studio/playbooks/deploy-mysqldb.yml @@ -12,25 +12,8 @@ meta: end_play when: namespace_check.stdout != "" - - name: Add bitnami Helm repository - command: helm repo add bitnami https://charts.bitnami.com/bitnami - environment: - http_proxy: "{{ http_proxy }}" - https_proxy: "{{ http_proxy }}" - no_proxy: "{{ no_proxy }}" - - - name: Update Helm repositories - command: helm repo update - environment: - http_proxy: "{{ http_proxy }}" - https_proxy: "{{ http_proxy }}" - no_proxy: "{{ no_proxy }}" - - - name: Create 'mysql' namespace - command: kubectl create ns mysql - - name: Install MySQL using Helm - command: helm install mysql bitnami/mysql -n mysql -f ../helm-values/mysqldb.yaml + command: helm install mysql oci://registry-1.docker.io/bitnamicharts/mysql -n mysql --create-namespace -f ../helm-values/mysqldb.yaml environment: http_proxy: "{{ http_proxy }}" https_proxy: "{{ http_proxy }}" diff --git a/studio-backend/app/templates/app/app.manifest.yaml b/studio-backend/app/templates/app/app.manifest.yaml index 0395a6e..06d48a1 100644 --- a/studio-backend/app/templates/app/app.manifest.yaml +++ b/studio-backend/app/templates/app/app.manifest.yaml @@ -225,7 +225,7 @@ spec: spec: containers: - name: chathistory-mongo - image: opea/chathistory-mongo:latest + image: opea/chathistory-mongo:1.3 imagePullPolicy: IfNotPresent ports: - containerPort: 6012 diff --git a/studio-frontend/packages/ui/public/silent-check-sso.html b/studio-frontend/packages/ui/public/silent-check-sso.html new file mode 100644 index 0000000..fb906ac --- /dev/null +++ b/studio-frontend/packages/ui/public/silent-check-sso.html @@ -0,0 +1,16 @@ + + + + Silent Check SSO + + + + + diff --git a/studio-frontend/packages/ui/src/views/debuglogs/index.jsx b/studio-frontend/packages/ui/src/views/debuglogs/index.jsx index fb92fc2..0201e18 100644 --- a/studio-frontend/packages/ui/src/views/debuglogs/index.jsx +++ b/studio-frontend/packages/ui/src/views/debuglogs/index.jsx @@ -10,6 +10,7 @@ import { Paper, Button, Box, + Stack, Typography, Divider, Fade, @@ -127,17 +128,30 @@ export default function PodLogsView() { const selectedEventPod = podsData.pods.find(p => p.name === selectedPodEvents); return ( - // - - - {workflowName && ( - - Workflow name: {workflowName} - - )} - {/* Namespace: {podsData.namespace} */} - - + + + + + Debug Logs + + {workflowName && ( + + Workflow name: 
{workflowName} + + )} + + {/* Namespace: {podsData.namespace} */} + + Auto refresh: From c3cf59b396fe8f9cdc3befd61eab639e64d992b8 Mon Sep 17 00:00:00 2001 From: wwanarif Date: Thu, 27 Nov 2025 10:48:19 +0000 Subject: [PATCH 17/23] added ws connection for finetuning in studio nginx Signed-off-by: wwanarif --- .../manifests/studio-manifest.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml b/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml index 27b57e4..4b5440e 100644 --- a/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml +++ b/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml @@ -150,6 +150,25 @@ data: proxy_set_header Connection "upgrade"; } + # WebSocket connection for fine-tuning job monitoring and downloads + # Routes directly to studio-frontend's socket.io handlers + location /socket.io { + proxy_pass http://${STUDIO_FRONTEND_DNS}; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # Ensure WebSocket upgrade headers + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + + # Disable buffering for WebSocket + proxy_buffering off; + proxy_cache off; + } + location /studio-backend { proxy_pass http://${STUDIO_BACKEND_DNS}; proxy_set_header Host $host; From df91e7a9466f25eaaebae85c040a82c997ac1f91 Mon Sep 17 00:00:00 2001 From: wwanarif Date: Thu, 27 Nov 2025 20:52:06 +0800 Subject: [PATCH 18/23] add proxy for github workflows Signed-off-by: wwanarif --- .github/workflows/_build-image-to-registry.yml | 2 +- .github/workflows/_e2e-test.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_build-image-to-registry.yml b/.github/workflows/_build-image-to-registry.yml index 40778c5..3bf7828 100644 --- a/.github/workflows/_build-image-to-registry.yml +++ b/.github/workflows/_build-image-to-registry.yml @@ -39,5 +39,5 @@ jobs: - name: Build Image and Push Image run: | sudo apt install ansible -y - ansible-playbook buildpush-genaistudio-images.yml -e "container_registry=${OPEA_IMAGE_REPO}opea" -e "container_tag=${{ inputs.tag }}" + ansible-playbook buildpush-genaistudio-images.yml -e "container_registry=${OPEA_IMAGE_REPO}opea" -e "container_tag=${{ inputs.tag }}" -e "http_proxy=${http_proxy}" working-directory: ${{ github.workspace }}/setup-scripts/build-image-to-registry/ \ No newline at end of file diff --git a/.github/workflows/_e2e-test.yml b/.github/workflows/_e2e-test.yml index 6f367bf..3e203a4 100644 --- a/.github/workflows/_e2e-test.yml +++ b/.github/workflows/_e2e-test.yml @@ -39,7 +39,7 @@ jobs: - name: Deploy GenAI Studio run: | sudo apt install ansible -y - ansible-playbook genai-studio.yml -e "container_registry=${OPEA_IMAGE_REPO}opea" -e "container_tag=${{ inputs.tag }}" -e "mysql_host=mysql.mysql.svc.cluster.local" + ansible-playbook genai-studio.yml -e "container_registry=${OPEA_IMAGE_REPO}opea" -e "container_tag=${{ inputs.tag }}" -e "mysql_host=mysql.mysql.svc.cluster.local" -e "http_proxy=${http_proxy}" sleep 5 kubectl wait --for=condition=ready pod --all --namespace=studio --timeout=300s --field-selector=status.phase!=Succeeded kubectl wait --for=condition=ready pod --all --namespace=monitoring --timeout=300s --field-selector=status.phase!=Succeeded From 44211dc8b1d271a53a3d2b3f581f1e7c900d928d Mon Sep 17 00:00:00 2001 
From: wwanarif
Date: Thu, 27 Nov 2025 22:57:02 +0800
Subject: [PATCH 19/23] update studio-frontend image

Signed-off-by: wwanarif
---
 studio-frontend/Dockerfile   | 6 ++----
 studio-frontend/package.json | 7 ++++++-
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/studio-frontend/Dockerfile b/studio-frontend/Dockerfile
index b30aed4..e59da67 100644
--- a/studio-frontend/Dockerfile
+++ b/studio-frontend/Dockerfile
@@ -1,4 +1,4 @@
-FROM node:23-alpine
+FROM node:20-alpine
 
 # Accept proxy build arguments
 ARG http_proxy
@@ -17,10 +17,8 @@ RUN apk update && apk upgrade && \
     build-base cairo-dev pango-dev \
     # Install Chromium
     chromium && \
-    # Update npm to the latest version
-    npm install -g npm@latest && \
     # Install PNPM globally
-    npm install -g pnpm@latest
+    npm install -g pnpm@9
 
 # Debug step to verify git installation
 RUN git --version
diff --git a/studio-frontend/package.json b/studio-frontend/package.json
index 115e3d0..0205332 100644
--- a/studio-frontend/package.json
+++ b/studio-frontend/package.json
@@ -60,6 +60,10 @@
     "overrides": {
       "set-value": "^3.0.3",
       "form-data": "4.0.4"
+    },
+    "peerDependencyRules": {
+      "ignoreMissing": [],
+      "allowAny": []
     }
   },
   "engines": {
@@ -81,7 +85,8 @@
     "cross-spawn": ">=7.0.5",
     "solid-js": ">=1.9.4",
     "tar-fs": ">=3.0.8",
-    "form-data": "4.0.4"
+    "form-data": "4.0.4",
+    "zod": ">=3.23.0"
   },
   "eslintIgnore": [
     "**/dist",

From 58da2fbffcb528774ef6f8ed5f7d49027689273d Mon Sep 17 00:00:00 2001
From: wwanarif
Date: Fri, 28 Nov 2025 09:47:59 +0800
Subject: [PATCH 20/23] add kubectl logs when studio deploy fails in e2e wf

Signed-off-by: wwanarif
---
 .github/workflows/_e2e-test.yml | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/_e2e-test.yml b/.github/workflows/_e2e-test.yml
index 3e203a4..a0a6656 100644
--- a/.github/workflows/_e2e-test.yml
+++ b/.github/workflows/_e2e-test.yml
@@ -39,10 +39,18 @@ jobs:
       - name: Deploy GenAI Studio
         run: |
           sudo apt install ansible -y
-          ansible-playbook genai-studio.yml -e "container_registry=${OPEA_IMAGE_REPO}opea" -e "container_tag=${{ inputs.tag }}" -e "mysql_host=mysql.mysql.svc.cluster.local" -e "http_proxy=${http_proxy}"
+          ansible-playbook genai-studio.yml -e "container_registry=${OPEA_IMAGE_REPO}opea" -e "container_tag=${{ inputs.tag }}" -e "mysql_host=mysql.mysql.svc.cluster.local" -e "http_proxy=${http_proxy}" || {
+            echo "Ansible playbook failed. Checking pod status..."
+            echo "=== Pods in studio namespace ==="
+            kubectl get pods -n studio
+            echo "=== Logs and events for non-Ready pods in studio namespace ==="
+            for pod in $(kubectl get pods -n studio --field-selector=status.phase!=Running,status.phase!=Succeeded -o jsonpath='{.items[*].metadata.name}'); do
+              echo "--- Pod: $pod ---"
+              kubectl describe pod $pod -n studio
+              kubectl logs $pod -n studio --all-containers=true --tail=100 || echo "Could not fetch logs for $pod"
+            done
+          }
           sleep 5
-          kubectl wait --for=condition=ready pod --all --namespace=studio --timeout=300s --field-selector=status.phase!=Succeeded
-          kubectl wait --for=condition=ready pod --all --namespace=monitoring --timeout=300s --field-selector=status.phase!=Succeeded
         working-directory: ${{ github.workspace }}/setup-scripts/setup-genai-studio/

From 900f86bd311fd5c338d6e2594967711103c78029 Mon Sep 17 00:00:00 2001
From: wwanarif
Date: Mon, 1 Dec 2025 10:11:20 +0800
Subject: [PATCH 21/23] update otel collector contrib version and remove redundant IconApps

Signed-off-by: wwanarif
---
 .../setup-genai-studio/helm-values/otel-collector.yaml  | 2 +-
 studio-frontend/packages/ui/src/menu-items/dashboard.js | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup-scripts/setup-genai-studio/helm-values/otel-collector.yaml b/setup-scripts/setup-genai-studio/helm-values/otel-collector.yaml
index 0e2c9d4..c025f7b 100644
--- a/setup-scripts/setup-genai-studio/helm-values/otel-collector.yaml
+++ b/setup-scripts/setup-genai-studio/helm-values/otel-collector.yaml
@@ -2,7 +2,7 @@ mode: deployment
 
 image:
   repository: "otel/opentelemetry-collector-contrib"
-  tag: "latest"
+  tag: "0.139.0"
 
 config:
   receivers:
diff --git a/studio-frontend/packages/ui/src/menu-items/dashboard.js b/studio-frontend/packages/ui/src/menu-items/dashboard.js
index 09bbb15..ba88227 100644
--- a/studio-frontend/packages/ui/src/menu-items/dashboard.js
+++ b/studio-frontend/packages/ui/src/menu-items/dashboard.js
@@ -14,7 +14,7 @@ import {
 } from '@tabler/icons-react'
 
 // constant
-const icons = { IconUsersGroup, IconHierarchy, IconBuildingStore, IconKey, IconTool, IconLock, IconRobot, IconVariable, IconFiles, IconApps, IconBrain, IconApps }
+const icons = { IconUsersGroup, IconHierarchy, IconBuildingStore, IconKey, IconTool, IconLock, IconRobot, IconVariable, IconFiles, IconApps, IconBrain }
 
 // ==============================|| DASHBOARD MENU ITEMS ||============================== //

From d1b234c9ea145169d422e3ea29a22beebdbcb777 Mon Sep 17 00:00:00 2001
From: wwanarif
Date: Mon, 1 Dec 2025 15:09:55 +0800
Subject: [PATCH 22/23] update otel deployment

Signed-off-by: wwanarif
---
 .../setup-genai-studio/helm-values/otel-collector.yaml        | 2 ++
 setup-scripts/setup-genai-studio/playbooks/deploy-tracing.yml | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/setup-scripts/setup-genai-studio/helm-values/otel-collector.yaml b/setup-scripts/setup-genai-studio/helm-values/otel-collector.yaml
index c025f7b..a962e82 100644
--- a/setup-scripts/setup-genai-studio/helm-values/otel-collector.yaml
+++ b/setup-scripts/setup-genai-studio/helm-values/otel-collector.yaml
@@ -22,6 +22,8 @@ config:
     clickhouse:
       endpoint: tcp://clickhouse.tracing.svc.cluster.local:9000
       database: otel
+      username: default
+      password: ""
       ttl: 72h
       traces_table_name: otel_traces
      logs_table_name: otel_logs
diff --git a/setup-scripts/setup-genai-studio/playbooks/deploy-tracing.yml b/setup-scripts/setup-genai-studio/playbooks/deploy-tracing.yml
index 6a4859a..07ebcd1 100644
--- a/setup-scripts/setup-genai-studio/playbooks/deploy-tracing.yml
+++ b/setup-scripts/setup-genai-studio/playbooks/deploy-tracing.yml
@@ -31,7 +31,7 @@
     command: kubectl create ns tracing
 
   - name: Install Clickhouse Helm chart in 'tracing' namespace
-    command: helm install clickhouse pascaliske/clickhouse -n tracing --set persistentVolumeClaim.storageClassName=local-path
+    command: helm install clickhouse pascaliske/clickhouse --version 0.3.1 -n tracing --set persistentVolumeClaim.storageClassName=local-path
     environment:
       http_proxy: "{{ http_proxy }}"
       https_proxy: "{{ http_proxy }}"
@@ -58,7 +58,7 @@
       no_proxy: "{{ no_proxy }}"
 
   - name: Install OpenTelemetry Collector Helm chart in 'tracing' namespace
-    command: helm install tracing open-telemetry/opentelemetry-collector -n tracing -f ../helm-values/otel-collector.yaml
+    command: helm install tracing open-telemetry/opentelemetry-collector --version 0.139.1 -n tracing -f ../helm-values/otel-collector.yaml
     environment:
       http_proxy: "{{ http_proxy }}"
      https_proxy: "{{ http_proxy }}"

From aeaf0ff0322bedd790300214de2b9d5a6a37bc0a Mon Sep 17 00:00:00 2001
From: wwanarif
Date: Mon, 1 Dec 2025 15:28:39 +0800
Subject: [PATCH 23/23] fix e2e test to make sure it exits properly when ansible fails

Signed-off-by: wwanarif
---
 .github/workflows/_e2e-test.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/_e2e-test.yml b/.github/workflows/_e2e-test.yml
index a0a6656..9441d84 100644
--- a/.github/workflows/_e2e-test.yml
+++ b/.github/workflows/_e2e-test.yml
@@ -49,6 +49,7 @@ jobs:
             kubectl describe pod $pod -n studio
             kubectl logs $pod -n studio --all-containers=true --tail=100 || echo "Could not fetch logs for $pod"
           done
+          exit 1
         }
         sleep 5
       working-directory: ${{ github.workspace }}/setup-scripts/setup-genai-studio/
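
A quick smoke test for the /socket.io location added in PATCH 17, once the studio manifest is deployed. This is a minimal sketch and not part of the patch series: STUDIO_HOST is a placeholder for wherever the studio nginx is exposed (NodePort, ingress, or a kubectl port-forward), and it assumes the studio-frontend serves socket.io v4, whose Engine.IO handshake uses EIO=4 (older servers use EIO=3).

    #!/usr/bin/env bash
    # Placeholder: point this at the studio nginx, e.g. after something like
    #   kubectl port-forward -n studio svc/<studio-nginx-service> 8080:80
    STUDIO_HOST="${STUDIO_HOST:?set STUDIO_HOST to the studio nginx host:port}"

    # 1) Long-polling handshake: a reachable socket.io endpoint answers with a
    #    payload starting 0{"sid":...}; an nginx error page here means the
    #    location block is not routing to the frontend's socket.io handler.
    curl -fsS --max-time 10 "http://${STUDIO_HOST}/socket.io/?EIO=4&transport=polling"
    echo

    # 2) WebSocket upgrade through nginx: expect "HTTP/1.1 101 Switching Protocols".
    #    A 400/502 usually means the Upgrade/Connection headers set in the
    #    location block are not being forwarded.
    curl -isS --http1.1 --max-time 10 \
      -H "Connection: Upgrade" \
      -H "Upgrade: websocket" \
      -H "Sec-WebSocket-Version: 13" \
      -H "Sec-WebSocket-Key: $(openssl rand -base64 16)" \
      "http://${STUDIO_HOST}/socket.io/?EIO=4&transport=websocket" | head -n 1

If both probes pass, the fine-tuning job monitor in the UI should be able to hold a live WebSocket session over the same path, since it negotiates through exactly these two steps.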