diff --git a/.github/workflows/_build-image-to-registry.yml b/.github/workflows/_build-image-to-registry.yml
index 40778c5..3bf7828 100644
--- a/.github/workflows/_build-image-to-registry.yml
+++ b/.github/workflows/_build-image-to-registry.yml
@@ -39,5 +39,5 @@ jobs:
       - name: Build Image and Push Image
         run: |
           sudo apt install ansible -y
-          ansible-playbook buildpush-genaistudio-images.yml -e "container_registry=${OPEA_IMAGE_REPO}opea" -e "container_tag=${{ inputs.tag }}"
+          ansible-playbook buildpush-genaistudio-images.yml -e "container_registry=${OPEA_IMAGE_REPO}opea" -e "container_tag=${{ inputs.tag }}" -e "http_proxy=${http_proxy}"
         working-directory: ${{ github.workspace }}/setup-scripts/build-image-to-registry/
\ No newline at end of file
diff --git a/.github/workflows/_e2e-test.yml b/.github/workflows/_e2e-test.yml
index f19bdff..9441d84 100644
--- a/.github/workflows/_e2e-test.yml
+++ b/.github/workflows/_e2e-test.yml
@@ -39,10 +39,19 @@ jobs:
       - name: Deploy GenAI Studio
         run: |
           sudo apt install ansible -y
-          ansible-playbook genai-studio.yml -e "container_registry=${OPEA_IMAGE_REPO}opea" -e "container_tag=${{ inputs.tag }}" -e "mysql_host=mysql.mysql.svc.cluster.local"
+          ansible-playbook genai-studio.yml -e "container_registry=${OPEA_IMAGE_REPO}opea" -e "container_tag=${{ inputs.tag }}" -e "mysql_host=mysql.mysql.svc.cluster.local" -e "http_proxy=${http_proxy}" || {
+            echo "Ansible playbook failed. Checking pod status..."
+            echo "=== Pods in studio namespace ==="
+            kubectl get pods -n studio
+            echo "=== Logs and events for non-Ready pods in studio namespace ==="
+            for pod in $(kubectl get pods -n studio --field-selector=status.phase!=Running,status.phase!=Succeeded -o jsonpath='{.items[*].metadata.name}'); do
+              echo "--- Pod: $pod ---"
+              kubectl describe pod $pod -n studio
+              kubectl logs $pod -n studio --all-containers=true --tail=100 || echo "Could not fetch logs for $pod"
+            done
+            exit 1
+          }
           sleep 5
-          kubectl wait --for=condition=ready pod --all --namespace=studio --timeout=300s --field-selector=status.phase!=Succeeded
-          kubectl wait --for=condition=ready pod --all --namespace=monitoring --timeout=300s --field-selector=status.phase!=Succeeded
         working-directory: ${{ github.workspace }}/setup-scripts/setup-genai-studio/
       - name: Set up Node.js
@@ -74,25 +83,10 @@
           name: playwright-test-results
           path: ${{ github.workspace }}/tests/playwright/playwright-report
-      - name: Cleanup sandbox namespaces
+      - name: Cleanup GenAI Studio
        if: always()
        run: |
-          if kubectl get namespace mysql; then
-            kubectl delete ns mysql || true
-          fi
-          if kubectl get namespace tracing; then
-            kubectl delete ns tracing || true
-          fi
-          for ns in $(kubectl get namespaces -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n' | grep '^sandbox-'); do
-            kubectl delete namespace $ns || true
-          done
-          if kubectl get namespace studio; then
-            kubectl delete -f manifests/studio-manifest.yaml || true
-            kubectl wait --for=delete pod --all --namespace=studio --timeout=300s
-          fi
-          if kubectl get namespace monitoring; then
-            kubectl delete -f manifests/monitoring-manifest.yaml || true
-            kubectl wait --for=delete pod --all --namespace=monitoring --timeout=300s
-          fi
+          echo "Running GenAI Studio cleanup script..."
+          ./cleanup-genai-studio.sh || echo "Cleanup script completed with warnings"
         working-directory: ${{ github.workspace }}/setup-scripts/setup-genai-studio/
diff --git a/app-backend/Dockerfile b/app-backend/Dockerfile
index eddbb4d..ee9995b 100644
--- a/app-backend/Dockerfile
+++ b/app-backend/Dockerfile
@@ -1,5 +1,15 @@
 FROM python:3.11-slim
 
+# Accept proxy build arguments
+ARG http_proxy
+ARG https_proxy
+ARG no_proxy
+
+# Set proxy environment variables for package managers
+ENV http_proxy=${http_proxy}
+ENV https_proxy=${https_proxy}
+ENV no_proxy=${no_proxy}
+
 RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
     libsqlite3-0 \
     libjemalloc-dev \
diff --git a/app-backend/megaservice.py b/app-backend/megaservice.py
index 3335323..4cb7e13 100644
--- a/app-backend/megaservice.py
+++ b/app-backend/megaservice.py
@@ -170,7 +170,7 @@ def add_remote_service(self):
             microservice_name = node['name'].split('@')[1]
             if "docsum" in microservice_name:
                 self.is_docsum = True
-            service_node_ip = node_id.split('@')[1].replace('_','-') if USE_NODE_ID_AS_IP else HOST_IP
+            service_node_ip = f"opea-{node_id.split('@')[1].replace('_','-')}" if USE_NODE_ID_AS_IP else HOST_IP
             microservice = templates[microservice_name].get_service(host_ip=service_node_ip, node_id_as_ip=USE_NODE_ID_AS_IP, port=os.getenv(f"{node_id.split('@')[1]}_port", None))
             microservice.name = node_id
             self.services[node_id] = microservice
diff --git a/app-backend/orchestrator.py b/app-backend/orchestrator.py
index f4b949b..f6357f6 100644
--- a/app-backend/orchestrator.py
+++ b/app-backend/orchestrator.py
@@ -272,7 +272,6 @@ async def execute(
                 url=endpoint,
                 data=json.dumps(inputs),
                 headers={"Content-type": "application/json", "Authorization": f"Bearer {access_token}"},
-                proxies={"http": None},
                 stream=True,
                 timeout=2000,
             )
@@ -283,7 +282,6 @@ async def execute(
                 headers={
                     "Content-type": "application/json",
                 },
-                proxies={"http": None},
                 stream=True,
                 timeout=2000,
             )
@@ -316,7 +314,6 @@ def generate():
                     "Content-type": "application/json",
                     "Authorization": f"Bearer {access_token}",
                 },
-                proxies={"http": None},
                 timeout=2000,
             )
         else:
@@ -326,7 +323,6 @@ def generate():
                 headers={
                     "Content-type": "application/json",
                 },
-                proxies={"http": None},
                 timeout=2000,
             )
         res_json = res.json()
diff --git a/app-frontend/Dockerfile b/app-frontend/Dockerfile
index 4c4d727..f1f41c6 100644
--- a/app-frontend/Dockerfile
+++ b/app-frontend/Dockerfile
@@ -2,13 +2,23 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # Use node 20.11.1 as the base image
-FROM node:20.11.1 as vite-app
+FROM node:20.11.1 AS vite-app
+
+# Accept proxy build arguments
+ARG http_proxy
+ARG https_proxy
+ARG no_proxy
+
+# Set proxy environment variables for package managers
+ENV http_proxy=${http_proxy}
+ENV https_proxy=${https_proxy}
+ENV no_proxy=${no_proxy}
 
 COPY ./react /usr/app/react
 
 WORKDIR /usr/app/react
 
-RUN ["npm", "install"]
+RUN ["npm", "install", "--legacy-peer-deps"]
 
 RUN ["npm", "run", "build"]
diff --git a/app-frontend/react/src/components/SideBar/SideBar.tsx b/app-frontend/react/src/components/SideBar/SideBar.tsx
index ee356c9..26873e8 100644
--- a/app-frontend/react/src/components/SideBar/SideBar.tsx
+++ b/app-frontend/react/src/components/SideBar/SideBar.tsx
@@ -79,7 +79,6 @@ export const LinkedMenuItem: React.FC = ({
       to={toWithQuery(to)}
       onClick={onClick}
       tabIndex={open ? 0 : -1}
-      aria-hidden={!open}
     >
       {children}
diff --git a/setup-scripts/build-image-to-registry/buildpush-genaicomps-images.yml b/setup-scripts/build-image-to-registry/buildpush-genaicomps-images.yml
index 55207b0..02e1d4a 100755
--- a/setup-scripts/build-image-to-registry/buildpush-genaicomps-images.yml
+++ b/setup-scripts/build-image-to-registry/buildpush-genaicomps-images.yml
@@ -15,13 +15,21 @@
         dest: /tmp/GenAIComps
         clone: yes
         update: no
+      environment:
+        http_proxy: "{{ http_proxy }}"
+        https_proxy: "{{ http_proxy }}"
+        no_proxy: "{{ no_proxy }}"
       when: not genaicomp_dir.stat.exists
 
-    - name: Pull latest changes in GenAIComps repo
+    - name: Checkout specific GenAIComps tag
       git:
         repo: https://github.com/opea-project/GenAIComps.git
         dest: /tmp/GenAIComps
-        update: yes
+        version: "{{ genaicomps_tag }}"
+      environment:
+        http_proxy: "{{ http_proxy }}"
+        https_proxy: "{{ http_proxy }}"
+        no_proxy: "{{ no_proxy }}"
       when: genaicomp_dir.stat.exists
 
     - name: Build and push GenAIComps images
@@ -38,11 +46,25 @@
         - { name: 'asr', dockerfile: 'comps/asr/src/Dockerfile' }
       block:
         - name: Build image
-          command: docker build -t {{ container_registry }}/{{ item.name }}:{{ container_tag }} -f {{ item.dockerfile }} .
+          command: >
+            docker build
+            --build-arg http_proxy="{{ http_proxy }}"
+            --build-arg https_proxy="{{ http_proxy }}"
+            --build-arg no_proxy="{{ no_proxy }}"
+            -t {{ container_registry }}/{{ item.name }}:{{ container_tag }}
+            -f {{ item.dockerfile }} .
          args:
            chdir: /tmp/GenAIComps
+          environment:
+            http_proxy: "{{ http_proxy }}"
+            https_proxy: "{{ http_proxy }}"
+            no_proxy: "{{ no_proxy }}"
          loop: "{{ genaicomp_images }}"
 
         - name: Push image
           command: docker push {{ container_registry }}/{{ item.name }}:{{ container_tag }}
+          environment:
+            http_proxy: "{{ http_proxy }}"
+            https_proxy: "{{ http_proxy }}"
+            no_proxy: "{{ no_proxy }}"
           loop: "{{ genaicomp_images }}"
diff --git a/setup-scripts/build-image-to-registry/buildpush-genaistudio-images.yml b/setup-scripts/build-image-to-registry/buildpush-genaistudio-images.yml
index e4f916f..20839a2 100755
--- a/setup-scripts/build-image-to-registry/buildpush-genaistudio-images.yml
+++ b/setup-scripts/build-image-to-registry/buildpush-genaistudio-images.yml
@@ -5,9 +5,18 @@
     - vars.yml
   tasks:
     - name: Build Docker image
-      command: docker build -t "{{ container_registry }}/{{ item.image_name }}:{{ container_tag }}" .
+      command: >
+        docker build
+        --build-arg http_proxy="{{ http_proxy }}"
+        --build-arg https_proxy="{{ http_proxy }}"
+        --build-arg no_proxy="{{ no_proxy }}"
+        -t "{{ container_registry }}/{{ item.image_name }}:{{ container_tag }}" .
      args:
        chdir: "{{ item.directory }}"
+      environment:
+        http_proxy: "{{ http_proxy }}"
+        https_proxy: "{{ http_proxy }}"
+        no_proxy: "{{ no_proxy }}"
      loop:
        - { directory: '../../studio-frontend/', image_name: 'studio-frontend' }
        - { directory: '../../studio-backend/', image_name: 'studio-backend' }
@@ -17,6 +26,10 @@
     - name: Push Docker image
       command: docker push "{{ container_registry }}/{{ item.image_name }}:{{ container_tag }}"
+      environment:
+        http_proxy: "{{ http_proxy }}"
+        https_proxy: "{{ http_proxy }}"
+        no_proxy: "{{ no_proxy }}"
       loop:
         - { image_name: 'studio-frontend' }
         - { image_name: 'studio-backend' }
diff --git a/setup-scripts/build-image-to-registry/vars.yml b/setup-scripts/build-image-to-registry/vars.yml
index 2c97a25..7646ae8 100644
--- a/setup-scripts/build-image-to-registry/vars.yml
+++ b/setup-scripts/build-image-to-registry/vars.yml
@@ -1,2 +1,7 @@
-container_registry: 'opea'
-container_tag: 'latest'
\ No newline at end of file
+# Container registry configuration
+# Replace {{ ansible_default_ipv4.address }} with your Kubernetes master/API endpoint IP if needed
+container_registry: '{{ ansible_default_ipv4.address }}:5000/opea'
+container_tag: 'latest'
+genaicomps_tag: 'main'
+http_proxy: ''
+no_proxy: 'localhost,127.0.0.1,.local,.svc.cluster.local,{{ ansible_default_ipv4.address }}'
\ No newline at end of file
diff --git a/setup-scripts/setup-genai-studio/cleanup-genai-studio.sh b/setup-scripts/setup-genai-studio/cleanup-genai-studio.sh
new file mode 100755
index 0000000..dc4fcb4
--- /dev/null
+++ b/setup-scripts/setup-genai-studio/cleanup-genai-studio.sh
@@ -0,0 +1,218 @@
+#!/bin/bash
+
+# GenAI Studio Complete Cleanup Script
+# This script ensures all namespaces and resources are properly deleted
+
+set -e
+
+echo "========================================"
+echo "GenAI Studio Complete Cleanup Script"
+echo "========================================"
+echo
+
+# Function to check if namespace exists
+namespace_exists() {
+    kubectl get namespace "$1" &>/dev/null
+}
+
+# Function to delete namespace with comprehensive cleanup
+cleanup_namespace() {
+    local ns=$1
+    local manifest_file=$2
+
+    if ! namespace_exists "$ns"; then
+        echo "✅ Namespace '$ns' does not exist, skipping..."
+        return 0
+    fi
+
+    echo "🧹 Cleaning up namespace: $ns"
+
+    # Delete resources using manifest if provided
+    echo "  - Deleting resources using manifest: $manifest_file"
+    if [ ! -z "$manifest_file" ] && [ -f "$manifest_file" ]; then
+        kubectl delete -f "$manifest_file" --timeout=120s --ignore-not-found=true || true
+    elif [ ! -z "$manifest_file" ]; then
+        echo "  - Namespace $ns has no manifest"
+    fi
+
+    # Delete Helm releases in the namespace
+    echo "  - Checking for Helm releases in $ns..."
+    helm list -n "$ns" -q 2>/dev/null | xargs -r -I {} helm delete {} -n "$ns" --timeout=120s || true
+
+    # Wait for pods to terminate gracefully
+    echo "  - Waiting for pods to terminate gracefully..."
+    if kubectl get pods -n "$ns" --no-headers 2>/dev/null | grep -q .; then
+        kubectl wait --for=delete pod --all --namespace="$ns" --timeout=180s || true
+    else
+        echo "    No pods found in namespace $ns"
+    fi
+
+    # Force delete any remaining pods
+    echo "  - Force deleting any remaining pods..."
+    REMAINING_PODS=$(kubectl get pods -n "$ns" --no-headers 2>/dev/null | awk '{print $1}' || true)
+    if [ ! -z "$REMAINING_PODS" ]; then
+        echo "    Found remaining pods: $REMAINING_PODS"
+        echo "$REMAINING_PODS" | xargs -r kubectl delete pod -n "$ns" --force --grace-period=0 || true
+    else
+        echo "    No remaining pods to force delete"
+    fi
+
+    # Delete PVCs
+    echo "  - Deleting PersistentVolumeClaims..."
+    PVCS=$(kubectl get pvc -n "$ns" --no-headers 2>/dev/null | awk '{print $1}' || true)
+    if [ ! -z "$PVCS" ]; then
+        echo "    Found PVCs: $PVCS"
+        echo "$PVCS" | xargs -r kubectl delete pvc -n "$ns" --timeout=60s || true
+    else
+        echo "    No PVCs found in namespace $ns"
+    fi
+
+    # Delete secrets (except default service account token)
+    echo "  - Deleting secrets..."
+    SECRETS=$(kubectl get secrets -n "$ns" --no-headers 2>/dev/null | grep -v "default-token" | awk '{print $1}' || true)
+    if [ ! -z "$SECRETS" ]; then
+        echo "    Found secrets: $SECRETS"
+        echo "$SECRETS" | xargs -r kubectl delete secret -n "$ns" || true
+    else
+        echo "    No custom secrets found in namespace $ns"
+    fi
+
+    # Delete configmaps
+    echo "  - Deleting configmaps..."
+    CONFIGMAPS=$(kubectl get configmaps -n "$ns" --no-headers 2>/dev/null | grep -v "kube-root-ca.crt" | awk '{print $1}' || true)
+    if [ ! -z "$CONFIGMAPS" ]; then
+        echo "    Found configmaps: $CONFIGMAPS"
+        echo "$CONFIGMAPS" | xargs -r kubectl delete configmap -n "$ns" || true
+    else
+        echo "    No custom configmaps found in namespace $ns"
+    fi
+
+    # Finally delete the namespace
+    echo "  - Deleting namespace..."
+    kubectl delete namespace "$ns" --timeout=120s || true
+
+    # If namespace still exists, patch it to remove finalizers
+    if namespace_exists "$ns"; then
+        echo "  - Namespace still exists, removing finalizers..."
+        kubectl patch namespace "$ns" -p '{"metadata":{"finalizers":[]}}' --type=merge || true
+        kubectl delete namespace "$ns" --force --grace-period=0 || true
+    fi
+
+    # Final check
+    if namespace_exists "$ns"; then
+        echo "  ❌ WARNING: Namespace '$ns' still exists after cleanup"
+        return 1
+    else
+        echo "  ✅ SUCCESS: Namespace '$ns' has been deleted"
+        return 0
+    fi
+}
+
+# Main cleanup process
+echo "Starting comprehensive cleanup..."
+echo
+
+# Change to the setup directory
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR"
+
+# Clean up sandbox namespaces first
+echo "🔍 Looking for sandbox namespaces..."
+SANDBOX_NAMESPACES=$(kubectl get namespaces -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n' | grep '^sandbox-' || true)
+
+if [ ! -z "$SANDBOX_NAMESPACES" ]; then
+    echo "Found sandbox namespaces: $SANDBOX_NAMESPACES"
+    for ns in $SANDBOX_NAMESPACES; do
+        cleanup_namespace "$ns"
+    done
+else
+    echo "✅ No sandbox namespaces found"
+fi
+
+echo
+
+# Clean up main namespaces
+MAIN_NAMESPACES=(
+    "studio:manifests/studio-manifest.yaml"
+    "monitoring:manifests/monitoring-manifest.yaml"
+    "tracing:"
+    "mysql:"
+)
+
+for ns_info in "${MAIN_NAMESPACES[@]}"; do
+    IFS=':' read -r ns manifest <<< "$ns_info"
+    cleanup_namespace "$ns" "$manifest"
+    echo
+done
+
+# # Clean up any remaining Helm releases globally
+# echo "🧹 Cleaning up any remaining Helm releases..."
+# helm list --all-namespaces --filter="mysql|kube-prometheus-stack|clickhouse|pascaliske" -q 2>/dev/null | \
+# while read -r release; do
+#     if [ ! -z "$release" ]; then
+#         echo "  - Deleting Helm release: $release"
+#         helm delete "$release" --timeout=60s || true
+#     fi
+# done
+
+# Check local-path-storage namespace and explain why it's preserved
+echo "🔒 Checking local-path-storage namespace..."
+if namespace_exists "local-path-storage"; then
+    echo "  ✅ INTENTIONALLY PRESERVED: local-path-storage namespace exists"
+    echo "  📝 This namespace provides storage provisioning and is NOT cleaned up because:"
+    echo "     - It may be used by other applications beyond GenAI Studio"
+    echo "     - Deleting it would break any existing PVCs using local-path storage"
+    echo "     - The local-path StorageClass would become non-functional"
+    echo "     - It's a cluster-wide infrastructure component"
+    echo ""
+    echo "  💡 To manually remove local-path-storage later (if you're sure it's safe):"
+    echo "     kubectl delete namespace local-path-storage"
+    echo "     kubectl delete storageclass local-path"
+else
+    echo "  ℹ️ local-path-storage namespace does not exist"
+fi
+
+
+echo
+echo "========================================"
+echo "Cleanup Summary"
+echo "========================================"
+
+# Final verification
+FAILED_CLEANUP=()
+NAMESPACES_TO_CHECK="studio monitoring tracing mysql"
+
+for ns in $NAMESPACES_TO_CHECK; do
+    if namespace_exists "$ns"; then
+        echo "❌ FAILED: Namespace '$ns' still exists"
+        FAILED_CLEANUP+=("$ns")
+    else
+        echo "✅ SUCCESS: Namespace '$ns' deleted"
+    fi
+done
+
+# Special handling for local-path-storage (intentionally preserved)
+if namespace_exists "local-path-storage"; then
+    echo "🔒 PRESERVED: Namespace 'local-path-storage' intentionally kept"
+else
+    echo "ℹ️ INFO: Namespace 'local-path-storage' was not present"
+fi
+
+# Check for remaining sandbox namespaces
+REMAINING_SANDBOX=$(kubectl get namespaces -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n' | grep '^sandbox-' || true)
+if [ ! -z "$REMAINING_SANDBOX" ]; then
+    echo "❌ FAILED: Remaining sandbox namespaces: $REMAINING_SANDBOX"
+    FAILED_CLEANUP+=("sandbox namespaces")
+else
+    echo "✅ SUCCESS: All sandbox namespaces deleted"
+fi
+
+echo
+if [ ${#FAILED_CLEANUP[@]} -eq 0 ]; then
+    echo "🎉 All namespaces have been successfully cleaned up!"
+    exit 0
+else
+    echo "⚠️ Some namespaces failed to cleanup: ${FAILED_CLEANUP[*]}"
+    echo "You may need to manually investigate and clean up these namespaces."
+    exit 1
+fi
diff --git a/setup-scripts/setup-genai-studio/genai-studio.yml b/setup-scripts/setup-genai-studio/genai-studio.yml
index 10fdc80..0c86d5b 100644
--- a/setup-scripts/setup-genai-studio/genai-studio.yml
+++ b/setup-scripts/setup-genai-studio/genai-studio.yml
@@ -1,3 +1,9 @@
+- name: Install prerequisites (Helm, etc.)
+  import_playbook: playbooks/install-prerequisites.yml
+
+- name: Setup local path storage provisioner
+  import_playbook: playbooks/setup-local-storageclass.yml
+
 - name: Create ssh secrets
   import_playbook: playbooks/create-ssh-secrets.yml
 
diff --git a/setup-scripts/setup-genai-studio/helm-values/mysqldb.yaml b/setup-scripts/setup-genai-studio/helm-values/mysqldb.yaml
index 1a0e46b..9abe0c2 100644
--- a/setup-scripts/setup-genai-studio/helm-values/mysqldb.yaml
+++ b/setup-scripts/setup-genai-studio/helm-values/mysqldb.yaml
@@ -2,6 +2,8 @@ auth:
   rootPassword: root
 
 image:
+  registry: docker.io
+  repository: bitnamilegacy/mysql
   tag: "8.0"
 
 primary:
@@ -14,6 +16,17 @@ primary:
       - ReadWriteOnce
     size: 8Gi
 
+volumePermissions:
+  enabled: true
+  image:
+    registry: docker.io
+    repository: bitnamilegacy/os-shell
+    tag: "latest"
+
+global:
+  security:
+    allowInsecureImages: true
+
 initdbScripts:
   01-create-users-and-dbs.sql: |
     -- Create 'studio' user for '%' host
diff --git a/setup-scripts/setup-genai-studio/helm-values/otel-collector.yaml b/setup-scripts/setup-genai-studio/helm-values/otel-collector.yaml
index 0e2c9d4..a962e82 100644
--- a/setup-scripts/setup-genai-studio/helm-values/otel-collector.yaml
+++ b/setup-scripts/setup-genai-studio/helm-values/otel-collector.yaml
@@ -2,7 +2,7 @@ mode: deployment
 
 image:
   repository: "otel/opentelemetry-collector-contrib"
-  tag: "latest"
+  tag: "0.139.0"
 
 config:
   receivers:
@@ -22,6 +22,8 @@ config:
     clickhouse:
       endpoint: tcp://clickhouse.tracing.svc.cluster.local:9000
      database: otel
+      username: default
+      password: ""
      ttl: 72h
      traces_table_name: otel_traces
      logs_table_name: otel_logs
diff --git a/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml b/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml
index 721f860..4b5440e 100644
--- a/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml
+++ b/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml
@@ -150,6 +150,25 @@ data:
        proxy_set_header Connection "upgrade";
      }
 
+      # WebSocket connection for fine-tuning job monitoring and downloads
+      # Routes directly to studio-frontend's socket.io handlers
+      location /socket.io {
+        proxy_pass http://${STUDIO_FRONTEND_DNS};
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+
+        # Ensure WebSocket upgrade headers
+        proxy_http_version 1.1;
+        proxy_set_header Upgrade $http_upgrade;
+        proxy_set_header Connection "upgrade";
+
+        # Disable buffering for WebSocket
+        proxy_buffering off;
+        proxy_cache off;
+      }
+
      location /studio-backend {
        proxy_pass http://${STUDIO_BACKEND_DNS};
        proxy_set_header Host $host;
@@ -347,21 +366,21 @@ spec:
    spec:
      containers:
        - name: studio-backend
-          image: ${REGISTRY}/studio-backend:${TAG}
+          image: "${REGISTRY}/studio-backend:${TAG}"
          imagePullPolicy: Always
          env:
            - name: APP_FRONTEND_IMAGE
-              value: ${REGISTRY}/app-frontend:${TAG}
+              value: "${REGISTRY}/app-frontend:${TAG}"
            - name: APP_BACKEND_IMAGE
-              value: ${REGISTRY}/app-backend:${TAG}
+              value: "${REGISTRY}/app-backend:${TAG}"
            - name: REGISTRY
-              value: ${REGISTRY}
+              value: "opea"
            - name: TAG
-              value: ${TAG}
+              value: "1.4"
            - name: SBX_HTTP_PROXY
              value: ${HTTP_PROXY}
            - name: SBX_NO_PROXY
-              value: ${NO_PROXY}
+              value: ${NO_PROXY}
          envFrom:
            - configMapRef:
                name: studio-config
@@ -397,6 +416,19 @@ spec:
      serviceAccountName: studio-backend-sa
 ---
 apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: shared-finetuning-output
+  namespace: studio
+spec:
+  accessModes:
+    - ReadWriteOnce
+  storageClassName: local-path
+  resources:
+    requests:
+      storage: 1Gi
+---
+apiVersion: v1
 kind: Service
 metadata:
   name: studio-frontend
@@ -432,8 +464,11 @@ spec:
      containers:
        - name: studio-frontend
          securityContext: {}
-          image: ${REGISTRY}/studio-frontend:${TAG}
+          image: "${REGISTRY}/studio-frontend:${TAG}"
          imagePullPolicy: Always
+          envFrom:
+            - configMapRef:
+                name: studio-config
          env:
            - name: DATABASE_TYPE
              value: mysql
@@ -449,6 +484,12 @@ spec:
              value: studio
            - name: DATABASE_SSL
              value: "false"
+            - name: HTTP_PROXY
+              value: "${HTTP_PROXY}"
+            - name: HTTPS_PROXY
+              value: "${HTTP_PROXY}"
+            - name: NO_PROXY
+              value: "${NO_PROXY}"
          ports:
            - name: studio-frontend
              containerPort: 8080
@@ -460,6 +501,8 @@ spec:
            - name: ssh-key-volume
              mountPath: /root/.ssh
              readOnly: true
+            - name: shared-output-volume
+              mountPath: /tmp/finetuning/output
      volumes:
        - name: tmp
          emptyDir: {}
@@ -470,6 +513,9 @@ spec:
              - key: studio-id_rsa.pub
                path: id_rsa.pub
                mode: 0644
+        - name: shared-output-volume
+          persistentVolumeClaim:
+            claimName: shared-finetuning-output
 ---
 apiVersion: apps/v1
 kind: Deployment
@@ -502,6 +548,13 @@ spec:
            DOWNLOAD_URL="https://codeload.github.com/${OWNER}/${REPO}/tar.gz/${BRANCH}"
            curl "${DOWNLOAD_URL}" | tar -xz --strip-components=4 -C /opt/keycloak/themes "${REPO}-${BRANCH}/${KC_ASSETS_DIR}/themes"
            curl "${DOWNLOAD_URL}" | tar -xz --strip-components=4 -C /opt/keycloak/data "${REPO}-${BRANCH}/${KC_ASSETS_DIR}/data"
+          env:
+            - name: http_proxy
+              value: ${HTTP_PROXY}
+            - name: https_proxy
+              value: ${HTTP_PROXY}
+            - name: NO_PROXY
+              value: ${NO_PROXY}
          envFrom:
            - configMapRef:
                name: studio-config
@@ -515,7 +568,7 @@ spec:
        runAsGroup: 0
      containers:
        - name: keycloak
-          image: quay.io/keycloak/keycloak:latest
+          image: quay.io/keycloak/keycloak:26.2.5
          volumeMounts:
            - name: app-tls
              mountPath: /etc/ssl
@@ -598,4 +651,65 @@ spec:
      port: 8443
      targetPort: 8443
  selector:
-    app: keycloak
\ No newline at end of file
+    app: keycloak
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: finetuning-server
+  namespace: studio
+spec:
+  type: ClusterIP
+  ports:
+    - name: api
+      protocol: TCP
+      port: 8015
+      targetPort: 8015
+    - name: dashboard
+      protocol: TCP
+      port: 8265
+      targetPort: 8265
+  selector:
+    app: finetuning-server
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: finetuning-server
+  namespace: studio
+  labels:
+    app: finetuning-server
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: finetuning-server
+  template:
+    metadata:
+      labels:
+        app: finetuning-server
+    spec:
+      containers:
+        - name: finetuning-server
+          image: opea/finetuning:latest
+          imagePullPolicy: IfNotPresent
+          ports:
+            - containerPort: 8015
+            - containerPort: 8265
+          env:
+            - name: http_proxy
+              value: ${HTTP_PROXY}
+            - name: https_proxy
+              value: ${HTTP_PROXY}
+            - name: NO_PROXY
+              value: ${NO_PROXY}
+          resources: {}
+          volumeMounts:
+            - name: shared-output-volume
+              mountPath: /home/user/comps/finetuning/src/output
+      volumes:
+        - name: shared-output-volume
+          persistentVolumeClaim:
+            claimName: shared-finetuning-output
\ No newline at end of file
diff --git a/setup-scripts/setup-genai-studio/playbooks/create-ssh-secrets.yml b/setup-scripts/setup-genai-studio/playbooks/create-ssh-secrets.yml
index a4532e5..7841188 100644
--- a/setup-scripts/setup-genai-studio/playbooks/create-ssh-secrets.yml
+++ b/setup-scripts/setup-genai-studio/playbooks/create-ssh-secrets.yml
@@ -1,5 +1,7 @@
 - name: Create ssh keys in k8 secrets using shell and kubectl commands
   hosts: localhost
+  vars_files:
+    - ../vars.yml
   tasks:
 
@@ -23,9 +25,17 @@
       command: kubectl wait --for=condition=Ready pod/ubuntu-ssh-keygen -n studio --timeout=60s
       when: "'NotFound' in kubectl_secret_check.stderr"
 
+    - name: Install openssh-client in pod
+      shell: |
+        kubectl exec -n studio ubuntu-ssh-keygen -- bash -c "
+          export http_proxy='{{ http_proxy }}'
+          export https_proxy='{{ http_proxy }}'
+          export no_proxy='{{ no_proxy }}'
+          apt-get update && apt-get install -y openssh-client"
+      when: "'NotFound' in kubectl_secret_check.stderr"
+
     - name: Generate SSH key inside pod
       shell: |
-        kubectl exec -n studio ubuntu-ssh-keygen -- bash -c "apt-get update && apt-get install -y openssh-client"
         kubectl exec -n studio ubuntu-ssh-keygen -- bash -c "ssh-keygen -t rsa -b 2048 -f /tmp/id_rsa -N '' -C ''"
       when: "'NotFound' in kubectl_secret_check.stderr"
 
diff --git a/setup-scripts/setup-genai-studio/playbooks/deploy-monitoring.yml b/setup-scripts/setup-genai-studio/playbooks/deploy-monitoring.yml
index f64b1cd..5bfe0f2 100644
--- a/setup-scripts/setup-genai-studio/playbooks/deploy-monitoring.yml
+++ b/setup-scripts/setup-genai-studio/playbooks/deploy-monitoring.yml
@@ -1,38 +1,39 @@
 - name: Deploy prometheus and grafana with local-path-storage
   hosts: localhost
+  vars_files:
+    - ../vars.yml
   tasks:
-    - name: Check if local-path-storage namespace exists
-      shell: kubectl get namespace local-path-storage --ignore-not-found
-      register: namespace_check
-      ignore_errors: yes
+    - name: Check if monitoring namespace exists
+      shell: kubectl get namespace monitoring --ignore-not-found
+      register: monitoring_namespace_check
       changed_when: false
 
-    - name: Install local-path-provisioner if namespace does not exist
-      shell: kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.30/deploy/local-path-storage.yaml
-      when: namespace_check.stdout == ""
-      register: apply_output
-
-    - name: Wait for local-path-provisioner to be ready
-      shell: kubectl wait --for=condition=Ready pod -l app=local-path-provisioner -n local-path-storage --timeout=120s
-      when: namespace_check.stdout == ""
-
     - name: Create monitoring namespace
       command: kubectl create namespace monitoring
-      ignore_errors: yes
-
-    - name: Install Helm
-      shell: curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
+      when: monitoring_namespace_check.stdout == ""
 
     - name: Add Prometheus Helm repository
       command: helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+      environment:
+        http_proxy: "{{ http_proxy }}"
+        https_proxy: "{{ http_proxy }}"
+        no_proxy: "{{ no_proxy }}"
 
     - name: Update Helm repositories
       command: helm repo update
+      environment:
+        http_proxy: "{{ http_proxy }}"
+        https_proxy: "{{ http_proxy }}"
+        no_proxy: "{{ no_proxy }}"
 
     # Installing the CRDs needed
     - name: Helm install kube-prometheus-stack
       command: helm install kube-prometheus-stack prometheus-community/kube-prometheus-stack -n monitoring
       ignore_errors: yes
+      environment:
+        http_proxy: "{{ http_proxy }}"
+        https_proxy: "{{ http_proxy }}"
+        no_proxy: "{{ no_proxy }}"
 
     - name: Delete kube-prometheus-stack
       command: helm delete kube-prometheus-stack -n monitoring
diff --git a/setup-scripts/setup-genai-studio/playbooks/deploy-mysqldb.yml b/setup-scripts/setup-genai-studio/playbooks/deploy-mysqldb.yml
index 591d826..89cb0fe 100644
--- a/setup-scripts/setup-genai-studio/playbooks/deploy-mysqldb.yml
+++ b/setup-scripts/setup-genai-studio/playbooks/deploy-mysqldb.yml
@@ -1,42 +1,23 @@
 - name: Deploy mysql database
   hosts: localhost
+  vars_files:
+    - ../vars.yml
   tasks:
-    - name: Check if local-path-storage namespace exists
-      shell: kubectl get namespace local-path-storage --ignore-not-found
-      register: namespace_check
-      ignore_errors: yes
-      changed_when: false
-
-    - name: Install local-path-provisioner if namespace does not exist
-      shell: kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.30/deploy/local-path-storage.yaml
-      when: namespace_check.stdout == ""
-      register: apply_output
-
-    - name: Wait for local-path-provisioner to be ready
-      shell: kubectl wait --for=condition=Ready pod -l app=local-path-provisioner -n local-path-storage --timeout=120s
-      when: namespace_check.stdout == ""
-
     - name: Check if mysql namespace exists
       shell: kubectl get namespace mysql --ignore-not-found
       register: namespace_check
-      ignore_errors: yes
       changed_when: false
 
     - name: End playbook if mysql namespace exists
       meta: end_play
       when: namespace_check.stdout != ""
 
-    - name: Add bitnami Helm repository
-      command: helm repo add bitnami https://charts.bitnami.com/bitnami
-
-    - name: Update Helm repositories
-      command: helm repo update
-
-    - name: Create 'mysql' namespace
-      command: kubectl create ns mysql
-
     - name: Install MySQL using Helm
-      command: helm install mysql bitnami/mysql -n mysql -f ../helm-values/mysqldb.yaml
+      command: helm install mysql oci://registry-1.docker.io/bitnamicharts/mysql -n mysql --create-namespace -f ../helm-values/mysqldb.yaml
+      environment:
+        http_proxy: "{{ http_proxy }}"
+        https_proxy: "{{ http_proxy }}"
+        no_proxy: "{{ no_proxy }}"
 
     - name: Wait for mysql-0 pod to be ready
       command: kubectl wait --for=condition=ready pod -l app.kubernetes.io/instance=mysql -n mysql --timeout=300s
\ No newline at end of file
diff --git a/setup-scripts/setup-genai-studio/playbooks/deploy-studio.yml b/setup-scripts/setup-genai-studio/playbooks/deploy-studio.yml
index 1d2ecef..f28c231 100644
--- a/setup-scripts/setup-genai-studio/playbooks/deploy-studio.yml
+++ b/setup-scripts/setup-genai-studio/playbooks/deploy-studio.yml
@@ -3,29 +3,14 @@
   vars_files:
     - ../vars.yml
   tasks:
-    - name: Check if local-path-storage namespace exists
-      shell: kubectl get namespace local-path-storage --ignore-not-found
-      register: namespace_check
-      ignore_errors: yes
-      changed_when: false
-
-    - name: Install local-path-provisioner if namespace does not exist
-      shell: kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.30/deploy/local-path-storage.yaml
-      when: namespace_check.stdout == ""
-      register: apply_output
-
-    - name: Wait for local-path-provisioner to be ready
-      shell: kubectl wait --for=condition=Ready pod -l app=local-path-provisioner -n local-path-storage --timeout=120s
-      when: namespace_check.stdout == ""
-
     - name: Check if studio namespace exists
-      command: kubectl get namespace studio
-      register: studio_namespace
-      ignore_errors: yes
+      shell: kubectl get namespace studio --ignore-not-found
+      register: studio_namespace_check
+      changed_when: false
 
     - name: Create studio namespace
       command: kubectl create namespace studio
-      when: studio_namespace.rc != 0
+      when: studio_namespace_check.stdout == ""
 
     - name: Check for coredns service
       shell: kubectl get svc coredns -n kube-system --ignore-not-found
@@ -38,16 +23,16 @@
       when: coredns_check.stdout != ''
 
     - name: Check if app-tls exists in studio namespace
-      command: kubectl get secret app-tls -n studio
+      shell: kubectl get secret app-tls -n studio --ignore-not-found
       register: app_tls_secret_check
-      ignore_errors: yes
+      changed_when: false
 
     - name: Generate TLS certificate and create app-tls
       shell: |
        openssl req -x509 -nodes -days 365 -newkey rsa:4096 -keyout app-tls.key -out app-tls.crt -subj "/CN=studio/O=studio"
        kubectl create secret generic app-tls --from-file=app-tls.crt --from-file=app-tls.key -n studio
        rm app-tls.key app-tls.crt
-      when: app_tls_secret_check.rc != 0
+      when: app_tls_secret_check.stdout == ""
 
     - name: Apply studio configuration
       command: kubectl apply -f ../studio-config.yaml
diff --git a/setup-scripts/setup-genai-studio/playbooks/deploy-tracing.yml b/setup-scripts/setup-genai-studio/playbooks/deploy-tracing.yml
index 75bed58..07ebcd1 100644
--- a/setup-scripts/setup-genai-studio/playbooks/deploy-tracing.yml
+++ b/setup-scripts/setup-genai-studio/playbooks/deploy-tracing.yml
@@ -1,26 +1,12 @@
 ---
 - name: Deploy clickhouse and otel collector for tracing
   hosts: localhost
+  vars_files:
+    - ../vars.yml
   tasks:
-    - name: Check if local-path-storage namespace exists
-      shell: kubectl get namespace local-path-storage --ignore-not-found
-      register: namespace_check
-      ignore_errors: yes
-      changed_when: false
-
-    - name: Install local-path-provisioner if namespace does not exist
-      shell: kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.30/deploy/local-path-storage.yaml
-      when: namespace_check.stdout == ""
-      register: apply_output
-
-    - name: Wait for local-path-provisioner to be ready
-      shell: kubectl wait --for=condition=Ready pod -l app=local-path-provisioner -n local-path-storage --timeout=120s
-      when: namespace_check.stdout == ""
-
     - name: Check if tracing namespace exists
       shell: kubectl get namespace tracing --ignore-not-found
       register: namespace_check
-      ignore_errors: yes
       changed_when: false
 
     - name: End playbook if tracing namespace exists
@@ -29,15 +15,27 @@
 
     - name: Add Pascaliske Helm repository
       command: helm repo add pascaliske https://charts.pascaliske.dev
+      environment:
+        http_proxy: "{{ http_proxy }}"
+        https_proxy: "{{ http_proxy }}"
+        no_proxy: "{{ no_proxy }}"
 
     - name: Update Helm repositories
       command: helm repo update
+      environment:
+        http_proxy: "{{ http_proxy }}"
+        https_proxy: "{{ http_proxy }}"
+        no_proxy: "{{ no_proxy }}"
 
     - name: Create 'tracing' namespace
       command: kubectl create ns tracing
 
     - name: Install Clickhouse Helm chart in 'tracing' namespace
-      command: helm install clickhouse pascaliske/clickhouse -n tracing --set persistentVolumeClaim.storageClassName=local-path
+      command: helm install clickhouse pascaliske/clickhouse --version 0.3.1 -n tracing --set persistentVolumeClaim.storageClassName=local-path
+      environment:
+        http_proxy: "{{ http_proxy }}"
+        https_proxy: "{{ http_proxy }}"
+        no_proxy: "{{ no_proxy }}"
 
     - name: Wait for Clickhouse pod to be ready
       command: kubectl wait --namespace tracing --for=condition=ready pod -l app.kubernetes.io/name=clickhouse --timeout=120s
@@ -47,12 +45,24 @@
 
     - name: Add OpenTelemetry Helm repository
       command: helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts
+      environment:
+        http_proxy: "{{ http_proxy }}"
+        https_proxy: "{{ http_proxy }}"
+        no_proxy: "{{ no_proxy }}"
 
     - name: Update Helm repositories
       command: helm repo update
+      environment:
+        http_proxy: "{{ http_proxy }}"
+        https_proxy: "{{ http_proxy }}"
+        no_proxy: "{{ no_proxy }}"
 
     - name: Install OpenTelemetry Collector Helm chart in 'tracing' namespace
-      command: helm install tracing open-telemetry/opentelemetry-collector -n tracing -f ../helm-values/otel-collector.yaml
+      command: helm install tracing open-telemetry/opentelemetry-collector --version 0.139.1 -n tracing -f ../helm-values/otel-collector.yaml
+      environment:
+        http_proxy: "{{ http_proxy }}"
+        https_proxy: "{{ http_proxy }}"
+        no_proxy: "{{ no_proxy }}"
 
     - name: Wait for OpenTelemetry Collector pod to be ready
       command: kubectl wait --namespace tracing --for=condition=ready pod -l app.kubernetes.io/name=opentelemetry-collector --timeout=120s
\ No newline at end of file
diff --git a/setup-scripts/setup-genai-studio/playbooks/install-prerequisites.yml b/setup-scripts/setup-genai-studio/playbooks/install-prerequisites.yml
new file mode 100644
index 0000000..67ad486
--- /dev/null
+++ b/setup-scripts/setup-genai-studio/playbooks/install-prerequisites.yml
@@ -0,0 +1,29 @@
+---
+- name: Install prerequisites for GenAI Studio
+  hosts: localhost
+  vars_files:
+    - ../vars.yml
+  tasks:
+    - name: Check if Helm is installed
+      command: helm version --short
+      register: helm_check
+      failed_when: false
+      changed_when: false
+
+    - name: Install Helm
+      shell: curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
+      environment:
+        http_proxy: "{{ http_proxy }}"
+        https_proxy: "{{ http_proxy }}"
+        no_proxy: "{{ no_proxy }}"
+      when: helm_check.rc != 0
+
+    - name: Verify Helm installation
+      command: helm version --short
+      changed_when: false
+
+    - name: Check if kubectl is available
+      command: kubectl version --client
+      register: kubectl_check
+      failed_when: kubectl_check.rc != 0
+      changed_when: false
diff --git a/setup-scripts/setup-genai-studio/playbooks/setup-local-storageclass.yml b/setup-scripts/setup-genai-studio/playbooks/setup-local-storageclass.yml
new file mode 100644
index 0000000..af504c1
--- /dev/null
+++ b/setup-scripts/setup-genai-studio/playbooks/setup-local-storageclass.yml
@@ -0,0 +1,24 @@
+---
+- name: Setup local path storage provisioner
+  hosts: localhost
+  vars_files:
+    - ../vars.yml
+  tasks:
+    - name: Check if local-path-storage namespace exists
+      shell: kubectl get namespace local-path-storage --ignore-not-found
+      register: namespace_check
+      ignore_errors: yes
+      changed_when: false
+
+    - name: Install local-path-provisioner if namespace does not exist
+      shell: kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.30/deploy/local-path-storage.yaml
+      environment:
+        http_proxy: "{{ http_proxy }}"
+        https_proxy: "{{ http_proxy }}"
+        no_proxy: "{{ no_proxy }}"
+      when: namespace_check.stdout == ""
+      register: apply_output
+
+    - name: Wait for local-path-provisioner to be ready
+      shell: kubectl wait --for=condition=Ready pod -l app=local-path-provisioner -n local-path-storage --timeout=120s
+      when: namespace_check.stdout == ""
diff --git a/setup-scripts/setup-genai-studio/readme.md b/setup-scripts/setup-genai-studio/readme.md
index 58815c6..328e1e8 100644
--- a/setup-scripts/setup-genai-studio/readme.md
+++ b/setup-scripts/setup-genai-studio/readme.md
@@ -21,7 +21,6 @@ The genai-studio playbook script will:
 Run below commands:
 
 ```sh
-curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
 sudo apt install ansible -y
 ansible-playbook genai-studio.yml
 ```
@@ -32,3 +31,32 @@ Run below commands to do a /health test:
 ```sh
 curl http://localhost:30007/studio-backend/health
 ```
+
+## Cleanup
+
+To completely remove GenAI Studio and all its components:
+
+```sh
+./cleanup-genai-studio.sh
+```
+
+This script will:
+- Delete all GenAI Studio namespaces (studio, monitoring, tracing, mysql)
+- Remove all sandbox namespaces
+- Clean up Helm releases
+- Remove PVCs, secrets, and configmaps
+- Provide detailed feedback on the cleanup process
+
+### Important Notes
+
+**Local Path Storage Preservation:**
+The cleanup script intentionally **does NOT** remove the `local-path-storage` namespace because:
+- It may be used by other applications beyond GenAI Studio
+- Deleting it would break existing PVCs that use the `local-path` StorageClass
+- It's a cluster-wide infrastructure component that should be managed separately
+
+If you need to remove local-path-storage after ensuring it's safe to do so:
+```sh
+kubectl delete namespace local-path-storage
+kubectl delete storageclass local-path
+```
\ No newline at end of file
diff --git a/setup-scripts/setup-genai-studio/studio-config.yaml b/setup-scripts/setup-genai-studio/studio-config.yaml
index 8574900..e876692 100644
--- a/setup-scripts/setup-genai-studio/studio-config.yaml
+++ b/setup-scripts/setup-genai-studio/studio-config.yaml
@@ -13,5 +13,6 @@ data:
   APP_FRONTEND_DNS: "app-frontend.$namespace.svc.cluster.local:5275"
   APP_BACKEND_DNS: "app-backend.$namespace.svc.cluster.local:8899"
   APP_CHATHISTORY_DNS: "chathistory-mongo.$namespace.svc.cluster.local:6012"
-  PREPARE_DOC_REDIS_PREP_DNS: "prepare-doc-redis-prep-0.$namespace.svc.cluster.local:6007"
-  STUDIO_BACKEND_DNS: "studio-backend.studio.svc.cluster.local:5000"
\ No newline at end of file
+  PREPARE_DOC_REDIS_PREP_DNS: "opea-prepare-doc-redis-prep-0.$namespace.svc.cluster.local:6007"
+  STUDIO_BACKEND_DNS: "studio-backend.studio.svc.cluster.local:5000"
+  FINETUNING_HOST: "finetuning-server.studio.svc.cluster.local"
\ No newline at end of file
diff --git a/setup-scripts/setup-genai-studio/vars.yml b/setup-scripts/setup-genai-studio/vars.yml
index d53d819..b277acf 100644
--- a/setup-scripts/setup-genai-studio/vars.yml
+++ b/setup-scripts/setup-genai-studio/vars.yml
@@ -1,5 +1,7 @@
-container_registry: 'opea'
+# Container registry configuration
+# Replace {{ ansible_default_ipv4.address }} with your Kubernetes master/API endpoint IP if needed
+container_registry: '{{ ansible_default_ipv4.address }}:5000/opea'
 container_tag: 'latest'
+mysql_host: 'mysql.mysql.svc.cluster.local'
 http_proxy: ''
-no_proxy: ''
-mysql_host: 'mysql.mysql.svc.cluster.local'
\ No newline at end of file
+no_proxy: 'localhost,127.0.0.1,.local,.svc.cluster.local,{{ ansible_default_ipv4.address }}'
\ No newline at end of file
diff --git a/studio-backend/Dockerfile b/studio-backend/Dockerfile
index 00b557d..71848f0 100644
--- a/studio-backend/Dockerfile
+++ b/studio-backend/Dockerfile
@@ -1,6 +1,16 @@
 # Use an official Python runtime as a parent image
 FROM python:3.11-slim
 
+# Accept proxy build arguments
+ARG http_proxy
+ARG https_proxy
+ARG no_proxy
+
+# Set proxy environment variables for package managers
+ENV http_proxy=${http_proxy}
+ENV https_proxy=${https_proxy}
+ENV no_proxy=${no_proxy}
+
 # Set the working directory in the container
 WORKDIR /usr/src/
 
diff --git a/studio-backend/app/routers/debuglog_router.py b/studio-backend/app/routers/debuglog_router.py
index 0d62100..760a072 100644
--- a/studio-backend/app/routers/debuglog_router.py
+++ b/studio-backend/app/routers/debuglog_router.py
@@ -85,6 +85,34 @@ def find_pod_dependencies(pod, all_pods, services, namespace, core_v1_api):
     # Combine all environment variables for further analysis
     all_env_vars = env_vars + init_env_vars + configmap_env_vars
 
+    # Special handling for app-backend pods - filter out dependent services
+    is_app_backend = pod.metadata.name and 'app-backend' in pod.metadata.name
+    if is_app_backend:
+        # For app-backend, we want to exclude references from dependent_services
+        # but keep direct OPEA service references
+        filtered_env_vars = []
+        for env_val in all_env_vars:
+            # Skip if this looks like workflow-info.json content with dependent_services
+            if isinstance(env_val, str) and '"dependent_services"' in env_val:
+                # Parse the JSON to extract only direct service references, not dependent ones
+                try:
+                    import json
+                    workflow_data = json.loads(env_val)
+                    if 'nodes' in workflow_data:
+                        # Only include OPEA service names, not their dependencies
+                        opea_services = []
+                        for node_id, node_data in workflow_data['nodes'].items():
+                            if node_data.get('name', '').startswith('opea_service@'):
+                                opea_services.append(node_data['name'])
+                        # Add these as simple strings for pattern matching
+                        filtered_env_vars.extend(opea_services)
+                except:
+                    # If JSON parsing fails, skip this env var
+                    pass
+            else:
+                filtered_env_vars.append(env_val)
+        all_env_vars = filtered_env_vars
+
     # # Debug output
     # print(f"Analyzing dependencies for pod: {pod.metadata.name}")
     # print(f"ConfigMap refs: {configmap_refs}")
diff --git a/studio-backend/app/services/exporter_service.py b/studio-backend/app/services/exporter_service.py
index 31d6911..0dd0d49 100644
--- a/studio-backend/app/services/exporter_service.py
+++ b/studio-backend/app/services/exporter_service.py
@@ -24,7 +24,21 @@ def convert_proj_info_to_manifest(proj_info_json, output_file=None):
         with open(service_file_path, "r") as service_file:
             service_manifest_read = service_file.read()
             service_manifest_raw = list(ordered_load_all(replace_dynamic_manifest_placeholder(service_manifest_read, service_info, proj_info_json), yaml.SafeLoader))
-            service_manifest = [replace_manifest_placeholders(doc, service_info) for doc in service_manifest_raw]
+            # For app-backend, include all service endpoints in variables so it can connect to all services
+            if service_info.get('service_type') == 'app':
+                # Add only OPEA service endpoints to app-backend's variables
+                opea_service_endpoints = {}
+                for svc_name, svc_info in opea_services["services"].items():
+                    if 'endpoint' in svc_info and svc_info['endpoint'].startswith('opea-'):
+                        # Clean the service name for use as variable key (remove @ symbols)
+                        clean_svc_name = svc_name.replace('@', '_').replace('opea_service_', '')
+                        opea_service_endpoints[f"{clean_svc_name}_endpoint"] = svc_info['endpoint']
+
+                # Merge with existing service_info
+                enhanced_service_info = {**service_info, **opea_service_endpoints}
+                service_manifest = [replace_manifest_placeholders(doc, enhanced_service_info) for doc in service_manifest_raw]
+            else:
+                service_manifest = [replace_manifest_placeholders(doc, service_info) for doc in service_manifest_raw]
             output_manifest.extend((doc, service_name) for doc in service_manifest)
 
     # print("Manifest generation completed.")
diff --git a/studio-backend/app/templates/app/app.manifest.yaml b/studio-backend/app/templates/app/app.manifest.yaml
index 9b43420..06d48a1 100644
--- a/studio-backend/app/templates/app/app.manifest.yaml
+++ b/studio-backend/app/templates/app/app.manifest.yaml
@@ -53,6 +53,12 @@ spec:
              value: 'true'
            - name: LOGFLAG
              value: 'True'
+            - name: http_proxy
+              value: "${HTTP_PROXY}"
+            - name: https_proxy
+              value: "${HTTP_PROXY}"
+            - name: no_proxy
+              value: "${NO_PROXY}"
          __TELEMETRY_ENDPOINT__
          securityContext:
            allowPrivilegeEscalation: false
@@ -219,7 +225,7 @@ spec:
    spec:
      containers:
        - name: chathistory-mongo
-          image: opea/chathistory-mongo:latest
+          image: opea/chathistory-mongo:1.3
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 6012
diff --git a/studio-backend/app/templates/microsvc-manifests/asr-usvc.yaml b/studio-backend/app/templates/microsvc-manifests/asr-usvc.yaml
index cff1ece..868643b 100644
--- a/studio-backend/app/templates/microsvc-manifests/asr-usvc.yaml
+++ b/studio-backend/app/templates/microsvc-manifests/asr-usvc.yaml
@@ -10,9 +10,9 @@ metadata:
 data:
   HEALTHCHECK_ENDPOINT: "{whisper_endpoint}:{whisper_port}"
   ASR_ENDPOINT: "http://{whisper_endpoint}:{whisper_port}"
-  http_proxy: ""
-  https_proxy: ""
-  no_proxy: ""
+  http_proxy: "${HTTP_PROXY}"
+  https_proxy: "${HTTP_PROXY}"
+  no_proxy: "${NO_PROXY}"
   LOGFLAG: "True"
 
 ---
diff --git a/studio-backend/app/templates/microsvc-manifests/data-prep.yaml b/studio-backend/app/templates/microsvc-manifests/data-prep.yaml
index 734821a..31337b6 100644
--- a/studio-backend/app/templates/microsvc-manifests/data-prep.yaml
+++ b/studio-backend/app/templates/microsvc-manifests/data-prep.yaml
@@ -17,9 +17,9 @@ data:
   SEARCH_BATCH_SIZE: "10"
   HF_TOKEN: "{huggingFaceToken}"
   HF_HOME: "/tmp/.cache/huggingface"
-  http_proxy: ""
-  https_proxy: ""
-  no_proxy: ""
+  http_proxy: "${HTTP_PROXY}"
+  https_proxy: "${HTTP_PROXY}"
+  no_proxy: "${NO_PROXY}"
   LOGFLAG: "True"
 
 ---
 # Source: data-prep/templates/service.yaml
diff --git a/studio-backend/app/templates/microsvc-manifests/embedding-usvc.yaml b/studio-backend/app/templates/microsvc-manifests/embedding-usvc.yaml
index 997d13c..e6befed 100644
--- a/studio-backend/app/templates/microsvc-manifests/embedding-usvc.yaml
+++ b/studio-backend/app/templates/microsvc-manifests/embedding-usvc.yaml
@@ -10,9 +10,9 @@ metadata:
 data:
   HEALTHCHECK_ENDPOINT: "{tei_endpoint}:{tei_port}"
   TEI_EMBEDDING_ENDPOINT: "http://{tei_endpoint}:{tei_port}"
-  http_proxy: ""
-  https_proxy: ""
-  no_proxy: ""
+  http_proxy: "${HTTP_PROXY}"
+  https_proxy: "${HTTP_PROXY}"
+  no_proxy: "${NO_PROXY}"
   TRANSFORMERS_CACHE: "/tmp/transformers_cache"
   HF_HOME: "/tmp/.cache/huggingface"
   LOGFLAG: "True"
diff --git a/studio-backend/app/templates/microsvc-manifests/reranking-usvc.yaml b/studio-backend/app/templates/microsvc-manifests/reranking-usvc.yaml
index 07cec32..4916907 100644
--- a/studio-backend/app/templates/microsvc-manifests/reranking-usvc.yaml
+++ b/studio-backend/app/templates/microsvc-manifests/reranking-usvc.yaml
@@ -10,9 +10,9 @@ metadata:
 data:
   HEALTHCHECK_ENDPOINT: "{tei_endpoint}:{tei_port}"
   TEI_RERANKING_ENDPOINT: "http://{tei_endpoint}:{tei_port}"
-  http_proxy: ""
-  https_proxy: ""
-  no_proxy: ""
+  http_proxy: "${HTTP_PROXY}"
+  https_proxy: "${HTTP_PROXY}"
+  no_proxy: "${NO_PROXY}"
   LOGFLAG: "True"
 
 ---
 # Source: reranking-usvc/templates/service.yaml
diff --git a/studio-backend/app/templates/microsvc-manifests/retriever-usvc.yaml b/studio-backend/app/templates/microsvc-manifests/retriever-usvc.yaml
index a508bb1..7b09a81 100644
--- a/studio-backend/app/templates/microsvc-manifests/retriever-usvc.yaml
+++ b/studio-backend/app/templates/microsvc-manifests/retriever-usvc.yaml
@@ -14,9 +14,9 @@ data:
   REDIS_URL: "redis://{redis_vector_store_endpoint}:{redis_vector_store_port}"
   INDEX_NAME: "rag-redis"
   EASYOCR_MODULE_PATH: "/tmp/.EasyOCR"
-  http_proxy: ""
-  https_proxy: ""
-  no_proxy: ""
+  http_proxy: "${HTTP_PROXY}"
+  https_proxy: "${HTTP_PROXY}"
+  no_proxy: "${NO_PROXY}"
   HF_HOME: "/tmp/.cache/huggingface"
   HF_TOKEN: "{huggingFaceToken}"
   LOGFLAG: "True"
diff --git a/studio-backend/app/utils/exporter_utils.py b/studio-backend/app/utils/exporter_utils.py
index 0fd6fe7..766034a 100644
--- a/studio-backend/app/utils/exporter_utils.py
+++ b/studio-backend/app/utils/exporter_utils.py
@@ -206,7 +206,7 @@ def process_opea_services(proj_info_json):
         # Remove the 'opea_service@' prefix and append the node_name suffix if any
         node_suffix = node_name.split('_')[-1] if '_' in node_name else ''
         service_type_cleaned = node_info['service_type'].replace('opea_service@', '')
-        opea_service_endpoint = f"{service_type_cleaned.replace('_','-')}-{node_suffix}".strip('-')
+        opea_service_endpoint = f"opea-{service_type_cleaned.replace('_','-')}-{node_suffix}".strip('-')
 
         # Iterate through the dependent_services to map to the service info
         for service_type, service_info in node_info.get('dependent_services', {}).items():
diff --git a/studio-backend/app/utils/placeholders_utils.py b/studio-backend/app/utils/placeholders_utils.py
index 6e686fd..17d4fca 100644
--- a/studio-backend/app/utils/placeholders_utils.py
+++ b/studio-backend/app/utils/placeholders_utils.py
@@ -63,7 +63,23 @@ def replace_manifest_placeholders(obj, variables):
         value = value.replace("${REGISTRY}", os.getenv("REGISTRY", "opea"))
         value = value.replace("${TAG}", os.getenv("TAG", "latest"))
         value = value.replace("${HTTP_PROXY}", os.getenv("SBX_HTTP_PROXY", ""))
-        value = value.replace("${NO_PROXY}", os.getenv("SBX_NO_PROXY", ""))
+
+        # Enhanced NO_PROXY handling - extract service hostnames from variables
+        base_no_proxy = os.getenv("SBX_NO_PROXY", "")
+        if "${NO_PROXY}" in value and variables:
+            service_hostnames = []
+            # Extract hostnames from all services in variables
+            for var_key, var_value in variables.items():
+                if var_key.endswith('_endpoint') and isinstance(var_value, str):
+                    service_hostnames.append(var_value)
+
+            if service_hostnames:
+                enhanced_no_proxy = f"{base_no_proxy},{','.join(service_hostnames)}" if base_no_proxy else ','.join(service_hostnames)
+                value = value.replace("${NO_PROXY}", enhanced_no_proxy)
+            else:
+                value = value.replace("${NO_PROXY}", base_no_proxy)
+        else:
+            value = value.replace("${NO_PROXY}", base_no_proxy)
         # Attempt to replace placeholders in the string
         formatted_value = value.format(**variables)
         # If the key is a port-related field and the formatted value is a digit, convert to int
diff --git a/studio-frontend/.env.development b/studio-frontend/.env.development
new file mode 100644
index 0000000..0a6398e
--- /dev/null
+++ b/studio-frontend/.env.development
@@ -0,0 +1,6 @@
+NODE_TLS_REJECT_UNAUTHORIZED=0
+VITE_DISABLE_KEYCLOAK=true
+NODE_ENV=development
+VITE_HOST=0.0.0.0
+VITE_PORT=8088
+FINETUNING_HOST= # Command to get your host ip: ip route get 1.1.1.1 | awk '{print $7}'
\ No newline at end of file
diff --git a/studio-frontend/Dockerfile b/studio-frontend/Dockerfile
index e3079f4..e59da67 100644
--- a/studio-frontend/Dockerfile
+++ b/studio-frontend/Dockerfile
@@ -1,4 +1,14 @@
-FROM node:23-alpine
+FROM node:20-alpine
+
+# Accept proxy build arguments
+ARG http_proxy
+ARG https_proxy
+ARG no_proxy
+
+# Set proxy environment variables for package managers
+ENV http_proxy=${http_proxy}
+ENV https_proxy=${https_proxy}
+ENV no_proxy=${no_proxy}
 
 # Install necessary packages
 RUN apk update && apk upgrade && \
@@ -7,10 +17,8 @@ RUN apk update && apk upgrade && \
         build-base cairo-dev pango-dev \
         # Install Chromium
         chromium && \
-    # Update npm to the latest version
-    npm install -g npm@latest && \
     # Install PNPM globally
-    npm install -g pnpm@latest
+    npm install -g pnpm@9
 
 # Debug step to verify git installation
 RUN git --version
diff --git a/studio-frontend/docker-compose.dev.yml b/studio-frontend/docker-compose.dev.yml
new file mode 100644
index 0000000..3624dcb
--- /dev/null
+++ b/studio-frontend/docker-compose.dev.yml
@@ -0,0 +1,47 @@
+version: '3.8'
+
+services:
+  finetuning-server:
+    image: opea/finetuning:latest
+    container_name: finetuning-server
+    user: "0:0"
+    ipc: host
+    ports:
+      - "8015:8015"
+      - "8265:8265"
+    environment:
+      - http_proxy=${http_proxy}
+      - https_proxy=${https_proxy}
+      - no_proxy=${no_proxy}
+    volumes:
+      - finetune-output:/home/user/comps/finetuning/src/output
+    restart: unless-stopped
+
+  studio-frontend:
+    image: studio-frontend:latest
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: studio-frontend-dev
+    ports:
+      - "3000:3000"
+      - "8088:8088"
+    volumes:
+      - .:/usr/src
+      - node_modules:/usr/src/node_modules
+      - pnpm_store:/usr/src/.pnpm-store
+      - /usr/src/packages/ui/build
+      - finetune-output:/tmp/finetuning/output
+    command: ["sh", "-c", "pnpm install && pnpm dev"]
+    environment:
+      - http_proxy=${http_proxy}
+      - https_proxy=${https_proxy}
+      - no_proxy=${no_proxy}
+    stdin_open: true
+    tty: true
+    restart: unless-stopped
+
+volumes:
+  node_modules:
+  pnpm_store:
+  finetune-output:
\ No newline at end of file
diff --git a/studio-frontend/package.json b/studio-frontend/package.json
index 115e3d0..0205332 100644
--- a/studio-frontend/package.json
+++ b/studio-frontend/package.json
@@ -60,6 +60,10 @@
        "overrides": {
            "set-value": "^3.0.3",
            "form-data": "4.0.4"
+        },
+        "peerDependencyRules": {
+            "ignoreMissing": [],
+            "allowAny": []
        }
    },
    "engines": {
@@ -81,7 +85,8 @@
        "cross-spawn": ">=7.0.5",
        "solid-js": ">=1.9.4",
        "tar-fs": ">=3.0.8",
-        "form-data": "4.0.4"
+        "form-data": "4.0.4",
+        "zod": ">=3.23.0"
    },
    "eslintIgnore": [
        "**/dist",
diff --git a/studio-frontend/packages/server/package.json b/studio-frontend/packages/server/package.json
index 5ab2abd..7ef55d6 100644
--- a/studio-frontend/packages/server/package.json
+++ b/studio-frontend/packages/server/package.json
@@ -76,6 +76,7 @@
        "moment-timezone": "^0.5.34",
        "multer": "^1.4.5-lts.1",
        "mysql2": "^3.9.2",
+        "form-data": "^4.0.0",
        "openai": "^4.57.3",
        "pg": "^8.11.1",
        "posthog-node": "^3.5.0",
@@ -85,7 +86,8 @@
        "sqlite3": "^5.1.6",
        "typeorm": "^0.3.6",
        "uuid": "^9.0.1",
-        "winston": "^3.9.0"
+        "winston": "^3.9.0",
+        "https-proxy-agent": "^7.0.4"
    },
    "devDependencies": {
        "@types/content-disposition": "0.5.8",
diff --git a/studio-frontend/packages/server/src/controllers/finetuning/index.ts b/studio-frontend/packages/server/src/controllers/finetuning/index.ts
new file mode 100644
index 0000000..add0c5a
--- /dev/null
+++ b/studio-frontend/packages/server/src/controllers/finetuning/index.ts
@@ -0,0 +1,211 @@
+import { Request, Response, NextFunction } from 'express'
+import { StatusCodes } from 'http-status-codes'
+import { InternalFlowiseError } from '../../errors/internalFlowiseError'
+import finetuningService from '../../services/finetuning'
+
+/**
+ * Upload a training file
+ * POST /api/v1/finetuning/files
+ */
+const uploadTrainingFile = async (req: Request, res: Response, next: NextFunction) => {
+    try {
+        if (!req.file) {
+            // Debug: log request body and files to help trace upload issues
+            console.debug('finetuningController.uploadTrainingFile - no file received. req.body=', req.body, 'req.files=', (req as any).files)
+            throw new InternalFlowiseError(StatusCodes.BAD_REQUEST, 'Error: finetuningController.uploadTrainingFile - file not provided!')
+        }
+
+        const purpose = req.body.purpose || 'fine-tune'
+        const apiResponse = await finetuningService.uploadTrainingFile(req.file, purpose)
+        return res.json(apiResponse)
+    } catch (error) {
+        next(error)
+    }
+}
+
+/**
+ * Create a fine-tuning job
+ * POST /api/v1/finetuning/jobs
+ */
+const createFineTuningJob = async (req: Request, res: Response, next: NextFunction) => {
+    try {
+        const hasFile = !!req.body?.training_file
+        if (!req.body || !hasFile || !req.body.model) {
+            throw new InternalFlowiseError(
+                StatusCodes.BAD_REQUEST,
+                'Error: finetuningController.createFineTuningJob - model and training_file are required!'
+            )
+        }
+
+        const apiResponse = await finetuningService.createFineTuningJob(req.body)
+        return res.json(apiResponse)
+    } catch (error) {
+        next(error)
+    }
+}
+
+/**
+ * List all fine-tuning jobs
+ * GET /api/v1/finetuning/jobs
+ */
+const listFineTuningJobs = async (req: Request, res: Response, next: NextFunction) => {
+    try {
+        const apiResponse = await finetuningService.listFineTuningJobs()
+        return res.json(apiResponse)
+    } catch (error) {
+        next(error)
+    }
+}
+
+/**
+ * Retrieve a specific fine-tuning job
+ * POST /api/v1/finetuning/jobs/retrieve
+ */
+const retrieveFineTuningJob = async (req: Request, res: Response, next: NextFunction) => {
+    try {
+        if (typeof req.body === 'undefined' || !req.body.fine_tuning_job_id) {
+            throw new InternalFlowiseError(
+                StatusCodes.BAD_REQUEST,
+                'Error: finetuningController.retrieveFineTuningJob - fine_tuning_job_id not provided!'
+            )
+        }
+
+        const apiResponse = await finetuningService.retrieveFineTuningJob(req.body.fine_tuning_job_id)
+        return res.json(apiResponse)
+    } catch (error) {
+        next(error)
+    }
+}
+
+/**
+ * Cancel a fine-tuning job
+ * POST /api/v1/finetuning/jobs/cancel
+ */
+const cancelFineTuningJob = async (req: Request, res: Response, next: NextFunction) => {
+    try {
+        if (typeof req.body === 'undefined' || !req.body.fine_tuning_job_id) {
+            throw new InternalFlowiseError(
+                StatusCodes.BAD_REQUEST,
+                'Error: finetuningController.cancelFineTuningJob - fine_tuning_job_id not provided!'
+            )
+        }
+
+        const apiResponse = await finetuningService.cancelFineTuningJob(req.body.fine_tuning_job_id)
+        return res.json(apiResponse)
+    } catch (error) {
+        next(error)
+    }
+}
+
+/**
+ * Delete a fine-tuning job (cancel remote if possible and remove local records)
+ * POST /api/v1/finetuning/jobs/delete
+ */
+const deleteFineTuningJob = async (req: Request, res: Response, next: NextFunction) => {
+    try {
+        if (typeof req.body === 'undefined' || !req.body.fine_tuning_job_id) {
+            throw new InternalFlowiseError(
+                StatusCodes.BAD_REQUEST,
+                'Error: finetuningController.deleteFineTuningJob - fine_tuning_job_id not provided!'
+ ) + } + + const apiResponse = await finetuningService.deleteFineTuningJob(req.body.fine_tuning_job_id) + return res.json(apiResponse) + } catch (error) { + next(error) + } +} + + + +/** + * Fetch Ray/job logs for a fine-tuning job + * POST /api/v1/finetuning/jobs/logs + * body: { fine_tuning_job_id: string, ray_job_id?: string, tail?: number } + */ +const getFineTuningJobLogs = async (req: Request, res: Response, next: NextFunction) => { + try { + if (typeof req.body === 'undefined' || !req.body.fine_tuning_job_id) { + throw new InternalFlowiseError( + StatusCodes.BAD_REQUEST, + 'Error: finetuningController.getFineTuningJobLogs - fine_tuning_job_id not provided!' + ) + } + + const fine_tuning_job_id = req.body.fine_tuning_job_id + const ray_job_id = req.body.ray_job_id + + try { + const apiResponse = await finetuningService.getFineTuningJobLogs(fine_tuning_job_id, { ray_job_id }) + // Service returns either { logs: string } or { logs: '', error: string } + return res.json(apiResponse) + } catch (err: any) { + // If the service throws, return a structured error payload instead of propagating a 500 + const message = err?.message || String(err) || 'Unknown error fetching logs' + return res.json({ logs: '', error: `Error: ${message}` }) + } + } catch (error) { + next(error) + } +} + + +/** + * Download fine-tuning job output as a zip file + * GET /api/v1/finetuning/download-ft/:jobId + */ +const downloadFineTuningOutput = async (req: Request, res: Response, next: NextFunction) => { + try { + const { jobId } = req.params + + if (!jobId) { + throw new InternalFlowiseError( + StatusCodes.BAD_REQUEST, + 'Error: finetuningController.downloadFineTuningOutput - jobId is required!' + ) + } + + // Get the zip file path (creates if needed, but returns immediately if already exists) + const filePath = await finetuningService.downloadFineTuningOutput(jobId) + if (!filePath) { + throw new InternalFlowiseError( + StatusCodes.NOT_FOUND, + `Error: finetuningController.downloadFineTuningOutput - output not found for job: ${jobId}` + ) + } + + // Set response headers for file download + const fileName = `${jobId}-output.zip` + res.setHeader('Content-Type', 'application/zip') + res.setHeader('Content-Disposition', `attachment; filename="${fileName}"`) + + // Stream the file + const fs = require('fs') + const fileStream = fs.createReadStream(filePath) + fileStream.on('error', (err: any) => { + console.error('Error streaming fine-tuning output file:', err) + if (!res.headersSent) { + res.status(StatusCodes.INTERNAL_SERVER_ERROR).json({ + error: 'Error streaming fine-tuning output file' + }) + } + }) + fileStream.pipe(res) + } catch (error) { + next(error) + } +} + + + +export default { + uploadTrainingFile, + createFineTuningJob, + listFineTuningJobs, + retrieveFineTuningJob, + cancelFineTuningJob, + deleteFineTuningJob, + getFineTuningJobLogs, + downloadFineTuningOutput +} diff --git a/studio-frontend/packages/server/src/database/entities/FineTuningCheckpoint.ts b/studio-frontend/packages/server/src/database/entities/FineTuningCheckpoint.ts new file mode 100644 index 0000000..9d119f3 --- /dev/null +++ b/studio-frontend/packages/server/src/database/entities/FineTuningCheckpoint.ts @@ -0,0 +1,19 @@ +import { Entity, Column, PrimaryColumn, CreateDateColumn } from 'typeorm' + +@Entity('fine_tuning_checkpoint') +export class FineTuningCheckpoint { + @PrimaryColumn() + id!: string + + @Column() + fine_tuning_job_id!: string + + @Column() + filename!: string + + @Column({ type: 'text', nullable: true }) + metadata?: 
string + + @CreateDateColumn({ type: 'datetime' }) + createdDate!: Date +} diff --git a/studio-frontend/packages/server/src/database/entities/FineTuningJob.ts b/studio-frontend/packages/server/src/database/entities/FineTuningJob.ts new file mode 100644 index 0000000..ba74cf9 --- /dev/null +++ b/studio-frontend/packages/server/src/database/entities/FineTuningJob.ts @@ -0,0 +1,34 @@ +import { Entity, Column, PrimaryColumn, CreateDateColumn } from 'typeorm' + +@Entity('fine_tuning_job') +export class FineTuningJob { + @PrimaryColumn() + id!: string + + @Column({ nullable: true }) + model?: string + + @Column({ nullable: true }) + task?: string + + @Column({ nullable: true }) + status?: string + + @Column({ nullable: true }) + training_file?: string + + @Column({ type: 'text', nullable: true }) + hyperparameters?: string + + @Column({ type: 'text', nullable: true }) + result_files?: string + + @Column({ type: 'text', nullable: true }) + error?: string + + @Column({ nullable: true, type: 'int' }) + trained_tokens?: number + + @CreateDateColumn({ type: 'datetime' }) + createdDate!: Date +} diff --git a/studio-frontend/packages/server/src/database/entities/index.ts b/studio-frontend/packages/server/src/database/entities/index.ts index 4cb079b..ba8e1fa 100644 --- a/studio-frontend/packages/server/src/database/entities/index.ts +++ b/studio-frontend/packages/server/src/database/entities/index.ts @@ -11,6 +11,7 @@ import { Lead } from './Lead' import { UpsertHistory } from './UpsertHistory' import { ApiKey } from './ApiKey' import { CustomTemplate } from './CustomTemplate' +import { FineTuningJob } from './FineTuningJob' export const entities = { ChatFlow, @@ -25,5 +26,6 @@ export const entities = { Lead, UpsertHistory, ApiKey, - CustomTemplate + CustomTemplate, + FineTuningJob } diff --git a/studio-frontend/packages/server/src/database/migrations/mysql/1760424809635-AddFineTuningTables.ts b/studio-frontend/packages/server/src/database/migrations/mysql/1760424809635-AddFineTuningTables.ts new file mode 100644 index 0000000..67381d4 --- /dev/null +++ b/studio-frontend/packages/server/src/database/migrations/mysql/1760424809635-AddFineTuningTables.ts @@ -0,0 +1,38 @@ +import { MigrationInterface, QueryRunner } from 'typeorm' + +export class AddFineTuningTables1760424809635 implements MigrationInterface { + public async up(queryRunner: QueryRunner): Promise { + await queryRunner.query( + `CREATE TABLE IF NOT EXISTS fine_tuning_job ( + id varchar(255) PRIMARY KEY NOT NULL, + model varchar(255), + task varchar(255), + status varchar(255), + training_file varchar(255), + hyperparameters longtext, + result_files longtext, + error longtext, + trained_tokens int, + createdDate datetime NOT NULL DEFAULT CURRENT_TIMESTAMP + ) ENGINE=InnoDB; + ` + ) + + await queryRunner.query( + `CREATE TABLE IF NOT EXISTS fine_tuning_checkpoint ( + id varchar(255) PRIMARY KEY NOT NULL, + fine_tuning_job_id varchar(255) NOT NULL, + filename varchar(255) NOT NULL, + metadata longtext, + createdDate datetime NOT NULL DEFAULT CURRENT_TIMESTAMP, + INDEX IDX_fine_tuning_checkpoint_job (fine_tuning_job_id) + ) ENGINE=InnoDB; + ` + ) + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.query(`DROP TABLE IF EXISTS fine_tuning_checkpoint`) + await queryRunner.query(`DROP TABLE IF EXISTS fine_tuning_job`) + } +} diff --git a/studio-frontend/packages/server/src/database/migrations/mysql/index.ts b/studio-frontend/packages/server/src/database/migrations/mysql/index.ts index 3645d89..6df72eb 100644 --- 
a/studio-frontend/packages/server/src/database/migrations/mysql/index.ts +++ b/studio-frontend/packages/server/src/database/migrations/mysql/index.ts @@ -30,6 +30,7 @@ import { AddStudioFieldsToChatFlow1733282099772 } from './1733282099772-AddStudi import { AddSandboxTracerUrlToChatFlow1743740099772 } from './1743740099772-AddSandboxTracerUrlToChatFlow' import { AddSandboxDebugLogsUrlToChatFlow1749612373191 } from './1749612373191-AddSandboxDebugLogsUrlToChatFlow' import { AddDeploymentStatusToChatFlow1754700956637 } from './1754700956637-AddDeploymentStatusToChatFlow' +import { AddFineTuningTables1760424809635 } from './1760424809635-AddFineTuningTables' export const mysqlMigrations = [ @@ -64,5 +65,6 @@ export const mysqlMigrations = [ AddStudioFieldsToChatFlow1733282099772, AddSandboxTracerUrlToChatFlow1743740099772, AddSandboxDebugLogsUrlToChatFlow1749612373191, - AddDeploymentStatusToChatFlow1754700956637 + AddDeploymentStatusToChatFlow1754700956637, + AddFineTuningTables1760424809635 ] diff --git a/studio-frontend/packages/server/src/database/migrations/sqlite/1760424809635-AddFineTuningTables.ts b/studio-frontend/packages/server/src/database/migrations/sqlite/1760424809635-AddFineTuningTables.ts new file mode 100644 index 0000000..83e91d6 --- /dev/null +++ b/studio-frontend/packages/server/src/database/migrations/sqlite/1760424809635-AddFineTuningTables.ts @@ -0,0 +1,39 @@ +import { MigrationInterface, QueryRunner } from 'typeorm' + +export class AddFineTuningTables1760424809635 implements MigrationInterface { + public async up(queryRunner: QueryRunner): Promise { + await queryRunner.query( + `CREATE TABLE IF NOT EXISTS "fine_tuning_job" ( + "id" varchar PRIMARY KEY NOT NULL, + "model" varchar, + "task" varchar, + "status" varchar, + "training_file" varchar, + "hyperparameters" text, + "result_files" text, + "error" text, + "trained_tokens" integer, + "createdDate" datetime NOT NULL DEFAULT (datetime('now')) + );` + ) + + await queryRunner.query( + `CREATE TABLE IF NOT EXISTS "fine_tuning_checkpoint" ( + "id" varchar PRIMARY KEY NOT NULL, + "fine_tuning_job_id" varchar NOT NULL, + "filename" varchar NOT NULL, + "metadata" text, + "createdDate" datetime NOT NULL DEFAULT (datetime('now')) + );` + ) + + await queryRunner.query( + `CREATE INDEX IF NOT EXISTS "IDX_fine_tuning_checkpoint_job" ON "fine_tuning_checkpoint" ("fine_tuning_job_id") ;` + ) + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.query(`DROP TABLE IF EXISTS fine_tuning_checkpoint`) + await queryRunner.query(`DROP TABLE IF EXISTS fine_tuning_job`) + } +} diff --git a/studio-frontend/packages/server/src/database/migrations/sqlite/index.ts b/studio-frontend/packages/server/src/database/migrations/sqlite/index.ts index c9ed343..1b87c17 100644 --- a/studio-frontend/packages/server/src/database/migrations/sqlite/index.ts +++ b/studio-frontend/packages/server/src/database/migrations/sqlite/index.ts @@ -29,6 +29,7 @@ import { AddStudioFieldsToChatFlow1733282099772 } from './1733282099772-AddStudi import { AddSandboxTracerUrlToChatFlow1743740099772 } from './1743740099772-AddSandboxTracerUrlToChatFlow' import { AddSandboxDebugLogsUrlToChatFlow1749612373191 } from './1749612373191-AddSandboxDebugLogsUrlToChatFlow' import { AddDeploymentStatusToChatFlow1754700956637 } from './1754700956637-AddDeploymentStatusToChatFlow' +import { AddFineTuningTables1760424809635 } from './1760424809635-AddFineTuningTables' export const sqliteMigrations = [ Init1693835579790, @@ -62,4 +63,5 @@ export const 
sqliteMigrations = [ AddSandboxTracerUrlToChatFlow1743740099772, AddSandboxDebugLogsUrlToChatFlow1749612373191, - AddDeploymentStatusToChatFlow1754700956637 + AddDeploymentStatusToChatFlow1754700956637, + AddFineTuningTables1760424809635 ] diff --git a/studio-frontend/packages/server/src/index.ts b/studio-frontend/packages/server/src/index.ts index 9fa5dad..06b236d 100644 --- a/studio-frontend/packages/server/src/index.ts +++ b/studio-frontend/packages/server/src/index.ts @@ -22,6 +22,8 @@ import flowiseApiV1Router from './routes' import errorHandlerMiddleware from './middlewares/errors' import { SSEStreamer } from './utils/SSEStreamer' import { validateAPIKey } from './utils/validateKey' +import { setupFineTuningDownloadHandlers } from './ws/finetuningDownload' +import { setupFineTuningStatusHandlers } from './ws/finetuningStatus' declare global { namespace Express { @@ -141,7 +143,8 @@ export class App { '/api/v1/leads', '/api/v1/get-upload-file', '/api/v1/ip', - '/api/v1/ping' + '/api/v1/ping', + '/api/v1/finetuning/download-ft/' ] const URL_CASE_INSENSITIVE_REGEX: RegExp = /\/api\/v1\//i const URL_CASE_SENSITIVE_REGEX: RegExp = /\/api\/v1\// @@ -227,13 +230,36 @@ export class App { const packagePath = getNodeModulesPackagePath('flowise-ui') const uiBuildPath = path.join(packagePath, 'build') const uiHtmlPath = path.join(packagePath, 'build', 'index.html') + const nodeEnv = process.env.NODE_ENV || 'undefined' + + // Serve a simple landing page when NODE_ENV is 'development'; all other modes serve the built UI + if (nodeEnv === 'development') { + this.app.get('/', (req: Request, res: Response) => { + res.send(`
+                    <html>
+                    <head>
+                        <title>Flowise Server (development)</title>
+                    </head>
+                    <body>
+                        <h1>Flowise Server</h1>
+                        <p>Mode: development</p>
+                        <p>Server is listening on port 3000.</p>
+                        <p>UI is listening on port 8088.</p>
+                        <p><a href="/api/v1/ping">Ping API</a></p>
+                    </body>
+                    </html>
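+                    <!-- Note: this page is only served when NODE_ENV === 'development'; other modes fall through to the static React build handled below -->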
+ + `) + }) + } else { + this.app.use('/', express.static(uiBuildPath)) - this.app.use('/', express.static(uiBuildPath)) - - // All other requests not handled will return React app - this.app.use((req: Request, res: Response) => { - res.sendFile(uiHtmlPath) - }) + // All other requests not handled will return React app + this.app.use((req: Request, res: Response) => { + res.sendFile(uiHtmlPath) + }) + } // Error handling this.app.use(errorHandlerMiddleware) @@ -267,6 +293,10 @@ export async function start(): Promise { cors: getCorsOptions() }) + // Setup WebSocket handlers + setupFineTuningDownloadHandlers(io) + setupFineTuningStatusHandlers(io) + await serverApp.initDatabase() await serverApp.config(io) diff --git a/studio-frontend/packages/server/src/middlewares/errors/index.ts b/studio-frontend/packages/server/src/middlewares/errors/index.ts index 75cd2c2..06b5422 100644 --- a/studio-frontend/packages/server/src/middlewares/errors/index.ts +++ b/studio-frontend/packages/server/src/middlewares/errors/index.ts @@ -5,14 +5,24 @@ import { InternalFlowiseError } from '../../errors/internalFlowiseError' // we need eslint because we have to pass next arg for the error middleware // eslint-disable-next-line async function errorHandlerMiddleware(err: InternalFlowiseError, req: Request, res: Response, next: NextFunction) { - let displayedError = { + // Safely read streaming flag from body (req.body may be undefined) + const streamingFlag = req && (req as any).body ? (req as any).body.streaming : undefined + + // Build the response payload + const displayedError = { statusCode: err.statusCode || StatusCodes.INTERNAL_SERVER_ERROR, success: false, message: err.message, // Provide error stack trace only in development stack: process.env.NODE_ENV === 'development' ? 
err.stack : {} } - if (!req.body.streaming || req.body.streaming === 'false') { + + // Log the error server-side for easier debugging + // Keep this server-side only; we still control what is returned to the client + // eslint-disable-next-line no-console + console.error('Unhandled error caught by errorHandlerMiddleware:', err) + + if (!streamingFlag || streamingFlag === 'false') { res.setHeader('Content-Type', 'application/json') res.status(displayedError.statusCode).json(displayedError) } diff --git a/studio-frontend/packages/server/src/routes/finetuning/index.ts b/studio-frontend/packages/server/src/routes/finetuning/index.ts new file mode 100644 index 0000000..35c6114 --- /dev/null +++ b/studio-frontend/packages/server/src/routes/finetuning/index.ts @@ -0,0 +1,32 @@ +import express from 'express' +import multer from 'multer' +import finetuningController from '../../controllers/finetuning' + +const router = express.Router() + +// Use memory storage for multer to store files in buffer +const upload = multer({ storage: multer.memoryStorage() }) + +// Upload training file +router.post('/files', upload.single('file'), finetuningController.uploadTrainingFile) + +// Create fine-tuning job +router.post('/jobs', finetuningController.createFineTuningJob) + +// List all fine-tuning jobs +router.get('/jobs', finetuningController.listFineTuningJobs) + +// Retrieve a specific fine-tuning job +router.post('/jobs/retrieve', finetuningController.retrieveFineTuningJob) + +// Fetch logs for a fine-tuning job +router.post('/jobs/logs', finetuningController.getFineTuningJobLogs) + +// Cancel a fine-tuning job +router.post('/jobs/cancel', finetuningController.cancelFineTuningJob) +router.post('/jobs/delete', finetuningController.deleteFineTuningJob) + +// Download fine-tuning job output +router.get('/download-ft/:jobId', finetuningController.downloadFineTuningOutput) + +export default router diff --git a/studio-frontend/packages/server/src/routes/index.ts b/studio-frontend/packages/server/src/routes/index.ts index 6501f55..e1a92a5 100644 --- a/studio-frontend/packages/server/src/routes/index.ts +++ b/studio-frontend/packages/server/src/routes/index.ts @@ -13,6 +13,7 @@ import documentStoreRouter from './documentstore' import exportImportRouter from './export-import' import feedbackRouter from './feedback' import fetchLinksRouter from './fetch-links' +import finetuningRouter from './finetuning' import flowConfigRouter from './flow-config' import getUploadFileRouter from './get-upload-file' import getUploadPathRouter from './get-upload-path' @@ -59,6 +60,7 @@ router.use('/document-store', documentStoreRouter) router.use('/export-import', exportImportRouter) router.use('/feedback', feedbackRouter) router.use('/fetch-links', fetchLinksRouter) +router.use('/finetuning', finetuningRouter) router.use('/flow-config', flowConfigRouter) router.use('/internal-chatmessage', internalChatmessagesRouter) router.use('/internal-prediction', internalPredictionRouter) diff --git a/studio-frontend/packages/server/src/services/chatflows/index.ts b/studio-frontend/packages/server/src/services/chatflows/index.ts index 4ae6c1d..d5abbcd 100644 --- a/studio-frontend/packages/server/src/services/chatflows/index.ts +++ b/studio-frontend/packages/server/src/services/chatflows/index.ts @@ -13,8 +13,25 @@ import { containsBase64File, updateFlowDataWithFilePaths } from '../../utils/fil import { getRunningExpressApp } from '../../utils/getRunningExpressApp' import { utilGetUploadsConfig } from '../../utils/getUploadsConfig' import logger 
from '../../utils/logger' -import axios from 'axios' -import { Readable } from 'stream' +import axios, { AxiosRequestConfig } from 'axios' +import { HttpsProxyAgent } from 'https-proxy-agent' + +// Configure github axios to support HTTP_PROXY/HTTPS_PROXY environment variables +const getGithubAxiosConfig = (): AxiosRequestConfig => { + const http_proxy = process.env.http_proxy || process.env.HTTP_PROXY + const agent = (http_proxy && http_proxy.trim() !== "") ? new HttpsProxyAgent(http_proxy) : undefined + + return { + headers: { + Accept: 'application/vnd.github.v3+json', + }, + proxy: false, + ...(agent && { + httpAgent: agent, + httpsAgent: agent, + }), + } +} const STUDIO_SERVER_URL = process.env.STUDIO_SERVER_URL || 'http://studio-backend.studio.svc.cluster.local:5000' @@ -161,17 +178,23 @@ const getAllChatflowsbyUserId = async (userid: string, type?: ChatflowType): Pro const importSampleChatflowsbyUserId = async (userid: string, type?: ChatflowType): Promise => { try { - const response = await axios.get('https://api.github.com/repos/opea-project/GenAIStudio/contents/sample-workflows'); + const axiosConfig = getGithubAxiosConfig() + + console.log('Importing sample chatflows for user:', userid); + + const response = await axios.get( + 'https://api.github.com/repos/opea-project/GenAIStudio/contents/sample-workflows', + axiosConfig + ); + + console.log('Response from GitHub:', response.data); + const files = response.data.filter((item: any) => item.type === 'file'); - console.log(`Number of files: ${files.length}`); const chatflows: Partial[] = []; - for (const file of files) { - console.log(`Download URL: ${file.download_url}`); - const fileResponse = await axios.get(file.download_url); + const fileResponse = await axios.get(file.download_url, axiosConfig); const parsedFlowData = fileResponse.data; - const newChatflow: Partial = { userid: userid, name: file.name.replace('.json', ''), @@ -180,7 +203,6 @@ const importSampleChatflowsbyUserId = async (userid: string, type?: ChatflowType deployed: false, isPublic: false }; - chatflows.push(newChatflow); } const insertResponse = await importChatflows(chatflows); @@ -617,4 +639,4 @@ export default { getSinglePublicChatbotConfig, oneClickDeploymentService, updateDeploymentStatus -} +} \ No newline at end of file diff --git a/studio-frontend/packages/server/src/services/finetuning/index.ts b/studio-frontend/packages/server/src/services/finetuning/index.ts new file mode 100644 index 0000000..0fed1c8 --- /dev/null +++ b/studio-frontend/packages/server/src/services/finetuning/index.ts @@ -0,0 +1,752 @@ +import axios, { AxiosInstance } from 'axios' +import http from 'http' +import https from 'https' +import * as fs from 'fs' +import * as path from 'path' +import { exec } from 'child_process' +import { promisify } from 'util' +import { StatusCodes } from 'http-status-codes' +import { InternalFlowiseError } from '../../errors/internalFlowiseError' +import { getErrorMessage } from '../../errors/utils' +import { getRunningExpressApp } from '../../utils/getRunningExpressApp' +import { FineTuningJob } from '../../database/entities/FineTuningJob' +import logger from '../../utils/logger' + +const execAsync = promisify(exec) + +const FINETUNING_SERVICE_URL = process.env.FINETUNING_HOST ? 
`http://${process.env.FINETUNING_HOST}:8015` : 'undefined' +console.debug('finetuningService - FINETUNING_SERVICE_URL', FINETUNING_SERVICE_URL) + +// Create an axios client with keep-alive to reduce connection churn +const agentOptions = { keepAlive: true, maxSockets: 20 } +const httpAgent = new http.Agent(agentOptions) +const httpsAgent = new https.Agent(agentOptions) + +const axiosClient: AxiosInstance = axios.create({ + baseURL: FINETUNING_SERVICE_URL, + timeout: 60000, // increase timeout to 60s + httpAgent, + httpsAgent, + headers: { + 'Content-Type': 'application/json' + } +}) + +// In-memory mapping: filename (raw and decoded) -> { id, rawFilename } +const uploadedFileIdMap: Map<string, { id?: string; rawFilename?: string }> = new Map() + +/** + * Helper function to zip a fine-tuning job output directory + * Checks if zip already exists and is up-to-date before creating a new one + * @param outputDir - Full path to the output directory for the job + * @param jobId - ID of the fine-tuning job + * @returns Path to the zipped file or null if failed + */ +const ensureFineTuningOutputZip = async (outputDir: string, jobId: string): Promise<string | null> => { + try { + // eslint-disable-next-line no-console + console.debug(`finetuningService.ensureFineTuningOutputZip - processing output for job: ${jobId}`) + + // Validate output directory exists + if (!fs.existsSync(outputDir)) { + // eslint-disable-next-line no-console + console.warn(`finetuningService.ensureFineTuningOutputZip - output directory not found: ${outputDir}`) + return null + } + + const zipFilePath = `${outputDir}.zip` + const outputStats = fs.statSync(outputDir) + + // Check if zip exists and is up-to-date + if (fs.existsSync(zipFilePath)) { + const zipStats = fs.statSync(zipFilePath) + // If zip is newer than the output directory, skip re-zipping + if (zipStats.mtimeMs > outputStats.mtimeMs) { + // eslint-disable-next-line no-console + console.debug(`finetuningService.ensureFineTuningOutputZip - zip already up-to-date: ${zipFilePath}`) + return zipFilePath + } + // Remove outdated zip + try { + fs.unlinkSync(zipFilePath) + // eslint-disable-next-line no-console + console.debug(`finetuningService.ensureFineTuningOutputZip - removed outdated zip: ${zipFilePath}`) + } catch (e) { + // eslint-disable-next-line no-console + console.warn(`finetuningService.ensureFineTuningOutputZip - failed to remove outdated zip: ${e}`) + } + } + + // Create the archive using tar (more efficient than node zip libraries; note the result is a gzipped tarball despite the .zip extension) + // eslint-disable-next-line no-console + console.debug(`finetuningService.ensureFineTuningOutputZip - starting to zip output for job ${jobId}`) + try { + const parentDir = path.dirname(outputDir) + const dirName = path.basename(outputDir) + const cmd = `cd "${parentDir}" && tar -czf "${path.basename(zipFilePath)}" "${dirName}"` + await execAsync(cmd, { + maxBuffer: 1024 * 1024 * 100, // 100MB buffer for large outputs + timeout: 600000 // 10 minute timeout + }) + + // eslint-disable-next-line no-console + console.debug(`finetuningService.ensureFineTuningOutputZip - zip created successfully for job ${jobId}: ${zipFilePath}`) + return zipFilePath + } catch (execErr: any) { + // eslint-disable-next-line no-console + console.error(`finetuningService.ensureFineTuningOutputZip - tar failed for job ${jobId}: ${execErr?.message || execErr}`) + return null + } + } catch (error: any) { + // eslint-disable-next-line no-console + console.error(`finetuningService.ensureFineTuningOutputZip - error: ${error?.message || error}`) + return null + } +} + +/** + * Upload a training file to the finetuning 
service + */ +const uploadTrainingFile = async (file: Express.Multer.File, purpose: string = 'fine-tune') => { + try { + // Create FormData using the browser/Node.js FormData API + const FormData = require('form-data') + const formData = new FormData() + + formData.append('file', file.buffer, { + filename: file.originalname, + contentType: file.mimetype + }) + formData.append('purpose', purpose) + + const response = await axios.post(`${FINETUNING_SERVICE_URL}/v1/files`, formData, { + headers: { + ...formData.getHeaders() + } + }) + + // Debug: log the response from the finetuning service for uploaded file + try { + // eslint-disable-next-line no-console + console.debug('finetuningService.uploadTrainingFile - response.data:', response.data) + } catch (logErr) { + // ignore logging errors + } + + return response.data + } catch (error: any) { + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.uploadTrainingFile - ${getErrorMessage(error)}` + ) + } +} + +// Helper: persist or update a fine-tuning job record in the local DB +const persistJobToDb = async (jobData: any) => { + try { + if (!jobData) return + const appServer = getRunningExpressApp() + if (!appServer || !appServer.AppDataSource) return + + const repo = appServer.AppDataSource.getRepository(FineTuningJob) + + // Determine canonical id from the response + const id = jobData.id || jobData.job_id || jobData.fine_tuning_job_id || jobData.fine_tuning_id + if (!id) return + + // Build entity object mapping common fields; fall back to stringifying objects + let taskVal: any = jobData.task || undefined + try { + if (!taskVal && jobData.General) { + if (typeof jobData.General === 'string') { + const parsed = JSON.parse(jobData.General) + taskVal = parsed?.task || taskVal + } else if (typeof jobData.General === 'object') { + taskVal = jobData.General?.task || taskVal + } + } + } catch (e) { + // ignore parse errors + } + + const entity: any = { + id: String(id), + name: jobData.name || jobData.id || undefined, + model: jobData.model || undefined, + status: jobData.status || jobData.state || undefined, + training_file: jobData.training_file || jobData.trainingFile || undefined, + task: taskVal || undefined, + progress: typeof jobData.progress === 'number' ? jobData.progress : undefined, + trained_tokens: typeof jobData.trained_tokens === 'number' ? jobData.trained_tokens : undefined + } + + + if (jobData.hyperparameters) { + try { + entity.hyperparameters = typeof jobData.hyperparameters === 'object' ? JSON.stringify(jobData.hyperparameters) : String(jobData.hyperparameters) + } catch (e) {} + } + + if (jobData.result_files) { + try { + entity.result_files = typeof jobData.result_files === 'object' ? JSON.stringify(jobData.result_files) : String(jobData.result_files) + } catch (e) {} + } + + if (jobData.error) { + try { + entity.error = typeof jobData.error === 'object' ? 
JSON.stringify(jobData.error) : String(jobData.error) + } catch (e) {} + } + + if (jobData.estimated_finish) { + entity.estimated_finish = new Date(jobData.estimated_finish) + } + if (jobData.finishedDate || jobData.finished_at || jobData.completed_at) { + entity.finishedDate = new Date(jobData.finishedDate || jobData.finished_at || jobData.completed_at) + } + + // Upsert: merge if exists + let existing = await repo.findOneBy({ id: String(id) }) + if (!existing) { + const created = repo.create(entity) + await repo.save(created) + } else { + repo.merge(existing, entity) + await repo.save(existing) + } + } catch (e) { + // Don't fail the main flow if DB persistence fails; only log + try { + // eslint-disable-next-line no-console + console.error('finetuningService.persistJobToDb - failed to persist job', e) + } catch (logErr) { + // ignore + } + } +} + +// Helper: update specific fields for a job in the DB +const updateJobInDb = async (jobId: string, updates: Partial) => { + try { + if (!jobId) return + const appServer = getRunningExpressApp() + if (!appServer || !appServer.AppDataSource) return + const repo = appServer.AppDataSource.getRepository(FineTuningJob) + const existing = await repo.findOneBy({ id: String(jobId) }) + if (!existing) return + repo.merge(existing, updates) + await repo.save(existing) + } catch (e) { + try { + // eslint-disable-next-line no-console + console.error('finetuningService.updateJobInDb - failed to update job', jobId, e) + } catch (logErr) { + // ignore + } + } +} + +/** + * Create a fine-tuning job + */ +const createFineTuningJob = async (jobConfig: { + training_file: string + model: string + General?: { + task?: string + lora_config?: any + } + Dataset?: { + max_length?: number + query_max_len?: number + passage_max_len?: number + padding?: string + } + Training?: { + epochs?: number + batch_size?: number + gradient_accumulation_steps?: number + } +}) => { + try { + // Work with the jobConfig as-provided by the UI. + const forwardedJobConfig = { ...jobConfig } + + // (Removed verbose initial jobConfig logging to reduce noise) + const sanitizedPayload = JSON.parse(JSON.stringify(forwardedJobConfig)) + + // Remove empty nested objects that may confuse the server + if (sanitizedPayload.General && Object.keys(sanitizedPayload.General).length === 0) { + delete sanitizedPayload.General + } + if (sanitizedPayload.Dataset && Object.keys(sanitizedPayload.Dataset).length === 0) { + delete sanitizedPayload.Dataset + } + if (sanitizedPayload.Training && Object.keys(sanitizedPayload.Training).length === 0) { + delete sanitizedPayload.Training + } + + if (sanitizedPayload.training_file && typeof sanitizedPayload.training_file === 'string') { + const originalFilename = sanitizedPayload.training_file + + // Try to decode first in case it's URL-encoded + let lookupKey = originalFilename + try { + const decoded = decodeURIComponent(originalFilename) + lookupKey = decoded + } catch (e) { + // ignore decode errors + } + + // Check if we have a stored mapping from the upload + let stored = uploadedFileIdMap.get(lookupKey) + if (!stored && lookupKey !== originalFilename) { + // Also try the original (encoded) key + stored = uploadedFileIdMap.get(originalFilename) + } + + if (stored && stored.rawFilename) { + sanitizedPayload.training_file = stored.rawFilename + } + } + + // Try a sequence of attempts to accommodate naming/encoding/id differences. 
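+        // Illustrative example (hypothetical values): after the sanitization and
+        // filename mapping above, the payload forwarded to the finetuning service
+        // might look like:
+        //   { training_file: 'alpaca_data.jsonl', model: 'meta-llama/Llama-2-7b-chat-hf',
+        //     General: { task: 'instruction_tuning' }, Training: { epochs: 3, batch_size: 8 } }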
+ const attemptPost = async (payload: any, label = 'attempt') => { + try { + // eslint-disable-next-line no-console + console.debug(`finetuningService.createFineTuningJob - ${label} payload:`, payload) + const resp = await axiosClient.post('/v1/fine_tuning/jobs', payload) + // eslint-disable-next-line no-console + console.debug(`finetuningService.createFineTuningJob - ${label} response:`, typeof resp?.data === 'string' ? resp.data : JSON.stringify(resp?.data)) + return resp + } catch (err: any) { + // Log detailed info for debugging + try { + // eslint-disable-next-line no-console + console.error(`finetuningService.createFineTuningJob - ${label} failed`, { + message: err?.message, + status: err?.response?.status, + responseData: typeof err?.response?.data === 'string' ? err.response.data : JSON.stringify(err?.response?.data), + payload + }) + } catch (logErr) { + // ignore logging errors + } + throw err + } + } + + // Send the sanitized payload + const resp = await attemptPost(sanitizedPayload, 'final') + const respData = resp.data + // If the external service didn't echo back the task, preserve task from our sanitized payload + try { + const payloadTask = sanitizedPayload?.General?.task || sanitizedPayload?.task + if (payloadTask && !respData.task) { + // attach task so persistJobToDb stores it + try { respData.task = payloadTask } catch (e) { /* ignore */ } + } + } catch (e) { + // ignore + } + + // Persist to local DB + try { + await persistJobToDb(respData) + } catch (e) { + // ignore + } + return respData + } catch (error: any) { + // Log error details from external service if available for debugging + try { + // eslint-disable-next-line no-console + console.error('finetuningService.createFineTuningJob - axios error:', { + message: error.message, + responseData: error.response ? (typeof error.response.data === 'string' ? error.response.data : JSON.stringify(error.response.data)) : undefined, + status: error.response ? error.response.status : undefined, + headers: error.response ? 
error.response.headers : undefined + }) + } catch (logErr) { + // ignore logging errors + } + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.createFineTuningJob - ${getErrorMessage(error)}` + ) + } +} + +/** + * List all fine-tuning jobs + */ +const listFineTuningJobs = async () => { + try { + // First try to read persisted jobs from local DB + try { + const appServer = getRunningExpressApp() + const repo = appServer.AppDataSource.getRepository(FineTuningJob) + const persisted = await repo.find() + if (persisted && persisted.length > 0) { + return persisted + } + } catch (e) { + // If DB read fails, we'll fall back to external service + // eslint-disable-next-line no-console + console.debug('finetuningService.listFineTuningJobs - DB read failed, falling back to external service', e) + } + + // Fallback: query external finetuning service and persist results + const response = await axiosClient.get('/v1/fine_tuning/jobs') + const data = response.data + try { + if (Array.isArray(data)) { + for (const j of data) { + // best-effort persist + // eslint-disable-next-line no-await-in-loop + await persistJobToDb(j) + } + } + } catch (e) { + // ignore persistence errors + } + + return data + } catch (error: any) { + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.listFineTuningJobs - ${getErrorMessage(error)}` + ) + } +} + +/** + * Retrieve a specific fine-tuning job + */ +const retrieveFineTuningJob = async (fineTuningJobId: string) => { + const maxAttempts = 3 + const baseDelayMs = 500 + + const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms)) + + for (let attempt = 1; attempt <= maxAttempts; attempt++) { + try { + const response = await axiosClient.post('/v1/fine_tuning/jobs/retrieve', { + fine_tuning_job_id: fineTuningJobId + }) + const respData = response.data + // Persist/update DB with latest status + try { + await persistJobToDb(respData) + } catch (e) { + // ignore + } + return respData + } catch (error: any) { + const msg = getErrorMessage(error) + const isTransient = msg && ( + msg.toLowerCase().includes('socket hang up') || + msg.toLowerCase().includes('econnreset') || + msg.toLowerCase().includes('etimedout') || + msg.toLowerCase().includes('timeout') || + msg.toLowerCase().includes('connect') + ) + + if (attempt < maxAttempts && isTransient) { + const delay = baseDelayMs * Math.pow(2, attempt - 1) + // back off and retry silently + // eslint-disable-next-line no-await-in-loop + await sleep(delay) + continue + } + + // Only log a concise warning when this is the final attempt + if (attempt === maxAttempts) { + logger.warn(`finetuningService.retrieveFineTuningJob - final attempt ${attempt} failed for job ${fineTuningJobId}: ${msg}`) + } + + // Final failure: log details and throw + try { + logger.error('finetuningService.retrieveFineTuningJob - error details:', { + message: error?.message, + status: error?.response?.status, + responseData: error?.response?.data + }) + } catch (logErr) { + // ignore logging errors + } + + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.retrieveFineTuningJob - ${msg}` + ) + } + } + + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.retrieveFineTuningJob - failed after ${maxAttempts} attempts` + ) +} + +/** + * Cancel a fine-tuning job + */ +const cancelFineTuningJob = async (fineTuningJobId: string) => { + try { + const response = await 
axiosClient.post('/v1/fine_tuning/jobs/cancel', { + fine_tuning_job_id: fineTuningJobId + }) + // Update local DB to reflect cancelled status + try { + await updateJobInDb(fineTuningJobId, { status: 'cancelled', finishedDate: new Date() }) + } catch (e) { + // ignore + } + return response.data + } catch (error: any) { + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.cancelFineTuningJob - ${getErrorMessage(error)}` + ) + } +} + +/** + * Delete a fine-tuning job locally and attempt to cancel it remotely. + * This will cancel the external job and remove DB records for the job and checkpoints. + */ +const deleteFineTuningJob = async (fineTuningJobId: string) => { + try { + // Attempt to cancel external job + try { + await axiosClient.post('/v1/fine_tuning/jobs/cancel', { + fine_tuning_job_id: fineTuningJobId + }) + } catch (e) { + // ignore external cancel errors + try { + // eslint-disable-next-line no-console + console.debug('finetuningService.deleteFineTuningJob - external cancel failed, continuing to delete locally', e) + } catch (logErr) {} + } + + // Remove local DB records + try { + const appServer = getRunningExpressApp() + const repo = appServer.AppDataSource.getRepository(FineTuningJob) + + // delete job + await repo.delete({ id: String(fineTuningJobId) }) + } catch (e) { + try { + // eslint-disable-next-line no-console + console.error('finetuningService.deleteFineTuningJob - failed to delete local DB records', e) + } catch (logErr) {} + } + + // Attempt to remove any output files/directories for this job under /tmp/finetuning/output + try { + const OUTPUT_BASE_DIR = '/tmp/finetuning/output' + const jobOutputDir = path.join(OUTPUT_BASE_DIR, String(fineTuningJobId)) + const resolvedJobDir = path.resolve(jobOutputDir) + const resolvedBaseDir = path.resolve(OUTPUT_BASE_DIR) + + // Safety: ensure the resolved path is within the expected base directory + if (resolvedJobDir.startsWith(resolvedBaseDir)) { + // Remove directory recursively if it exists + if (fs.existsSync(resolvedJobDir)) { + try { + // Use fs.rmSync when available; fallback to recursive unlink if necessary + if (typeof fs.rmSync === 'function') { + fs.rmSync(resolvedJobDir, { recursive: true, force: true }) + } else { + // older Node versions: remove files inside then rmdir + const rimraf = require('rimraf') + rimraf.sync(resolvedJobDir) + } + // eslint-disable-next-line no-console + console.debug(`finetuningService.deleteFineTuningJob - removed output dir: ${resolvedJobDir}`) + } catch (rmErr) { + try { console.warn('finetuningService.deleteFineTuningJob - failed to remove output dir', rmErr) } catch (ignore) {} + } + } + + // Also remove zip file if present + const zipPath = `${resolvedJobDir}.zip` + if (fs.existsSync(zipPath)) { + try { + fs.unlinkSync(zipPath) + // eslint-disable-next-line no-console + console.debug(`finetuningService.deleteFineTuningJob - removed zip: ${zipPath}`) + } catch (zipErr) { + try { console.warn('finetuningService.deleteFineTuningJob - failed to remove zip file', zipErr) } catch (ignore) {} + } + } + } else { + try { console.warn('finetuningService.deleteFineTuningJob - output path outside base dir, skipping removal:', resolvedJobDir) } catch (ignore) {} + } + } catch (e) { + try { console.warn('finetuningService.deleteFineTuningJob - error while removing output files', e) } catch (ignore) {} + } + + return { success: true } + } catch (error: any) { + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: 
finetuningService.deleteFineTuningJob - ${getErrorMessage(error)}` + ) + } +} + +/** + * Download fine-tuning job output as a zip file + * Creates zip if needed, or returns existing zip immediately + * @param jobId - ID of the fine-tuning job + * @returns Path to the zipped file (throws NOT_FOUND if the job output does not exist) + */ +const downloadFineTuningOutput = async (jobId: string): Promise<string> => { + try { + if (!jobId) { + throw new InternalFlowiseError(StatusCodes.BAD_REQUEST, 'Job ID is required') + } + + const OUTPUT_BASE_DIR = '/tmp/finetuning/output' + const jobOutputDir = path.join(OUTPUT_BASE_DIR, jobId) + + // eslint-disable-next-line no-console + console.debug(`finetuningService.downloadFineTuningOutput - checking for output: ${jobOutputDir}`) + + // Verify job output directory exists + if (!fs.existsSync(jobOutputDir)) { + // eslint-disable-next-line no-console + console.warn(`finetuningService.downloadFineTuningOutput - output directory not found: ${jobOutputDir}`) + throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Fine-tuning job output not found for job: ${jobId}`) + } + + // Security check: ensure path is within the expected directory + const resolvedJobDir = path.resolve(jobOutputDir) + const resolvedBaseDir = path.resolve(OUTPUT_BASE_DIR) + if (!resolvedJobDir.startsWith(resolvedBaseDir)) { + // eslint-disable-next-line no-console + console.error(`finetuningService.downloadFineTuningOutput - path traversal attempt: ${jobOutputDir}`) + throw new InternalFlowiseError(StatusCodes.FORBIDDEN, 'Invalid job output path') + } + + // Ensure the output is zipped (returns immediately if zip is up-to-date) + const finalZipPath = await ensureFineTuningOutputZip(jobOutputDir, jobId) + if (!finalZipPath) { + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Failed to create zip for job ${jobId}` + ) + } + + // eslint-disable-next-line no-console + console.debug(`finetuningService.downloadFineTuningOutput - file ready for download: ${finalZipPath}`) + return finalZipPath + } catch (error: any) { + if (error instanceof InternalFlowiseError) { + throw error + } + // eslint-disable-next-line no-console + console.error(`finetuningService.downloadFineTuningOutput - error: ${error?.message || error}`) + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.downloadFineTuningOutput - ${getErrorMessage(error)}` + ) + } +} + +/** + * Get logs for a fine-tuning job by querying the Ray head node HTTP API. + * It will call: http://<ray_host>/api/jobs/<submission_id>/logs + * Environment: set FINETUNING_HOST to the hostname of the Ray head; port 8265 is assumed (e.g. FINETUNING_HOST="ray-head.example.com"). + */ +const getFineTuningJobLogs = async ( + fineTuningJobId: string, + options: { ray_job_id?: string } = {} +) => { + try { + const rayHost = process.env.FINETUNING_HOST ? `${process.env.FINETUNING_HOST}:8265` : 'undefined' + + // If caller provided an explicit ray_job_id, use it. Otherwise attempt to discover the Ray submission id + let submissionId: string | undefined = options.ray_job_id + + // Query Ray /api/jobs/ and select entries where entrypoint contains the FT id (jq-like) + const listUrl = `http://${rayHost}/api/jobs/` + console.debug('finetuningService.getFineTuningJobLogs - listUrl:', listUrl) + try { + const listResp = await axios.get(listUrl, { timeout: 20000 }) + // Debug: log status and length of Ray /api/jobs/ output; full dump only when explicitly enabled + try { + const raw = listResp.data + const len = typeof raw === 'string' ? 
raw.length : JSON.stringify(raw).length + console.debug('finetuningService.getFineTuningJobLogs - Ray /api/jobs/ status=', listResp.status, 'len=', len) + if (String(process.env.RAY_DUMP_JOBS).toLowerCase() === 'true') { + try { + const pretty = typeof raw === 'string' ? raw : JSON.stringify(raw, null, 2) + console.debug('finetuningService.getFineTuningJobLogs - Ray /api/jobs/ FULL DUMP:\n' + pretty) + } catch (e) { + try { console.debug('finetuningService.getFineTuningJobLogs - failed to stringify full Ray jobs list', String(e)) } catch (ignore) {} + } + } + } catch (logErr) { + try { console.debug('finetuningService.getFineTuningJobLogs - failed to inspect Ray jobs list', String(logErr)) } catch (ignore) {} + } + const jobsList = Array.isArray(listResp.data) ? listResp.data : [] + // Apply strict filter: entrypoint contains the exact FT id + const match = jobsList.find((j: any) => { + try { + const entrypoint = j?.entrypoint || '' + return String(entrypoint).includes(String(fineTuningJobId)) + } catch (e) { + return false + } + }) + if (match) { + submissionId = match.submission_id || match.job_id + } + } catch (e) { + try { console.error('finetuningService.getFineTuningJobLogs - failed to list Ray jobs', String(e)) } catch (err) {} + } + + // Construct logs URL with optional tail and fetch logs + const url = `http://${rayHost}/api/jobs/${encodeURIComponent(String(submissionId))}/logs` + const resp = await axios.get(url, { timeout: 30000 }) + // Normalize logs response so newlines are preserved and objects/arrays are readable + try { + const rawLogs = resp.data + if (typeof rawLogs === 'string') { + // string likely contains proper newlines + return { logs: rawLogs } + } + if (Array.isArray(rawLogs)) { + return { logs: rawLogs.join('\n') } + } + // object -> pretty-print with indentation to preserve newlines + return { logs: JSON.stringify(rawLogs, null, 2) } + } catch (e) { + // fallback to safe stringify + return { logs: JSON.stringify(resp.data, null, 2) } + } + } catch (error: any) { + // Provide helpful error details and return a structured error instead of throwing + const msg = `Error fetching logs: ${getErrorMessage(error)}` + try { (globalThis as any).console?.error && (globalThis as any).console.error('finetuningService.getFineTuningJobLogs -', String(error)) } catch (e) {} + return { logs: '', error: msg } + } +} + +export default { + uploadTrainingFile, + createFineTuningJob, + listFineTuningJobs, + retrieveFineTuningJob, + cancelFineTuningJob, + deleteFineTuningJob, + getFineTuningJobLogs, + downloadFineTuningOutput +} diff --git a/studio-frontend/packages/server/src/utils/webSocketDownloadManager.ts b/studio-frontend/packages/server/src/utils/webSocketDownloadManager.ts new file mode 100644 index 0000000..e69de29 diff --git a/studio-frontend/packages/server/src/ws/finetuningDownload.ts b/studio-frontend/packages/server/src/ws/finetuningDownload.ts new file mode 100644 index 0000000..484d2e8 --- /dev/null +++ b/studio-frontend/packages/server/src/ws/finetuningDownload.ts @@ -0,0 +1,172 @@ +import { Server, Socket } from 'socket.io' +import finetuningService from '../services/finetuning' +import logger from '../utils/logger' + +// Declare timer globals so this file compiles regardless of lib settings +declare function setTimeout(cb: (...args: any[]) => void, ms?: number): any +declare function clearTimeout(id: any): void + +/** + * Setup WebSocket handlers for fine-tuning output downloads + * This allows non-blocking, asynchronous zip creation and download + */ +export const 
setupFineTuningDownloadHandlers = (io: Server) => { + + logger.info('[WS Download] Setting up fine-tuning download namespace: /finetuning-download') + + // Create a dedicated namespace so download sockets don't mix with other WS handlers + const nsp = io.of('/finetuning-download') + + /** + * In-memory tracking of ongoing download tasks so multiple sockets can + * subscribe to the same job and reconnect (page refresh) without losing state. + * + * Map, + * downloadUrl?: string, + * fileName?: string, + * error?: string, + * timeoutHandle?: any + * }> + */ + const downloadTasks = new Map() + + // Grace period to keep completed task info for late reconnects (ms) + const COMPLETED_TASK_RETENTION_MS = 60 * 1000 // 60s + + nsp.on('connection', (socket: Socket) => { + logger.info(`[WS Download] Client connected - Socket ID: ${socket.id}`) + + const attachSubscriber = (jobId: string) => { + let task = downloadTasks.get(jobId) + if (!task) { + task = { + status: 'starting', + subscribers: new Set(), + downloadUrl: null, + fileName: null, + error: null, + timeoutHandle: null + } + downloadTasks.set(jobId, task) + } + + task.subscribers.add(socket) + return task + } + + // Handle fine-tuning output download request + // Client sends: { jobId: string } + socket.on('download-finetuning-output', async (data: { jobId: string }) => { + try { + const { jobId } = data + logger.info(`[WS Download] Download requested - Socket ID: ${socket.id}, Job ID: ${jobId}`) + + if (!jobId) { + socket.emit('download-finetuning-error', { + jobId: null, + error: 'Job ID is required' + }) + return + } + + // Attach this socket as a subscriber for this job + const task = attachSubscriber(jobId) + + // If task already completed, reply immediately with complete event + if (task.status === 'complete') { + socket.emit('download-finetuning-complete', { + jobId, + downloadUrl: task.downloadUrl, + fileName: task.fileName + }) + return + } + + // Emit current progress state to the newly connected socket + socket.emit('download-finetuning-progress', { + jobId, + status: task.status, + message: task.status === 'starting' ? 'Preparing download...' 
: 'Creating zip archive (this may take a few minutes)' + }) + + // If task is already zipping or starting and has a running promise, do nothing else + if (task.promise) { + // existing background work will notify subscribers when done + return + } + + // Kick off the async preparation and store the promise so others can join + task.status = 'zipping' + task.promise = (async () => { + try { + // Call the service to prepare the zip file (returns path) + const zipFilePath = await finetuningService.downloadFineTuningOutput(jobId) + + if (!zipFilePath) { + task.status = 'error' + task.error = 'Failed to create output archive' + // Notify all subscribers + task.subscribers.forEach((s: Socket) => { + s.emit('download-finetuning-error', { jobId, error: task.error }) + }) + return + } + + task.status = 'complete' + task.downloadUrl = `/api/v1/finetuning/download-ft/${jobId}` + task.fileName = `${jobId}-output.zip` + + logger.info(`[WS Download] Download ready for job: ${jobId}`) + + // Emit completion to all current subscribers + task.subscribers.forEach((s: Socket) => { + s.emit('download-finetuning-complete', { + jobId, + downloadUrl: task.downloadUrl, + fileName: task.fileName + }) + }) + + // Schedule cleanup of the completed task after retention period + task.timeoutHandle = setTimeout(() => { + downloadTasks.delete(jobId) + }, COMPLETED_TASK_RETENTION_MS) + + } catch (error: any) { + task.status = 'error' + task.error = error?.message || String(error) + logger.error(`[WS Download] Error preparing download for job ${jobId}: ${task.error}`) + task.subscribers.forEach((s: Socket) => { + s.emit('download-finetuning-error', { jobId, error: task.error }) + }) + // cleanup soon + task.timeoutHandle = setTimeout(() => { + downloadTasks.delete(jobId) + }, 5000) + } + })() + } catch (error: any) { + const errorMessage = error?.message || String(error) || 'Unknown error' + logger.error(`[WS Download] Handler error: ${errorMessage}`) + socket.emit('download-finetuning-error', { + jobId: data?.jobId || null, + error: errorMessage + }) + } + }) + + socket.on('disconnect', (reason: any) => { + logger.info(`[WS Download] Client disconnected - Socket ID: ${socket.id}, Reason: ${reason}`) + // Remove this socket from all task subscriber lists + downloadTasks.forEach((task, jobId) => { + if (task.subscribers && task.subscribers.has(socket)) { + task.subscribers.delete(socket) + } + }) + }) + + logger.debug(`[WS Download] Fine-tuning download handlers attached to socket ${socket.id}`) + }) +} diff --git a/studio-frontend/packages/server/src/ws/finetuningStatus.ts b/studio-frontend/packages/server/src/ws/finetuningStatus.ts new file mode 100644 index 0000000..3a78788 --- /dev/null +++ b/studio-frontend/packages/server/src/ws/finetuningStatus.ts @@ -0,0 +1,226 @@ +import { Server, Socket } from 'socket.io' +import finetuningService from '../services/finetuning' +import logger from '../utils/logger' + +// Declare timer globals so this file compiles regardless of lib settings +declare function setInterval(cb: (...args: any[]) => void, ms?: number): any +declare function clearInterval(id: any): void +declare function setTimeout(cb: (...args: any[]) => void, ms?: number): any + +// Store active job subscriptions: jobId -> Set of socket IDs +const jobSubscriptions = new Map<string, Set<string>>() + +// Background monitoring state +let monitoringInterval: any | null = null +const POLLING_INTERVAL = 5000 // 5 seconds - backend polls Ray API + +/** + * Setup WebSocket handlers for fine-tuning job status monitoring + * Clients can subscribe 
to specific job updates and receive real-time status changes + */ +export const setupFineTuningStatusHandlers = (io: Server) => { + io.on('connection', (socket: Socket) => { + logger.info(`[WS Status] Client connected - Socket ID: ${socket.id}`) + + /** + * Subscribe to job status updates + * Client sends: { jobIds: string[] } + * Server will emit 'job-status-update' events for these jobs + */ + socket.on('subscribe-job-status', (data: { jobIds: string[] }) => { + try { + const { jobIds } = data + + if (!Array.isArray(jobIds) || jobIds.length === 0) { + return + } + + // Add this socket to each job's subscription set + jobIds.forEach(jobId => { + if (!jobSubscriptions.has(jobId)) { + jobSubscriptions.set(jobId, new Set()) + } + jobSubscriptions.get(jobId)!.add(socket.id) + }) + + // Start background monitoring if not already running + startBackgroundMonitoring(io) + + // Send immediate acknowledgment + socket.emit('subscription-confirmed', { + jobIds, + message: 'Subscribed to job updates' + }) + + } catch (error: any) { + socket.emit('subscription-error', { + error: error?.message || 'Failed to subscribe' + }) + } + }) + + /** + * Unsubscribe from job status updates + * Client sends: { jobIds: string[] } + */ + socket.on('unsubscribe-job-status', (data: { jobIds: string[] }) => { + try { + const { jobIds } = data + + if (!Array.isArray(jobIds)) return + + jobIds.forEach(jobId => { + const subscribers = jobSubscriptions.get(jobId) + if (subscribers) { + subscribers.delete(socket.id) + if (subscribers.size === 0) { + jobSubscriptions.delete(jobId) + } + } + }) + + // Stop monitoring if no more subscriptions + if (jobSubscriptions.size === 0) { + stopBackgroundMonitoring() + } + + } catch (error: any) { + // Silent error handling + } + }) + + /** + * Handle client disconnect - clean up subscriptions + */ + socket.on('disconnect', (reason: any) => { + logger.info(`[WS Status] Client disconnected - Socket ID: ${socket.id}`) + + // Remove this socket from all job subscriptions + let removedCount = 0 + jobSubscriptions.forEach((subscribers, jobId) => { + if (subscribers.has(socket.id)) { + subscribers.delete(socket.id) + removedCount++ + if (subscribers.size === 0) { + jobSubscriptions.delete(jobId) + } + } + }) + + // Stop monitoring if no more subscriptions + if (jobSubscriptions.size === 0) { + stopBackgroundMonitoring() + } + }) + }) +} + +/** + * Start background monitoring of subscribed jobs + * Polls the fine-tuning service and emits updates via WebSocket + */ +function startBackgroundMonitoring(io: Server) { + // Already running + if (monitoringInterval) return + + // Poll immediately, then at regular intervals + checkJobStatuses(io) + + // Use global.setInterval to satisfy TypeScript without depending on DOM lib + // store as any to avoid NodeJS type issues in this repository's tsconfig + monitoringInterval = (setInterval(() => { + checkJobStatuses(io) + }, POLLING_INTERVAL) as unknown) as any +} + +/** + * Stop background monitoring + */ +function stopBackgroundMonitoring() { + if (!monitoringInterval) return + + clearInterval(monitoringInterval as any) + monitoringInterval = null +} + +/** + * Check status of all subscribed jobs and emit updates + */ +async function checkJobStatuses(io: Server) { + const jobIds = Array.from(jobSubscriptions.keys()) + + if (jobIds.length === 0) { + stopBackgroundMonitoring() + return + } + + // Keep routine checks quiet - debug level only + logger.debug(`[WS Status] Checking ${jobIds.length} subscribed jobs`) + + // Retrieve all subscribed jobs in 
parallel (non-blocking) + const promises = jobIds.map(async (jobId) => { + try { + const jobData = await finetuningService.retrieveFineTuningJob(jobId) + return { jobId, jobData, error: null } + } catch (error: any) { + logger.error(`[WS Status] Error retrieving job ${jobId}: ${error?.message || error}`) + return { jobId, jobData: null, error: error?.message || 'Failed to retrieve job' } + } + }) + + const results = await Promise.allSettled(promises) + + // Emit updates to subscribed clients + results.forEach((result) => { + if (result.status === 'rejected') { + logger.error(`[WS Status] Promise rejected: ${result.reason}`) + return + } + + const { jobId, jobData, error } = result.value + + // Get subscribers for this job + const subscribers = jobSubscriptions.get(jobId) + if (!subscribers || subscribers.size === 0) return + + if (error || !jobData) { + // Emit error to subscribers + subscribers.forEach(socketId => { + io.to(socketId).emit('job-status-error', { + jobId, + error: error || 'No data returned' + }) + }) + return + } + + // Normalize job data + const normalizedJob = { + id: jobData.id || jobData.job_id || jobData.fine_tuning_job_id || jobId, + name: jobData.name || jobData.id || jobId, + status: jobData.status || jobData.state || 'unknown', + model: jobData.model || 'N/A', + dataset: jobData.dataset || jobData.training_file || jobData.trainingFile || 'N/A', + createdDate: jobData.createdDate || jobData.created_at || jobData.createdAt || new Date().toISOString(), + // Include all original data + ...jobData + } + + // Emit update to all subscribers + subscribers.forEach(socketId => { + io.to(socketId).emit('job-status-update', normalizedJob) + }) + + // If job is no longer running, automatically unsubscribe after a delay + const finalStatuses = ['succeeded', 'completed', 'failed', 'cancelled', 'canceled'] + if (finalStatuses.includes((normalizedJob.status || '').toLowerCase())) { + // Delay cleanup slightly to allow any final events to be delivered + setTimeout(() => { + const subs = jobSubscriptions.get(jobId) + if (subs) { + jobSubscriptions.delete(jobId) + } + }, 10000) // Keep sending updates for 10 more seconds, then clean up + } + }) +} diff --git a/studio-frontend/packages/ui/public/silent-check-sso.html b/studio-frontend/packages/ui/public/silent-check-sso.html new file mode 100644 index 0000000..fb906ac --- /dev/null +++ b/studio-frontend/packages/ui/public/silent-check-sso.html @@ -0,0 +1,16 @@ + + + + Silent Check SSO + + + + + diff --git a/studio-frontend/packages/ui/src/KeycloakContext.jsx b/studio-frontend/packages/ui/src/KeycloakContext.jsx index 9753ee6..48b9001 100644 --- a/studio-frontend/packages/ui/src/KeycloakContext.jsx +++ b/studio-frontend/packages/ui/src/KeycloakContext.jsx @@ -1,51 +1,89 @@ import React, { createContext, useContext, useEffect, useState } from 'react'; -import Keycloak from 'keycloak-js'; // Create the Keycloak context const KeycloakContext = createContext(null); +// Check if Keycloak is disabled via environment variable +const isKeycloakDisabled = import.meta.env.VITE_DISABLE_KEYCLOAK === 'true'; +console.log('isKeycloakDisabled: ', isKeycloakDisabled); + +// Simple user object for when Keycloak is disabled +const createAdminUser = () => ({ + authenticated: true, + tokenParsed: { + email: 'admin@admin.com', + preferred_username: 'admin', + name: 'Admin User', + given_name: 'Admin', + family_name: 'User', + resource_access: { + genaistudio: { + roles: ['admin'] + } + } + }, + logout: () => { + console.log('Logout called - refreshing 
page'); + window.location.href = '/'; + } +}); + // Provide the Keycloak context to the application export const KeycloakProvider = ({ children }) => { const [keycloak, setKeycloak] = useState(null); const [isInitialized, setIsInitialized] = useState(false); useEffect(() => { - if (!window.crypto || !window.crypto.subtle) { - console.error("Web Crypto API is not available. This may cause security issues."); + // If Keycloak is disabled, use simple admin user + if (isKeycloakDisabled) { + console.info("Keycloak authentication is disabled. Using admin@admin.com as default user."); + const adminUser = createAdminUser(); + setKeycloak(adminUser); + setIsInitialized(true); + return; } - const initOptions = { - url: '/auth/', - realm: 'genaistudio', - clientId: 'genaistudio', - onLoad: 'login-required', // check-sso | login-required - responseType: 'code', // Corrected from KeycloakResponseType to responseType - silentCheckSsoRedirectUri: window.location.origin + "/silent-check-sso.html", - checkLoginIframe: false, - }; - - const kc = new Keycloak(initOptions); - - kc.init({ - onLoad: initOptions.onLoad, - responseType: 'code', // Corrected from KeycloakResponseType to responseType - }).then((auth) => { - if (!auth) { - window.location.reload(); - } else { - console.info("Authenticated"); - console.log('auth', auth); - console.log('Keycloak', kc); - - kc.onTokenExpired = () => { - console.log('token expired'); - }; - - setKeycloak(kc); // Set the Keycloak instance in state - setIsInitialized(true); // Mark initialization as complete + // Keycloak is enabled - dynamically import and initialize + import('keycloak-js').then((KeycloakModule) => { + const Keycloak = KeycloakModule.default; + + if (!window.crypto || !window.crypto.subtle) { + console.error("Web Crypto API is not available. 
This may cause security issues."); } - }).catch((error) => { - console.error("Authentication Failed", error); + + const initOptions = { + url: '/auth/', + realm: 'genaistudio', + clientId: 'genaistudio', + onLoad: 'login-required', + responseType: 'code', + silentCheckSsoRedirectUri: window.location.origin + "/silent-check-sso.html", + checkLoginIframe: false, + }; + + const kc = new Keycloak(initOptions); + + kc.init({ + onLoad: initOptions.onLoad, + responseType: 'code', + }).then((auth) => { + if (!auth) { + window.location.reload(); + } else { + console.info("Authenticated with Keycloak"); + console.log('auth', auth); + console.log('Keycloak', kc); + + kc.onTokenExpired = () => { + console.log('token expired'); + }; + + setKeycloak(kc); + setIsInitialized(true); + } + }).catch((error) => { + console.error("Authentication Failed", error); + }); }); }, []); diff --git a/studio-frontend/packages/ui/src/api/client.js b/studio-frontend/packages/ui/src/api/client.js index d2dd873..7606ec2 100644 --- a/studio-frontend/packages/ui/src/api/client.js +++ b/studio-frontend/packages/ui/src/api/client.js @@ -4,7 +4,7 @@ import { baseURL } from '@/store/constant' const apiClient = axios.create({ baseURL: `${baseURL}/api/v1`, headers: { - 'Content-type': 'application/json', + 'x-request-from': 'internal' } }) diff --git a/studio-frontend/packages/ui/src/api/finetuning.js b/studio-frontend/packages/ui/src/api/finetuning.js new file mode 100644 index 0000000..b3f4e4a --- /dev/null +++ b/studio-frontend/packages/ui/src/api/finetuning.js @@ -0,0 +1,299 @@ +import client from './client' +import { io } from 'socket.io-client' +import { baseURL as apiBaseURL } from '@/store/constant' + +// Get the base URL for WebSocket connection +const getSocketUrl = () => { + // Use the base URL from constants (without /api/v1) + return apiBaseURL || window.location.origin +} + +// Track active download sockets per jobId to avoid duplicate connections +const downloadSocketMap = new Map() + +const finetuningApi = { + // Upload training file + uploadFile: (file, purpose = 'fine-tune', onUploadProgress) => { + const formData = new FormData() + formData.append('file', file) + formData.append('purpose', purpose) + + return client.post('/finetuning/files', formData, { + // DO NOT set Content-Type here; letting axios set it ensures the multipart boundary is included + onUploadProgress + }) + }, + + // Create new fine-tuning job + createJob: (jobData) => { + const payload = { + training_file: jobData.training_file, + model: jobData.model + } + + // Add optional General configuration + if (jobData.General) { + payload.General = jobData.General + } + + // Add optional Dataset configuration + if (jobData.Dataset) { + payload.Dataset = jobData.Dataset + } + + // Add optional Training configuration + if (jobData.Training) { + payload.Training = jobData.Training + } + + return client.post('/finetuning/jobs', payload) + }, + + // List all fine-tuning jobs + getAllJobs: () => client.get('/finetuning/jobs'), + + // Retrieve specific fine-tuning job + getJob: (fineTuningJobId) => { + return client.post('/finetuning/jobs/retrieve', { + fine_tuning_job_id: fineTuningJobId + }) + }, + + // Cancel a fine-tuning job + cancelJob: (fineTuningJobId) => { + return client.post('/finetuning/jobs/cancel', { + fine_tuning_job_id: fineTuningJobId + }) + }, + + // Get logs for a fine-tuning job + getJobLogs: (fineTuningJobId, opts = {}) => { + return client.post('/finetuning/jobs/logs', { + fine_tuning_job_id: fineTuningJobId, + ray_job_id: 
opts.ray_job_id + }) + }, + + // Download fine-tuning job output as a zip file + // This returns a blob that can be saved as a file + // Accepts optional `onDownloadProgress` callback (progress event) and `signal` (AbortSignal) + downloadFinetuningOutput: (jobId, onDownloadProgress = undefined, signal = undefined) => { + const cfg = { + responseType: 'blob', + // allow long-running / large downloads + timeout: 0, + maxContentLength: Infinity, + maxBodyLength: Infinity + } + if (typeof onDownloadProgress === 'function') cfg.onDownloadProgress = onDownloadProgress + if (signal) cfg.signal = signal + return client.get(`/finetuning/download-ft/${encodeURIComponent(jobId)}`, cfg) + }, + + /** + * Download fine-tuning output using WebSocket for async zip preparation + * Each download gets its own dedicated WebSocket connection + * No timeout - waits indefinitely until zip is ready + * @param {string} jobId - The fine-tuning job ID + * @param {Object} callbacks - Callback functions { onProgress, onComplete, onError } + * @returns {Function} Cleanup function to disconnect this socket + */ + downloadFinetuningOutputWS: (jobId, callbacks = {}) => { + const { onProgress, onComplete, onError } = callbacks + + // Reuse existing socket for this jobId if present (even if not yet connected). + // This prevents duplicate sockets when React StrictMode mounts components twice. + const existingSocket = downloadSocketMap.get(jobId) + if (existingSocket) { + console.log(`[WS Download ${jobId}] Reusing existing socket (id: ${existingSocket.id || 'pending'})`) + // Attach provided callbacks to the existing socket + if (onProgress) existingSocket.on('download-finetuning-progress', onProgress) + if (onComplete) existingSocket.on('download-finetuning-complete', onComplete) + if (onError) existingSocket.on('download-finetuning-error', onError) + + // Return cleanup that detaches these listeners + return () => { + try { + if (onProgress) existingSocket.off('download-finetuning-progress', onProgress) + if (onComplete) existingSocket.off('download-finetuning-complete', onComplete) + if (onError) existingSocket.off('download-finetuning-error', onError) + } catch (e) {} + } + } + + // Connect specifically to the '/finetuning-download' namespace so server-side + // download handlers are isolated from status sockets. 
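+ // Illustrative usage (a sketch only, not part of this module): a component
+ // would typically start the download in an effect and keep the returned
+ // cleanup function for unmount. The payload shape assumed below matches the
+ // server-side emit, where 'download-finetuning-complete' carries
+ // { jobId, downloadUrl, fileName }:
+ //
+ //   const cleanup = finetuningApi.downloadFinetuningOutputWS(jobId, {
+ //     onComplete: ({ downloadUrl, fileName }) => saveFromUrl(downloadUrl, fileName),
+ //     onError: ({ error }) => showErrorToast(error)
+ //   })
+ //   // later, e.g. on unmount: cleanup()
+ //
+ // `saveFromUrl` and `showErrorToast` are hypothetical helpers, named here
+ // only for the example.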
+ const socketUrl = getSocketUrl() + + // Create dedicated socket for this download namespace + // Append the namespace to the URL so socket.io-client connects to it directly + const socket = io(`${socketUrl}/finetuning-download`, { + transports: ['websocket', 'polling'], + reconnection: true, + reconnectionDelay: 10000, + reconnectionAttempts: 5, + timeout: false + }) + + // Store socket for this job so future calls reuse it + try { downloadSocketMap.set(jobId, socket) } catch (e) {} + + // Handle connection + socket.on('connect', () => { + // Notify that the download socket is connected for this jobId + console.log(`[WS Download ${jobId}] Connected (socket id: ${socket.id})`) + // Request download preparation + socket.emit('download-finetuning-output', { jobId }) + }) + + // Handle completion + socket.on('download-finetuning-complete', (data) => { + console.log(`[WS Download ${jobId}] Complete`) + if (onComplete) onComplete(data) + // Disconnect after completion + try { socket.disconnect() } catch (e) {} + // remove from map + try { downloadSocketMap.delete(jobId) } catch (e) {} + }) + + // Handle errors + socket.on('download-finetuning-error', (data) => { + console.error(`[WS Download ${jobId}] Error:`, data) + if (onError) onError(data) + try { socket.disconnect() } catch (e) {} + try { downloadSocketMap.delete(jobId) } catch (e) {} + }) + + // Handle connection errors + socket.on('connect_error', (error) => { + console.error(`[WS Download ${jobId}] Connection error:`, error.message) + // Don't call onError for connection errors - let it retry + }) + + // Handle disconnect + socket.on('disconnect', (reason) => { + console.log(`[WS Download ${jobId}] Disconnected:`, reason) + try { downloadSocketMap.delete(jobId) } catch (e) {} + }) + + // Return cleanup function + return () => { + try { + if (onProgress) socket.off('download-finetuning-progress', onProgress) + if (onComplete) socket.off('download-finetuning-complete', onComplete) + if (onError) socket.off('download-finetuning-error', onError) + } catch (e) {} + try { + if (socket && socket.connected) { + console.log(`[WS Download ${jobId}] Manually disconnecting`) + socket.disconnect() + } + } catch (e) {} + try { downloadSocketMap.delete(jobId) } catch (e) {} + } + }, + + /** + * Subscribe to real-time job status updates via WebSocket + * Creates a dedicated WebSocket connection per job ID + * @param {string} jobId - Single job ID to monitor + * @param {Object} callbacks - Callback functions { onUpdate, onError, onConnected } + * @returns {Function} Cleanup function to disconnect + */ + subscribeToJobStatus: (jobId, callbacks = {}) => { + const { onUpdate, onError, onConnected } = callbacks + + if (!jobId) { + if (onError) onError({ error: 'No job ID provided' }) + return () => {} + } + + const socketUrl = getSocketUrl() + const socket = io(socketUrl, { + transports: ['websocket', 'polling'], + reconnection: true, + reconnectionDelay: 1000, + reconnectionAttempts: Infinity, + timeout: false + }) + + const FINAL_STATUSES = ['succeeded', 'completed', 'failed', 'cancelled', 'canceled'] + + socket.on('connect', () => { + console.log(`[WS] Connected for job ${jobId}`) + socket.emit('subscribe-job-status', { jobIds: [jobId] }) + }) + + socket.on('subscription-confirmed', (data) => { + if (onConnected) onConnected(data) + }) + + socket.on('subscription-error', (data) => { + if (onError) onError(data) + }) + + socket.on('job-status-update', (jobData) => { + // Only process updates for this specific job + if (jobData.id === jobId) { + if 
(onUpdate) onUpdate(jobData) + + // Check if job reached final status + const status = (jobData.status || '').toString().toLowerCase() + if (FINAL_STATUSES.includes(status)) { + // Auto-disconnect after final status + setTimeout(() => { + if (socket && socket.connected) { + socket.disconnect() + } + }, 1000) + } + } + }) + + socket.on('job-status-error', (err) => { + if (err.jobId === jobId && onError) { + onError(err) + } + }) + + socket.on('disconnect', () => { + console.log(`[WS] Disconnected for job ${jobId}`) + }) + + socket.on('connect_error', (error) => { + console.error(`[WS] Connection error for job ${jobId}:`, error.message) + }) + + // Return cleanup function + return () => { + if (socket && socket.connected) { + socket.emit('unsubscribe-job-status', { jobIds: [jobId] }) + socket.disconnect() + } + } + }, + + // Delete job API + deleteJob: (jobId) => { + // Call the backend delete endpoint which will cancel remote job and remove local DB records + return client.post('/finetuning/jobs/delete', { fine_tuning_job_id: jobId }) + }, + + // Get available base models (to be implemented on backend) + getBaseModels: () => { + // Return common models for now + return Promise.resolve({ + data: [ + 'meta-llama/Llama-2-7b-chat-hf', + 'meta-llama/Llama-2-7b-hf', + 'meta-llama/Llama-2-13b-hf', + 'BAAI/bge-reranker-large', + 'BAAI/bge-base-en-v1.5', + 'Qwen/Qwen2.5-3B', + 'Qwen/Qwen2.5-7B' + ] + }) + } +} + +export default finetuningApi \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/hooks/useApi.jsx b/studio-frontend/packages/ui/src/hooks/useApi.jsx index 932f0a6..8f594d9 100644 --- a/studio-frontend/packages/ui/src/hooks/useApi.jsx +++ b/studio-frontend/packages/ui/src/hooks/useApi.jsx @@ -10,6 +10,8 @@ export default (apiFunc) => { try { const result = await apiFunc(...args) setData(result.data) + // return the data so callers awaiting request() get the payload + return result.data } catch (err) { setError(err || 'Unexpected Error!') } finally { diff --git a/studio-frontend/packages/ui/src/hooks/useFineTuningDownloadSocket.js b/studio-frontend/packages/ui/src/hooks/useFineTuningDownloadSocket.js new file mode 100644 index 0000000..e69de29 diff --git a/studio-frontend/packages/ui/src/hooks/useWebSocketDownload.js b/studio-frontend/packages/ui/src/hooks/useWebSocketDownload.js new file mode 100644 index 0000000..e69de29 diff --git a/studio-frontend/packages/ui/src/layout/MainLayout/Header/index.jsx b/studio-frontend/packages/ui/src/layout/MainLayout/Header/index.jsx index f276535..5ddb510 100644 --- a/studio-frontend/packages/ui/src/layout/MainLayout/Header/index.jsx +++ b/studio-frontend/packages/ui/src/layout/MainLayout/Header/index.jsx @@ -5,8 +5,9 @@ import { useNavigate } from 'react-router-dom' // material-ui import { useTheme } from '@mui/material/styles' -import { Avatar, Box, ButtonBase, Switch } from '@mui/material' +import { Avatar, Box, ButtonBase, Switch, Typography, IconButton, useMediaQuery } from '@mui/material' import { styled } from '@mui/material/styles' +import MenuIcon from '@mui/icons-material/Menu' // project imports import LogoSection from '../LogoSection' @@ -87,9 +88,10 @@ const MaterialUISwitch = styled(Switch)(({ theme }) => ({ } })) -const Header = ({userId}) => { +const Header = ({userId, handleLeftDrawerToggle}) => { // console.log ('Header', userId) const theme = useTheme() + const matchDownMd = useMediaQuery(theme.breakpoints.down('md')) // const navigate = useNavigate() // const customization = useSelector((state) => 
state.customization) @@ -122,18 +124,39 @@ const Header = ({userId}) => { width: '100%', // Full width of the parent container }} > - {/* Logo Section */} + {/* Left Section - Mobile menu + Logo */} - + {/* Mobile menu button */} + {matchDownMd && handleLeftDrawerToggle && ( + + + + )} + + {/* Logo - always visible on mobile, hidden on desktop in header */} + + + + + {/* Desktop logo - hidden on mobile */} + diff --git a/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/CollapsedMenuList/index.jsx b/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/CollapsedMenuList/index.jsx new file mode 100644 index 0000000..8d24ca3 --- /dev/null +++ b/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/CollapsedMenuList/index.jsx @@ -0,0 +1,104 @@ +import { forwardRef } from 'react' +import { Link } from 'react-router-dom' +import { useDispatch, useSelector } from 'react-redux' + +// material-ui +import { useTheme } from '@mui/material/styles' +import { Box, IconButton, Tooltip, useMediaQuery } from '@mui/material' + +// project imports +import { MENU_OPEN, SET_MENU } from '@/store/actions' +import config from '@/config' +import menuItem from '@/menu-items' + +// ==============================|| COLLAPSED SIDEBAR MENU LIST ||============================== // + +const CollapsedMenuList = () => { + const theme = useTheme() + const dispatch = useDispatch() + const customization = useSelector((state) => state.customization) + const matchesSM = useMediaQuery(theme.breakpoints.down('lg')) + + // Get all menu items + const getAllMenuItems = (items) => { + let allItems = [] + items.forEach(item => { + if (item.type === 'group' && item.children) { + item.children.forEach(child => { + if (child.type === 'item') { + allItems.push(child) + } + }) + } + }) + return allItems + } + + const menuItems = getAllMenuItems(menuItem.items) + + const itemHandler = (item) => { + dispatch({ type: MENU_OPEN, id: item.id }) + if (matchesSM) dispatch({ type: SET_MENU, opened: false }) + } + + const CollapsedNavItem = ({ item }) => { + const Icon = item.icon + const isSelected = customization.isOpen.findIndex((id) => id === item.id) > -1 + + let itemTarget = '_self' + if (item.target) { + itemTarget = '_blank' + } + + let linkProps = { + component: forwardRef(function CollapsedNavItemComponent(props, ref) { + return + }) + } + if (item?.external) { + linkProps = { component: 'a', href: item.url, target: itemTarget } + } + + return ( + + itemHandler(item)} + sx={{ + width: '40px', + height: '40px', + margin: '4px 0', + backgroundColor: isSelected ? theme.palette.action.selected : 'transparent', + color: isSelected ? theme.palette.primary.main : theme.palette.text.secondary, + '&:hover': { + backgroundColor: theme.palette.action.hover, + color: theme.palette.primary.main + }, + borderRadius: '8px' + }} + disabled={item.disabled} + > + {item.icon ? 
: null} + + + ) + } + + return ( + + {menuItems.map((item) => ( + + ))} + + ) +} + +export default CollapsedMenuList \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/NavGroup/index.jsx b/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/NavGroup/index.jsx index 0625d54..f93ecfd 100644 --- a/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/NavGroup/index.jsx +++ b/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/NavGroup/index.jsx @@ -44,13 +44,11 @@ const NavGroup = ({ item }) => { ) } - sx={{ py: '20px' }} + sx={{ py: 0 }} > {items} - {/* group divider */} - ) } diff --git a/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/index.jsx b/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/index.jsx index efdda72..6ce5c01 100644 --- a/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/index.jsx +++ b/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/index.jsx @@ -2,7 +2,10 @@ import PropTypes from 'prop-types' // material-ui import { useTheme } from '@mui/material/styles' -import { Box, Drawer, useMediaQuery } from '@mui/material' +import { Box, Drawer, useMediaQuery, IconButton, Tooltip, Typography } from '@mui/material' +import ChevronLeftIcon from '@mui/icons-material/ChevronLeft' +import ChevronRightIcon from '@mui/icons-material/ChevronRight' +import MenuIcon from '@mui/icons-material/Menu' // third-party import PerfectScrollbar from 'react-perfect-scrollbar' @@ -10,8 +13,9 @@ import { BrowserView, MobileView } from 'react-device-detect' // project imports import MenuList from './MenuList' +import CollapsedMenuList from './MenuList/CollapsedMenuList' import LogoSection from '../LogoSection' -import { drawerWidth, headerHeight } from '@/store/constant' +import { drawerWidth, drawerWidthCollapsed, headerHeight } from '@/store/constant' // ==============================|| SIDEBAR DRAWER ||============================== // @@ -19,23 +23,86 @@ const Sidebar = ({ drawerOpen, drawerToggle, window }) => { const theme = useTheme() const matchUpMd = useMediaQuery(theme.breakpoints.up('md')) - const drawer = ( + // Desktop collapsed drawer content + const collapsedDrawer = ( + + {/* Sidebar icon when collapsed with tooltip */} + + + + + + + + + {/* Collapsed Menu Items */} + + + + + ) + + // Desktop expanded drawer content + const expandedDrawer = ( <> + {/* Header with GenAI Studio text and collapse button */} - - - + + GenAI Studio + + + + + + + + + {/* Menu content */} { - - - - - ) - const container = window !== undefined ? () => window.document.body : undefined - - return ( - - + - {drawer} - - + + GenAI Studio + + + + + + + {/* Mobile Menu content */} + + + + + ) + + const container = window !== undefined ? () => window.document.body : undefined + + return ( + <> + {/* Desktop Sidebar - Always present, changes width */} + {matchUpMd && ( + + {drawerOpen ? 
expandedDrawer : collapsedDrawer} + + )} + + {/* Mobile Sidebar - Overlay */} + {!matchUpMd && ( + + {mobileDrawer} + + )} + ) } diff --git a/studio-frontend/packages/ui/src/layout/MainLayout/ViewHeader.jsx b/studio-frontend/packages/ui/src/layout/MainLayout/ViewHeader.jsx index 9648888..cf39de8 100644 --- a/studio-frontend/packages/ui/src/layout/MainLayout/ViewHeader.jsx +++ b/studio-frontend/packages/ui/src/layout/MainLayout/ViewHeader.jsx @@ -24,17 +24,18 @@ const ViewHeader = ({ const theme = useTheme() return ( - + - + {isBackButton && ( diff --git a/studio-frontend/packages/ui/src/layout/MainLayout/index.jsx b/studio-frontend/packages/ui/src/layout/MainLayout/index.jsx index f53facc..491708f 100644 --- a/studio-frontend/packages/ui/src/layout/MainLayout/index.jsx +++ b/studio-frontend/packages/ui/src/layout/MainLayout/index.jsx @@ -4,12 +4,13 @@ import { Outlet } from 'react-router-dom' // material-ui import { styled, useTheme } from '@mui/material/styles' -import { AppBar, Box, Chip, CssBaseline, Toolbar, useMediaQuery } from '@mui/material' +import { AppBar, Box, Chip, CssBaseline, Toolbar, useMediaQuery, IconButton, Fab } from '@mui/material' +import ChevronRightIcon from '@mui/icons-material/ChevronRight' // project imports import Header from './Header' import Sidebar from './Sidebar' -import { drawerWidth, headerHeight } from '@/store/constant' +import { drawerWidth, drawerWidthCollapsed, headerHeight } from '@/store/constant' import { SET_MENU } from '@/store/actions' import {useKeycloak } from '../../KeycloakContext.jsx' @@ -17,43 +18,31 @@ import {useKeycloak } from '../../KeycloakContext.jsx' // styles const Main = styled('main', { shouldForwardProp: (prop) => prop !== 'open' })(({ theme, open }) => ({ ...theme.typography.mainContent, - ...(!open && { - backgroundColor: 'transparent', - borderBottomLeftRadius: 0, - borderBottomRightRadius: 0, - transition: theme.transitions.create('all', { - easing: theme.transitions.easing.sharp, - duration: theme.transitions.duration.leavingScreen - }), - marginRight: 0, - [theme.breakpoints.up('md')]: { - marginLeft: -drawerWidth, - width: `calc(100% - ${drawerWidth}px)` - }, - [theme.breakpoints.down('md')]: { - marginLeft: '20px', - width: `calc(100% - ${drawerWidth}px)`, - padding: '16px' - }, - [theme.breakpoints.down('sm')]: { - marginLeft: '10px', - width: `calc(100% - ${drawerWidth}px)`, - padding: '16px', - marginRight: '10px' - } + backgroundColor: 'transparent', + borderBottomLeftRadius: 0, + borderBottomRightRadius: 0, + transition: theme.transitions.create(['margin', 'width'], { + easing: theme.transitions.easing.sharp, + duration: theme.transitions.duration.leavingScreen }), - ...(open && { - backgroundColor: 'transparent', - transition: theme.transitions.create('all', { + marginRight: 0, + [theme.breakpoints.up('md')]: { + marginLeft: 0, + width: `calc(100% - ${open ? 
drawerWidth : drawerWidthCollapsed}px)`, + transition: theme.transitions.create(['margin', 'width'], { easing: theme.transitions.easing.easeOut, duration: theme.transitions.duration.enteringScreen }), + paddingLeft: '8px', + paddingRight: '8px', + paddingBottom: '8px', + paddingTop: '2px' + }, + [theme.breakpoints.down('md')]: { marginLeft: 0, - marginRight: 0, - borderBottomLeftRadius: 0, - borderBottomRightRadius: 0, - width: `calc(100% - ${drawerWidth}px)` - }) + width: '100%', + padding: '16px' + } })) // ==============================|| MAIN LAYOUT ||============================== // @@ -81,6 +70,7 @@ const MainLayout = () => { } useEffect(() => { + // On desktop, start with sidebar open; on mobile, keep it closed until user opens setTimeout(() => dispatch({ type: SET_MENU, opened: !matchDownMd }), 0) // eslint-disable-next-line react-hooks/exhaustive-deps }, [matchDownMd]) @@ -101,12 +91,12 @@ const MainLayout = () => { }} > -
+
{/* drawer */} - {/* */} + {/* main content */} (
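A minimal sketch of the layout contract the MainLayout change above introduces, assuming only the two constants from store/constant.js (drawerWidth = 260, existing; drawerWidthCollapsed = 64, added in this diff): the desktop drawer is now permanent and merely animates its width, so the main content area resizes by subtracting whichever drawer width is active, rather than sliding with the negative margin the old code used.

import { styled } from '@mui/material/styles'

// Values assumed from store/constant.js
const drawerWidth = 260
const drawerWidthCollapsed = 64

// Main content area: width tracks the drawer state; no negative margins
const Main = styled('main', { shouldForwardProp: (prop) => prop !== 'open' })(
    ({ theme, open }) => ({
        marginLeft: 0,
        width: `calc(100% - ${open ? drawerWidth : drawerWidthCollapsed}px)`,
        transition: theme.transitions.create(['margin', 'width'], {
            easing: theme.transitions.easing.easeOut,
            duration: theme.transitions.duration.enteringScreen
        })
    })
)

Because the drawer never unmounts on desktop, collapsing it only narrows it to icon width, and the content area resizes in step; this avoids the stale negative-margin state the removed code had to special-case for the closed drawer.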
diff --git a/studio-frontend/packages/ui/src/menu-items/dashboard.js b/studio-frontend/packages/ui/src/menu-items/dashboard.js index 6c7c3b1..ba88227 100644 --- a/studio-frontend/packages/ui/src/menu-items/dashboard.js +++ b/studio-frontend/packages/ui/src/menu-items/dashboard.js @@ -8,11 +8,13 @@ import { IconLock, IconRobot, IconVariable, - IconFiles + IconFiles, + IconApps, + IconBrain } from '@tabler/icons-react' // constant -const icons = { IconUsersGroup, IconHierarchy, IconBuildingStore, IconKey, IconTool, IconLock, IconRobot, IconVariable, IconFiles } +const icons = { IconUsersGroup, IconHierarchy, IconBuildingStore, IconKey, IconTool, IconLock, IconRobot, IconVariable, IconFiles, IconApps, IconBrain } // ==============================|| DASHBOARD MENU ITEMS ||============================== // @@ -23,27 +25,19 @@ const dashboard = { children: [ { id: 'opeaflows', - title: 'OPEA Flows', + title: 'Workflows', type: 'item', url: '/opeaflows', - icon: icons.IconHierarchy, - breakcrumbs: true + icon: icons.IconApps, + breadcrumbs: true }, { - id: 'sandbox', - title: 'OPEA Sandbox Evaluation', + id: 'finetuning', + title: 'Fine-tuning', type: 'item', - url: '/opeaflows', - icon: icons.IconHierarchy, - breakcrumbs: true - }, - { - id: 'opeadeployment', - title: 'OPEA Deployment', - type: 'item', - url: '/opeaflows', - icon: icons.IconHierarchy, - breakcrumbs: true + url: '/finetuning', + icon: icons.IconBrain, + breadcrumbs: true } // { // id: 'chatflows', diff --git a/studio-frontend/packages/ui/src/routes/MainRoutes.jsx b/studio-frontend/packages/ui/src/routes/MainRoutes.jsx index cd77b07..6b7a388 100644 --- a/studio-frontend/packages/ui/src/routes/MainRoutes.jsx +++ b/studio-frontend/packages/ui/src/routes/MainRoutes.jsx @@ -7,6 +7,9 @@ import Loadable from '@/ui-component/loading/Loadable' // chatflows routing const Opeaflows = Loadable(lazy(() => import('@/views/opeaflows'))) +// finetuning routing +const Finetuning = Loadable(lazy(() => import('@/views/finetuning'))) + // tracer routing const Tracer = Loadable(lazy(() => import('@/views/tracer'))) @@ -59,6 +62,10 @@ const MainRoutes = { path: '/opeaflows', element: }, + { + path: '/finetuning', + element: + }, { path:'/tracer/:ns', element: diff --git a/studio-frontend/packages/ui/src/store/constant.js b/studio-frontend/packages/ui/src/store/constant.js index de700eb..1143c56 100644 --- a/studio-frontend/packages/ui/src/store/constant.js +++ b/studio-frontend/packages/ui/src/store/constant.js @@ -1,6 +1,7 @@ // constant export const gridSpacing = 3 export const drawerWidth = 260 +export const drawerWidthCollapsed = 64 export const appDrawerWidth = 320 export const headerHeight = 80 export const maxScroll = 100000 diff --git a/studio-frontend/packages/ui/src/ui-component/button/AnimateButton.jsx b/studio-frontend/packages/ui/src/ui-component/button/AnimateButton.jsx index ce2d3fb..98b8852 100644 --- a/studio-frontend/packages/ui/src/ui-component/button/AnimateButton.jsx +++ b/studio-frontend/packages/ui/src/ui-component/button/AnimateButton.jsx @@ -5,7 +5,7 @@ import { motion, useCycle } from 'framer-motion' // ==============================|| ANIMATION BUTTON ||============================== // -const AnimateButton = forwardRef(function AnimateButton({ children, type, direction, offset, scale }, ref) { +const AnimateButton = forwardRef(function AnimateButton({ children, type = 'scale', direction = 'right', offset = 10, scale = { hover: 1, tap: 0.9 } }, ref) { let offset1 let offset2 switch (direction) { @@ -84,14 +84,6 @@ 
AnimateButton.propTypes = { scale: PropTypes.oneOfType([PropTypes.number, PropTypes.object]) } -AnimateButton.defaultProps = { - type: 'scale', - offset: 10, - direction: 'right', - scale: { - hover: 1, - tap: 0.9 - } -} +// defaults handled via function parameter defaults export default AnimateButton diff --git a/studio-frontend/packages/ui/src/ui-component/cards/MainCard.jsx b/studio-frontend/packages/ui/src/ui-component/cards/MainCard.jsx index 61bbf75..a295737 100644 --- a/studio-frontend/packages/ui/src/ui-component/cards/MainCard.jsx +++ b/studio-frontend/packages/ui/src/ui-component/cards/MainCard.jsx @@ -18,8 +18,9 @@ const MainCard = forwardRef(function MainCard( content = true, contentClass = '', contentSX = { - px: 2, - py: 0 + px: 3, + pt: 3, + pb: 3 }, darkTitle, secondary, @@ -39,8 +40,9 @@ const MainCard = forwardRef(function MainCard( ':hover': { boxShadow: boxShadow ? shadow || '0 2px 14px 0 rgb(32 40 45 / 8%)' : 'inherit' }, - maxWidth: '1280px', - mx: 'auto', + width: '100%', + mx: 0, + my: 0, ...sx }} > diff --git a/studio-frontend/packages/ui/src/ui-component/extended/Logo.jsx b/studio-frontend/packages/ui/src/ui-component/extended/Logo.jsx index e1a862d..153bb1f 100644 --- a/studio-frontend/packages/ui/src/ui-component/extended/Logo.jsx +++ b/studio-frontend/packages/ui/src/ui-component/extended/Logo.jsx @@ -15,8 +15,7 @@ const Logo = () => { Flowise ) diff --git a/studio-frontend/packages/ui/src/ui-component/extended/Transitions.jsx b/studio-frontend/packages/ui/src/ui-component/extended/Transitions.jsx index 4942dee..1b4ba51 100644 --- a/studio-frontend/packages/ui/src/ui-component/extended/Transitions.jsx +++ b/studio-frontend/packages/ui/src/ui-component/extended/Transitions.jsx @@ -6,7 +6,7 @@ import { Collapse, Fade, Box, Grow, Slide, Zoom } from '@mui/material' // ==============================|| TRANSITIONS ||============================== // -const Transitions = forwardRef(function Transitions({ children, position, type, direction, ...others }, ref) { +const Transitions = forwardRef(function Transitions({ children, position = 'top-left', type = 'grow', direction = 'up', ...others }, ref) { let positionSX = { transformOrigin: '0 0 0' } @@ -98,10 +98,6 @@ Transitions.propTypes = { direction: PropTypes.oneOf(['up', 'down', 'left', 'right']) } -Transitions.defaultProps = { - type: 'grow', - position: 'top-left', - direction: 'up' -} +// defaults handled via function parameter defaults export default Transitions diff --git a/studio-frontend/packages/ui/src/ui-component/table/FlowListTable.jsx b/studio-frontend/packages/ui/src/ui-component/table/FlowListTable.jsx index bcc415a..28703c4 100644 --- a/studio-frontend/packages/ui/src/ui-component/table/FlowListTable.jsx +++ b/studio-frontend/packages/ui/src/ui-component/table/FlowListTable.jsx @@ -367,7 +367,7 @@ export const FlowListTable = ({ data, images, isLoading, filterFunction, updateF setDeployWebSocketForId(id, wsInstance); wsInstance.onopen = () => { - console.log('[WebSocket] Connected for click deployment monitoring', id); + console.log('[WS] Connected for click deployment monitoring', id); wsInstance.send(JSON.stringify({ hostname: deploymentConfig.hostname, username: deploymentConfig.username, @@ -378,7 +378,7 @@ export const FlowListTable = ({ data, images, isLoading, filterFunction, updateF wsInstance.onmessage = (event) => { let data; try { data = JSON.parse(event.data); } catch { return; } - console.log('[WebSocket] Click deployment message:', data); + console.log('[WS] Click deployment message:', 
data); if (data.status === 'Success') { setDeployStatusForId(id, ['Success', data.message]); @@ -431,19 +431,19 @@ export const FlowListTable = ({ data, images, isLoading, filterFunction, updateF }; wsInstance.onerror = (error) => { - console.error('[WebSocket] Click deployment error:', error); + console.error('[WS] Click deployment error:', error); setDeployStatusForId(id, ['Error', 'Connection error during deployment monitoring']); wsInstance.close(); setDeployWebSocketForId(id, null); }; wsInstance.onclose = (event) => { - console.log(`[WebSocket] Click deployment closed: code=${event.code}, reason='${event.reason}', wasClean=${event.wasClean}`); + console.log(`[WS] Click deployment closed: code=${event.code}, reason='${event.reason}', wasClean=${event.wasClean}`); setDeployWebSocketForId(id, null); // Check deployment status if abnormal closure if (event.code !== 1000 && event.code !== 1001) { - console.log('[WebSocket] Abnormal closure detected, checking deployment status...'); + console.log('[WS] Abnormal closure detected, checking deployment status...'); setTimeout(async () => { try { const response = await chatflowsApi.getSpecificChatflow(id); @@ -617,11 +617,10 @@ export const FlowListTable = ({ data, images, isLoading, filterFunction, updateF {userRole === 'admin' && - + User diff --git a/studio-frontend/packages/ui/src/views/debuglogs/index.jsx b/studio-frontend/packages/ui/src/views/debuglogs/index.jsx index fb92fc2..0201e18 100644 --- a/studio-frontend/packages/ui/src/views/debuglogs/index.jsx +++ b/studio-frontend/packages/ui/src/views/debuglogs/index.jsx @@ -10,6 +10,7 @@ import { Paper, Button, Box, + Stack, Typography, Divider, Fade, @@ -127,17 +128,30 @@ export default function PodLogsView() { const selectedEventPod = podsData.pods.find(p => p.name === selectedPodEvents); return ( - // - - - {workflowName && ( - - Workflow name: {workflowName} - - )} - {/* Namespace: {podsData.namespace} */} - - + + + + + Debug Logs + + {workflowName && ( + + Workflow name: {workflowName} + + )} + + {/* Namespace: {podsData.namespace} */} + + Auto refresh: + + + ) : ( + + + {/* File Info */} + + + + + {uploadedFile.name} + + } + label="Selected" + color="primary" + size="small" + variant="outlined" + /> + + + + + + + + {formatFileSize(uploadedFile.size)} + + + {/* Upload Progress */} + {uploadProgress > 0 && uploadProgress < 100 && ( + + )} + + {/* File Preview */} + {preview && ( + + + Preview ({preview.totalLines} total lines): + + + + {preview.lines.join('\n')} + {preview.lines.length < preview.totalLines && '\n...'} + + + + )} + + + )} + + {/* Error Display */} + {(uploadError || error) && ( + } + > + {uploadError || error} + + )} + + ) +} + +FileUploadArea.propTypes = { + onFileUpload: PropTypes.func.isRequired, + acceptedTypes: PropTypes.arrayOf(PropTypes.string), + maxSizeMB: PropTypes.number, + error: PropTypes.string +} + +export default FileUploadArea \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx new file mode 100644 index 0000000..e9c9b13 --- /dev/null +++ b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx @@ -0,0 +1,1345 @@ +import { useState } from 'react' +import PropTypes from 'prop-types' + +// material-ui +import { + Box, + Button, + Dialog, + DialogActions, + DialogContent, + DialogTitle, + TextField, + FormControl, + InputLabel, + Select, + MenuItem, + Checkbox, + FormControlLabel, + Typography, + Stack, + 
IconButton, + CircularProgress, +} from '@mui/material' +import Autocomplete from '@mui/material/Autocomplete' +import { useTheme } from '@mui/material/styles' + +// icons +import { IconX } from '@tabler/icons-react' + +// components +import FileUploadArea from './FileUploadArea' + +// API +import finetuningApi from '@/api/finetuning' + +const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { + const theme = useTheme() + + const [formData, setFormData] = useState({ + baseModel: '', + trainingDataset: null, + hf_token: '', + // OpenAI standard parameters + openai_params: { + n_epochs: 3, + batch_size: 16, + learning_rate_multiplier: 1.0, + prompt_loss_weight: 0.01 + }, + // Extended configuration + general: { + task: 'instruction_tuning', + output_dir: './tmp', + report_to: 'none', + save_strategy: 'no', + enable_gradient_checkpointing: false, + trust_remote_code: false + }, + dataset: { + max_length: 512, + block_size: 512, + validation_split_percentage: 5, + padding_side: 'right', + truncation_side: 'right', + max_source_length: 384, + pad_to_max: false, + query_max_len: 128, + passage_max_len: 128, + train_group_size: 8, + query_instruction_for_retrieval: '', + passage_instruction_for_retrieval: '', + reasoning_dataset_keys: ['Question', 'Complex_CoT', 'Response'], + // raw input string to preserve trailing commas/spaces while editing + reasoning_dataset_keys_input: 'Question, Complex_CoT, Response', + max_prompt_length: 512, + data_preprocess_type: 'neural_chat', + data_preprocess_neural_chat: true, + padding: 'true', + truncation: true, + mask_input: true, + mask_response: true + }, + training: { + optimizer: 'adamw_torch', + device: 'cpu', + batch_size: 2, + epochs: 1, + max_train_steps: null, + learning_rate: 5.0e-5, + lr_scheduler: 'linear', + weight_decay: 0.0, + num_training_workers: 1, + accelerate_mode: 'DDP', + mixed_precision: 'no', + gradient_accumulation_steps: 1, + logging_steps: 10, + dpo_beta: 0.1 + , + // Embedding-specific training config (only used when task === 'embedding') + embedding_training_config: { + + temperature: 0.02, + sentence_pooling_method: 'cls', + normalized: true, + use_inbatch_neg: true + } + }, + lora: { + r: 8, + lora_alpha: 32, + lora_dropout: 0.1, + task_type: 'CAUSAL_LM' + } + }) + + const [errors, setErrors] = useState({}) + const [isSubmitting, setIsSubmitting] = useState(false) + + const [loraEnabled, setLoraEnabled] = useState(false) + + + const baseModels = [ + 'meta-llama/Llama-2-7b-chat-hf', + 'meta-llama/Llama-2-7b-hf', + 'meta-llama/Llama-2-13b-hf', + 'BAAI/bge-reranker-large', + 'BAAI/bge-base-en-v1.5', + 'Qwen/Qwen2.5-3B', + 'Qwen/Qwen2.5-7B' + ] + + const taskTypes = [ + { value: 'instruction_tuning', label: 'Instruction Tuning' }, + { value: 'rerank', label: 'Reranking' }, + { value: 'embedding', label: 'Embedding' }, + { value: 'pretraining', label: 'Pretraining' }, + { value: 'dpo', label: 'Direct Preference Optimization (DPO)' }, + { value: 'reasoning', label: 'Reasoning' } + ] + + const handleInputChange = (field, value) => { + setFormData(prev => ({ + ...prev, + [field]: value + })) + + // Clear error for this field + if (errors[field]) { + setErrors(prev => ({ + ...prev, + [field]: null + })) + } + } + + const handleOpenAIParamChange = (param, value) => { + setFormData(prev => ({ + ...prev, + openai_params: { + ...prev.openai_params, + [param]: value + } + })) + } + + const handleConfigChange = (section, param, value) => { + setFormData(prev => ({ + ...prev, + [section]: { + ...prev[section], + [param]: value + } + 
})) + } + + // When a file is selected in FileUploadArea, just store the File object locally. + // The actual upload to the server will happen when the user clicks Create Job. + const handleFileUpload = (fileType, file) => { + if (!file) { + setFormData(prev => ({ + ...prev, + [fileType]: null + })) + return + } + + // Store the raw File object and its name; do not upload now + const fileEntry = { + file, + name: file.name + } + + setFormData(prev => ({ + ...prev, + [fileType]: fileEntry + })) + + // Clear any previous error for this field + if (errors[fileType]) { + setErrors(prev => ({ + ...prev, + [fileType]: null + })) + } + } + + const validateForm = () => { + const newErrors = {} + + // Base validation + if (!formData.baseModel) { + newErrors.baseModel = 'Base model is required' + } + + if (!formData.trainingDataset) { + newErrors.trainingDataset = 'Training dataset is required' + } + + // OpenAI parameters validation + if (formData.openai_params.learning_rate_multiplier <= 0) { + newErrors.learning_rate_multiplier = 'Learning rate multiplier must be greater than 0' + } + + if (formData.openai_params.batch_size <= 0) { + newErrors.batch_size = 'Batch size must be greater than 0' + } + + if (formData.openai_params.n_epochs <= 0) { + newErrors.n_epochs = 'Number of epochs must be greater than 0' + } + + // Training parameters validation + if (formData.training.learning_rate <= 0) { + newErrors.learning_rate = 'Learning rate must be greater than 0' + } + + if (formData.training.epochs <= 0) { + newErrors.epochs = 'Epochs must be greater than 0' + } + + if (formData.training.logging_steps <= 0) { + newErrors.logging_steps = 'Logging steps must be greater than 0' + } + + // General validation + if (!formData.general.output_dir) { + newErrors.output_dir = 'Output directory is required' + } + + // Dataset validation + if (formData.dataset.max_length <= 0) { + newErrors.dataset_max_length = 'Max length must be greater than 0' + } + + // LoRA parameters validation (only when enabled) + if (loraEnabled) { + if (formData.lora.r <= 0) { + newErrors.lora_r = 'LoRA rank must be greater than 0' + } + + if (formData.lora.lora_alpha <= 0) { + newErrors.lora_alpha = 'LoRA alpha must be greater than 0' + } + + if (formData.lora.lora_dropout < 0 || formData.lora.lora_dropout > 1) { + newErrors.lora_dropout = 'LoRA dropout must be between 0 and 1' + } + } + + setErrors(newErrors) + return Object.keys(newErrors).length === 0 + } + + const handleSubmit = async () => { + if (!validateForm()) { + return + } + + setIsSubmitting(true) + + try { + // Create the job configuration payload + // Build General object and set lora_config based on the LoRA checkbox + const generalPayload = { ...formData.general } + // If user enabled LoRA, include the object; otherwise send explicit null + generalPayload.lora_config = loraEnabled ? 
formData.lora : null + + // If the user selected a file but hasn't uploaded it yet, upload it now + let trainingFileName = formData.trainingDataset?.uploadedName || null + if (formData.trainingDataset && formData.trainingDataset.file) { + try { + setIsSubmitting(true) + const uploadResp = await finetuningApi.uploadFile(formData.trainingDataset.file, 'fine-tune', () => {}) + trainingFileName = uploadResp.data?.filename || null + } catch (err) { + console.error('Error uploading training file before job creation:', err) + setErrors(prev => ({ ...prev, trainingDataset: 'Failed to upload training file: ' + (err.message || 'Unknown') })) + setIsSubmitting(false) + return + } + } + + // Build payload + const jobPayload = { + model: formData.baseModel, + training_file: trainingFileName + } + + // General configuration with LoRA config + const gen = { ...formData.general } + gen.lora_config = loraEnabled ? formData.lora : null + gen.config = gen.config || {} + if (formData.hf_token) { + gen.config.token = formData.hf_token + } + jobPayload.General = gen + jobPayload.task = gen.task || 'instruction_tuning' + + // Dataset configuration + jobPayload.Dataset = { + max_length: formData.dataset.max_length, + block_size: formData.dataset.block_size, + max_source_length: formData.dataset.max_source_length, + padding_side: formData.dataset.padding_side, + truncation_side: formData.dataset.truncation_side, + padding: formData.dataset.padding, + truncation: formData.dataset.truncation, + mask_input: formData.dataset.mask_input, + mask_response: formData.dataset.mask_response, + query_max_len: formData.dataset.query_max_len, + passage_max_len: formData.dataset.passage_max_len, + train_group_size: formData.dataset.train_group_size, + query_instruction_for_retrieval: formData.dataset.query_instruction_for_retrieval, + passage_instruction_for_retrieval: formData.dataset.passage_instruction_for_retrieval, + pad_to_max: formData.dataset.pad_to_max, + data_preprocess_type: formData.dataset.data_preprocess_neural_chat ? 
'neural_chat' : null + } + + // Training configuration + jobPayload.Training = { + optimizer: formData.training.optimizer, + device: formData.training.device, + batch_size: formData.training.batch_size, + epochs: formData.training.epochs, + max_train_steps: formData.training.max_train_steps, + learning_rate: formData.training.learning_rate, + lr_scheduler: formData.training.lr_scheduler, + weight_decay: formData.training.weight_decay, + num_training_workers: formData.training.num_training_workers, + accelerate_mode: formData.training.accelerate_mode, + mixed_precision: formData.training.mixed_precision, + gradient_accumulation_steps: formData.training.gradient_accumulation_steps, + logging_steps: formData.training.logging_steps, + // embedding_training_config will be attached below only for embedding task + dpo_beta: formData.training.dpo_beta + } + + // If embedding task, attach embedding_training_config + if (jobPayload.task === 'embedding') { + jobPayload.Training.embedding_training_config = formData.training.embedding_training_config + } + + // Call the actual API + const response = await finetuningApi.createJob(jobPayload) + + // Create job object from response + const newJob = { + id: response.data?.id || response.data?.fine_tuning_job_id || Date.now().toString(), + status: response.data?.status || 'pending', + model: formData.baseModel, + task: jobPayload.task || 'instruction_tuning', + dataset: formData.trainingDataset?.suffixedName || formData.trainingDataset?.name || 'Unknown', + progress: '0%', + createdDate: response.data?.created_at || new Date().toISOString(), + training_file: jobPayload.training_file, + openai_params: formData.openai_params, + training: formData.training, + general: formData.general, + dataset_config: formData.dataset + } + + if (formData.hf_token) { + newJob.general = { ...newJob.general, config: { ...(newJob.general.config || {}), token: formData.hf_token } } + } + + onJobCreated(newJob) + handleClose() + } catch (error) { + console.error('Error creating fine-tuning job:', error) + // TODO: Show error notification + } finally { + setIsSubmitting(false) + } + } + + const handleClose = () => { + setFormData({ + baseModel: '', + trainingDataset: null, + hf_token: '', + // OpenAI standard parameters + openai_params: { + n_epochs: 3, + batch_size: 16, + learning_rate_multiplier: 1.0, + prompt_loss_weight: 0.01 + }, + // Extended configuration + general: { + task: 'instruction_tuning', + output_dir: './tmp', + report_to: 'none', + save_strategy: 'no', + enable_gradient_checkpointing: false, + trust_remote_code: false + }, + dataset: { + max_length: 512, + block_size: 512, + validation_split_percentage: 5, + padding_side: 'right', + truncation_side: 'right', + max_source_length: 384, + pad_to_max: false, + query_max_len: 128, + passage_max_len: 128, + train_group_size: 8, + query_instruction_for_retrieval: '', + passage_instruction_for_retrieval: '', + reasoning_dataset_keys: ['Question', 'Complex_CoT', 'Response'], + reasoning_dataset_keys_input: 'Question, Complex_CoT, Response', + max_prompt_length: 512, + data_preprocess_type: 'neural_chat', + data_preprocess_neural_chat: true, + padding: 'true', + truncation: true, + mask_input: true, + mask_response: true + }, + training: { + optimizer: 'adamw_torch', + device: 'cpu', + batch_size: 2, + epochs: 1, + max_train_steps: null, + learning_rate: 5.0e-5, + lr_scheduler: 'linear', + weight_decay: 0.0, + num_training_workers: 1, + accelerate_mode: 'DDP', + mixed_precision: 'no', + gradient_accumulation_steps: 1, + 
logging_steps: 10, + dpo_beta: 0.1 + , + embedding_training_config: { + temperature: 0.02, + sentence_pooling_method: 'cls', + normalized: true, + use_inbatch_neg: true + } + }, + lora: { + r: 8, + lora_alpha: 32, + lora_dropout: 0.1, + task_type: 'CAUSAL_LM' + } + }) + setLoraEnabled(false) + setErrors({}) + setIsSubmitting(false) + onClose() + } + + return ( + + + + Create New Fine-tuning Job + + + + {/* Close button moved out of title: absolutely positioned within the dialog Paper */} + + + + + + + {/* Left Column: Model & Task Setup */} + + + {/* Base Model */} + + handleInputChange('baseModel', newValue || '')} + onInputChange={(event, newInputValue) => handleInputChange('baseModel', newInputValue)} + renderInput={(params) => ( + + )} + /> + {errors.baseModel && ( + + {errors.baseModel} + + )} + + + {/* HF Token */} + handleInputChange('hf_token', e.target.value)} + fullWidth + size="medium" + sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + /> + + {/* Task Type */} + + Task Type + + + + {/* Inline Instruction Tuning config shown right under Task Type */} + {formData.general.task === 'instruction_tuning' && ( + + + {/* 2-column responsive CSS grid for short-value fields */} + + + handleConfigChange('dataset', 'max_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'block_size', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'max_source_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'max_prompt_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + + handleConfigChange('dataset', 'padding_side', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'truncation_side', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'padding', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'truncation', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'mask_input', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'mask_response', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'data_preprocess_neural_chat', e.target.checked)} + /> + } + label="Use neural_chat for data preprocess type" + size="small" + sx={{ mt: 0 }} + /> + + + )} + + {/* Reasoning task dataset config (mirrors instruction tuning controls) */} + {formData.general.task === 'reasoning' && ( + + + {/* Comma-separated keys field that maps to array */} + { + const raw = e.target.value + // update raw input so trailing separators are preserved while typing + handleConfigChange('dataset', 'reasoning_dataset_keys_input', raw) + // allow comma or whitespace as separators to derive the array + const arr = raw.split(/[,\s]+/).map(s => s.trim()).filter(Boolean) + handleConfigChange('dataset', 'reasoning_dataset_keys', arr) + }} + size="small" + fullWidth + /> + + {/* Numeric fields: inline+scroll on small screens, 3-column fluid layout on md+ (no scrollbar) */} + + + handleConfigChange('dataset', 'max_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'max_source_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth 
+ /> + + + + handleConfigChange('dataset', 'max_prompt_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + {/* 2-column responsive CSS grid for short-value fields */} + + + handleConfigChange('dataset', 'padding_side', e.target.value)} + size="small" + sx={{ width: '100%', maxWidth: 240 }} + /> + + + + handleConfigChange('dataset', 'truncation_side', e.target.value)} + size="small" + sx={{ width: '100%', maxWidth: 240 }} + /> + + + + handleConfigChange('dataset', 'padding', e.target.value)} + size="small" + sx={{ width: '100%', maxWidth: 240 }} + /> + + + + handleConfigChange('dataset', 'truncation', e.target.value)} + size="small" + sx={{ width: '100%', maxWidth: 240 }} + /> + + + + handleConfigChange('dataset', 'mask_input', e.target.value)} + size="small" + sx={{ width: '100%', maxWidth: 240 }} + /> + + + + handleConfigChange('dataset', 'mask_response', e.target.value)} + size="small" + sx={{ width: '100%', maxWidth: 240 }} + /> + + + + + )} + + {/* Pretraining task dataset config: minimal fields (max_length, truncation, padding) */} + {formData.general.task === 'pretraining' && ( + + + + + handleConfigChange('dataset', 'max_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'block_size', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'padding', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'truncation', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + + + )} + + {/* Rerank task dataset config */} + {formData.general.task === 'rerank' && ( + + + + + handleConfigChange('dataset', 'max_length', e.target.value)} + size="small" + fullWidth + /> + + + handleConfigChange('dataset', 'train_group_size', e.target.value)} + size="small" + fullWidth + /> + + + + + )} + + {/* Embedding task dataset config */} + {formData.general.task === 'embedding' && ( + + + handleConfigChange('dataset', 'query_instruction_for_retrieval', e.target.value)} + size="small" + fullWidth + /> + + handleConfigChange('dataset', 'passage_instruction_for_retrieval', e.target.value)} + size="small" + fullWidth + /> + + + + handleConfigChange('dataset', 'query_max_len', e.target.value)} + size="small" + fullWidth + /> + + + handleConfigChange('dataset', 'passage_max_len', e.target.value)} + size="small" + fullWidth + /> + + + + handleConfigChange('dataset', 'padding', e.target.value)} + size="small" + fullWidth + /> + + handleConfigChange('dataset', 'train_group_size', e.target.value)} + size="small" + fullWidth + /> + + + )} + + {/* DPO task dataset config: max_length, max_prompt_length, pad_to_max */} + {formData.general.task === 'dpo' && ( + + + + + handleConfigChange('dataset', 'max_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'max_prompt_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'pad_to_max', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + + )} + + + + {/* Right Column: Training Dataset & Training Parameters */} + + + {/* Training Dataset Upload */} + + handleFileUpload('trainingDataset', file)} + acceptedTypes={['.json', '.jsonl', '.csv']} + maxSizeMB={100} + error={errors.trainingDataset} + /> + + + {/* Training Parameters */} + + + {/* compact grid similar to task-type configs */} + + + 
handleConfigChange('training', 'epochs', parseInt(e.target.value))} + error={!!errors.epochs} + inputProps={{ min: 1, step: 1 }} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + handleConfigChange('training', 'batch_size', parseInt(e.target.value))} + error={!!errors.batch_size} + inputProps={{ min: 1, max: 256, step: 1 }} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + + + + handleConfigChange('training', 'learning_rate', parseFloat(e.target.value))} + error={!!errors.learning_rate} + inputProps={{ min: 0.00001, max: 0.01, step: 0.00001 }} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + handleConfigChange('training', 'max_train_steps', e.target.value ? parseInt(e.target.value) : null)} + inputProps={{ min: 1, step: 1 }} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + + + + handleConfigChange('training', 'optimizer', e.target.value)} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + handleConfigChange('training', 'gradient_accumulation_steps', parseInt(e.target.value))} + inputProps={{ min: 1, step: 1 }} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + + + + + Device + + + + + + Mixed Precision + + + + + + Accelerate Mode + + + + + + + + handleConfigChange('training', 'weight_decay', parseFloat(e.target.value))} + inputProps={{ min: 0, step: 0.01 }} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + handleConfigChange('training', 'logging_steps', parseInt(e.target.value))} + error={!!errors.logging_steps} + inputProps={{ min: 1, step: 1 }} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + handleConfigChange('training', 'lr_scheduler', e.target.value)} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + + + + {}} + InputProps={{ readOnly: true }} + inputProps={{ min: 1, step: 1, 'aria-readonly': true }} + disabled + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + handleConfigChange('training', 'dpo_beta', parseFloat(e.target.value))} + inputProps={{ min: 0, step: 0.01 }} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + + {formData.general.task === 'embedding' ? ( + + + + handleConfigChange('training', 'embedding_training_config', { + ...formData.training.embedding_training_config, + temperature: e.target.value === '' ? 
null : parseFloat(e.target.value) + })} + inputProps={{ step: 0.01 }} + size="small" + fullWidth + /> + + + handleConfigChange('training', 'embedding_training_config', { + ...formData.training.embedding_training_config, + sentence_pooling_method: e.target.value + })} + size="small" + fullWidth + /> + + + + handleConfigChange('training', 'embedding_training_config', { + ...formData.training.embedding_training_config, + normalized: e.target.checked + })} + /> + } + label="Normalized embeddings" + /> + + handleConfigChange('training', 'embedding_training_config', { + ...formData.training.embedding_training_config, + use_inbatch_neg: e.target.checked + })} + /> + } + label="Use in-batch negatives" + /> + + ) : null } + + + + + + + + + + + + + ) +} + +FinetuningJobModal.propTypes = { + open: PropTypes.bool.isRequired, + onClose: PropTypes.func.isRequired, + onJobCreated: PropTypes.func.isRequired +} + +export default FinetuningJobModal \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx new file mode 100644 index 0000000..7137603 --- /dev/null +++ b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx @@ -0,0 +1,782 @@ +import { useState, useMemo, useEffect, useRef } from 'react' +import { useSelector } from 'react-redux' +import PropTypes from 'prop-types' + +// material-ui +import { + Box, + Button, + Chip, + LinearProgress, + Paper, + Stack, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + TableSortLabel, + Typography, + IconButton, + Tooltip, + Menu, + MenuItem, + Dialog, + DialogTitle, + DialogContent, + DialogActions +} from '@mui/material' +import { useTheme, styled } from '@mui/material/styles' +import { tableCellClasses } from '@mui/material/TableCell' +import { CircularProgress } from '@mui/material' + +// icons +import { IconDots, IconEye, IconTrash, IconDownload, IconPlayerStop } from '@tabler/icons-react' + +// API +import finetuningApi from '@/api/finetuning' + +// utils - format created date as 'MonthName DayOrdinal, Year' e.g. 'September 4th, 2025' +const formatDate = (date) => { + if (!date) return 'Unknown' + let dt + try { + if (typeof date === 'number') { + dt = date < 1e12 ? new Date(date * 1000) : new Date(date) + } else if (typeof date === 'string' && /^\d+$/.test(date)) { + const n = parseInt(date, 10) + dt = n < 1e12 ? 
new Date(n * 1000) : new Date(n) + } else { + dt = new Date(date) + } + if (isNaN(dt.getTime())) return 'Unknown' + + const month = dt.toLocaleString('default', { month: 'long' }) + const day = dt.getDate() + const year = dt.getFullYear() + + const ordinal = (n) => { + const s = ["th", "st", "nd", "rd"] + const v = n % 100 + return s[(v - 20) % 10] || s[v] || s[0] + } + + return `${month} ${day}${ordinal(day)}, ${year}` + } catch (e) { + return 'Unknown' + } +} + +const StyledTableCell = styled(TableCell)(({ theme }) => ({ + borderColor: theme.palette.grey[900] + 25, + + [`&.${tableCellClasses.head}`]: { + color: theme.palette.grey[900] + }, + [`&.${tableCellClasses.body}`]: { + fontSize: 14, + height: 64 + } +})) + +const StyledTableRow = styled(TableRow)(() => ({ + '&:last-child td, &:last-child th': { + border: 0 + } +})) + +const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null, filterFunction = null }) => { + const theme = useTheme() + const customization = useSelector((state) => state.customization) + // sorting + const [order, setOrder] = useState('asc') + const [orderBy, setOrderBy] = useState('createdDate') + const handleRequestSort = (property) => { + const isAsc = orderBy === property && order === 'asc' + setOrder(isAsc ? 'desc' : 'asc') + setOrderBy(property) + } + // sorted data + const sortedData = useMemo(() => { + if (!data) return [] + return [...data].sort((a, b) => { + let cmp = 0 + if (orderBy === 'id') { + cmp = String(a.id).localeCompare(String(b.id)) + } else if (orderBy === 'createdDate') { + cmp = new Date(a.createdDate) - new Date(b.createdDate) + } else { + cmp = 0 + } + return order === 'asc' ? cmp : -cmp + }) + }, [data, order, orderBy]) + const [anchorEl, setAnchorEl] = useState(null) + const [selectedJob, setSelectedJob] = useState(null) + const [actionLoading, setActionLoading] = useState(false) + // Track multiple concurrent downloads: { [jobId]: { progress: number } } + const [downloadingJobs, setDownloadingJobs] = useState({}) + const [downloadDialogOpen, setDownloadDialogOpen] = useState(false) + const [downloadProgress, setDownloadProgress] = useState(0) + const [detailsOpen, setDetailsOpen] = useState(false) + const [detailsData, setDetailsData] = useState(null) + const [logsOpen, setLogsOpen] = useState(false) + const [logsData, setLogsData] = useState('') + const [logsLoading, setLogsLoading] = useState(false) + const logsContainerRef = useRef(null) + + // Auto-refresh logs every 5 seconds while the logs dialog is open + useEffect(() => { + if (!logsOpen || !selectedJob) return + + const fetchLogs = async () => { + try { + const response = await finetuningApi.getJobLogs(selectedJob.id) + const body = response.data + // Support two shapes: { logs: string } or raw string + if (body && typeof body === 'object' && 'logs' in body) { + // If the service provided an error message, prefer showing that when logs are empty + const logsStr = body.logs || '' + if (!logsStr && body.error) { + setLogsData(`Error: ${body.error}`) + } else { + setLogsData(normalizeLogs(logsStr)) + } + } else if (typeof body === 'string') { + setLogsData(normalizeLogs(body)) + } else { + setLogsData(JSON.stringify(body, null, 2)) + } + } catch (error) { + console.error('Error auto-refreshing logs:', error) + } + } + + // Initial fetch when dialog opens + fetchLogs() + + // Set up interval for auto-refresh every 5 seconds + const intervalId = setInterval(fetchLogs, 5000) + + return () => clearInterval(intervalId) + }, [logsOpen, selectedJob]) + + // When logs dialog opens
or logsData changes, scroll to bottom + useEffect(() => { + if (!logsOpen) return + // scroll after next paint to ensure content is rendered + const id = setTimeout(() => { + try { + const el = logsContainerRef.current + if (el) { + el.scrollTop = el.scrollHeight + } + } catch (e) { + // ignore + } + }, 0) + return () => clearTimeout(id) + }, [logsOpen, logsData]) + + const handleMenuClick = (event, job) => { + setAnchorEl(event.currentTarget) + setSelectedJob(job) + } + + const handleMenuClose = () => { + setAnchorEl(null) + setSelectedJob(null) + } + + const handleCancelJob = async () => { + if (!selectedJob) return + + setActionLoading(true) + try { + await finetuningApi.cancelJob(selectedJob.id) + handleMenuClose() + if (onRefresh) onRefresh() + } catch (error) { + console.error('Error canceling job:', error) + alert('Failed to cancel job: ' + (error.message || 'Unknown error')) + } finally { + setActionLoading(false) + } + } + + const handleDownloadFinetuningOutput = async (job) => { + if (!job) { + alert('Job is required') + return + } + + const id = String(job.id) + setDownloadProgress(0) + // mark this job as preparing; show dialog (user can close dialog without cancelling) + setDownloadingJobs((prev) => ({ ...(prev || {}), [id]: { progress: 0 } })) + setDownloadDialogOpen(true) + + // Persist pending download so we can recover on page refresh + try { + const pending = JSON.parse(sessionStorage.getItem('ft_pending_downloads') || '[]') + if (!pending.includes(id)) { + pending.push(id) + sessionStorage.setItem('ft_pending_downloads', JSON.stringify(pending)) + } + } catch (e) { + // ignore sessionStorage errors + } + + // Use WebSocket-based download for non-blocking zip creation + const cleanup = finetuningApi.downloadFinetuningOutputWS(job.id, { + onProgress: (data) => { + // Update UI to show preparation is in progress + setDownloadingJobs((prev) => ({ + ...(prev || {}), + [id]: { progress: 0, status: data.status, message: data.message } + })) + }, + onComplete: async (data) => { + // File is ready - trigger native browser download + // No authentication needed (endpoint is whitelisted) + const downloadUrl = data.downloadUrl || `/api/v1/finetuning/download-ft/${job.id}` + console.log('Starting native browser download:', downloadUrl) + + // Use window.location.href to trigger native browser download + // Browser will show download in download manager with progress bar + window.location.href = downloadUrl + + // Remove from pending list + try { + const pending = JSON.parse(sessionStorage.getItem('ft_pending_downloads') || '[]') + const filtered = (pending || []).filter((x) => x !== id) + sessionStorage.setItem('ft_pending_downloads', JSON.stringify(filtered)) + } catch (e) {} + + // Mark this job finished and close dialog + setDownloadingJobs((prev) => ({ ...(prev || {}), [id]: { progress: 100 } })) + setDownloadProgress(100) + setTimeout(() => { + setDownloadingJobs((prev) => { + const copy = { ...(prev || {}) } + delete copy[id] + return copy + }) + setDownloadDialogOpen(false) + }, 800) + }, + onError: (data) => { + console.error('Download preparation error:', data) + alert('Failed to prepare download: ' + (data.error || 'Unknown error')) + // Remove from pending list + try { + const pending = JSON.parse(sessionStorage.getItem('ft_pending_downloads') || '[]') + const filtered = (pending || []).filter((x) => x !== id) + sessionStorage.setItem('ft_pending_downloads', JSON.stringify(filtered)) + } catch (e) {} + + // Clear downloading state + setDownloadingJobs((prev) => { + const 
copy = { ...(prev || {}) } + delete copy[id] + return copy + }) + setDownloadProgress(0) + setActionLoading(false) + setDownloadDialogOpen(false) + } + }) + + // Store cleanup function to allow cancellation if needed + // (optional enhancement: you could add a cancel button to call this) + window._ftDownloadCleanup = cleanup + } + + const handleViewLogs = async (jobArg = null) => { + const jobToUse = jobArg || selectedJob + if (!jobToUse) return + + // ensure selectedJob is set for downstream operations + setSelectedJob(jobToUse) + + // Clear any existing logs data and show loading + setLogsLoading(true) + + // Open the dialog - the auto-refresh effect will handle fetching logs + setLogsOpen(true) + // Close the menu but keep selectedJob set so auto-refresh can use it + setAnchorEl(null) + + // Stop loading indicator after a brief moment as auto-refresh takes over + setTimeout(() => setLogsLoading(false), 500) + } + + // Normalize logs string: + // - decode common escaped sequences ("\\n", "\\r", "\\t", "\\uXXXX", "\\xHH") + // - convert escaped ESC sequences into the real ESC character + // - strip ANSI escape sequences (colors / CSI sequences) + // - remove C0 control chars except newline, carriage return and tab + // (a worked example follows the table markup below) + const normalizeLogs = (raw) => { + if (!raw && raw !== 0) return '' + try { + let s = String(raw) + + // Iteratively decode escaped sequences up to a safe depth (handles double-escaped strings) + for (let i = 0; i < 6; i++) { + const prev = s + // common escapes + s = s.replace(/\\r/g, '\r').replace(/\\n/g, '\n').replace(/\\t/g, '\t') + // hex and unicode escapes + s = s.replace(/\\x([0-9A-Fa-f]{2})/g, (_, h) => String.fromCharCode(parseInt(h, 16))) + s = s.replace(/\\u([0-9A-Fa-f]{4})/g, (_, h) => String.fromCharCode(parseInt(h, 16))) + // octal-ish common sequence for ESC + s = s.replace(/\\0?33/g, '\x1b') + if (s === prev) break + } + + // Coerce any surviving textual ESC tokens ('\u001b' or '\x1b' written out as text) into the real ESC character; + // the decode loop above already handles most cases, so a single case-insensitive pass is enough + s = s.replace(/\\u001b/gi, '\x1b').replace(/\\x1b/gi, '\x1b') + + // Remove ANSI CSI/SGR sequences (ESC [ ... letters) + s = s.replace(/\x1b\[[0-9;=?]*[A-Za-z]/g, '') + // Remove OSC sequences ESC ] ...
BEL or ESC \ + s = s.replace(/\x1b\][^\x1b]*?(\x07|\x1b\\)/g, '') + // Remove any leftover ESC followed by non-printable run + s = s.replace(/\x1b[^\n\r]*/g, '') + + // Remove C0 control chars except newline (10), carriage return (13) and tab (9) + s = s.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, '') + + // Normalize CR to newline so progress-carriage returns become visible lines + s = s.replace(/\r\n/g, '\n').replace(/\r/g, '\n') + + // Collapse repeated blank lines a bit (optional) + s = s.replace(/\n{3,}/g, '\n\n') + + // Trim trailing whitespace on each line but keep indentation + s = s.split('\n').map((l) => l.replace(/[\s\u00A0]+$/u, '')).join('\n') + + return s + } catch (e) { + // On any failure just return the original string representation + return String(raw) + } + } + + const handleViewDetails = async () => { + if (!selectedJob) return + + setActionLoading(true) + try { + const response = await finetuningApi.getJob(selectedJob.id) + setDetailsData(response.data) + setDetailsOpen(true) + handleMenuClose() + } catch (error) { + console.error('Error fetching job details:', error) + alert('Failed to fetch job details: ' + (error.message || 'Unknown error')) + } finally { + setActionLoading(false) + } + } + + // On mount: re-establish any pending download WS connections saved in sessionStorage + useEffect(() => { + try { + const pending = JSON.parse(sessionStorage.getItem('ft_pending_downloads') || '[]') + if (Array.isArray(pending) && pending.length > 0) { + // For each pending job id, re-attach a download WS to get status + pending.forEach((jobId) => { + // avoid duplicate entries in state + if (!downloadingJobs || !downloadingJobs[jobId]) { + setDownloadingJobs((prev) => ({ ...(prev || {}), [jobId]: { progress: 0 } })) + } + finetuningApi.downloadFinetuningOutputWS(jobId, { + onProgress: (data) => { + setDownloadingJobs((prev) => ({ ...(prev || {}), [jobId]: { progress: 0, status: data.status, message: data.message } })) + setDownloadDialogOpen(true) + }, + onComplete: (data) => { + // Trigger native download + const downloadUrl = data.downloadUrl || `/api/v1/finetuning/download-ft/${jobId}` + window.location.href = downloadUrl + // cleanup pending + try { + const pending2 = JSON.parse(sessionStorage.getItem('ft_pending_downloads') || '[]') + const filtered = (pending2 || []).filter((x) => x !== jobId) + sessionStorage.setItem('ft_pending_downloads', JSON.stringify(filtered)) + } catch (e) {} + setDownloadingJobs((prev) => { + const copy = { ...(prev || {}) } + delete copy[jobId] + return copy + }) + setDownloadDialogOpen(false) + }, + onError: (data) => { + console.error('Recovered download preparation error:', data) + try { + const pending2 = JSON.parse(sessionStorage.getItem('ft_pending_downloads') || '[]') + const filtered = (pending2 || []).filter((x) => x !== jobId) + sessionStorage.setItem('ft_pending_downloads', JSON.stringify(filtered)) + } catch (e) {} + setDownloadingJobs((prev) => { + const copy = { ...(prev || {}) } + delete copy[jobId] + return copy + }) + setDownloadDialogOpen(false) + } + }) + }) + } + } catch (e) { + // ignore sessionStorage parse errors + } + }, []) + + const getStatusColor = (status) => { + switch (status?.toLowerCase()) { + case 'completed': + case 'succeeded': + return 'success' + case 'running': + return 'primary' + case 'failed': + return 'error' + case 'pending': + return 'default' + default: + return 'default' + } + } + + const getProgressValue = (progress) => { + if (typeof progress === 'string' && progress.includes('%')) { + return 
parseInt(progress.replace('%', ''), 10) + } + return progress || 0 + } + + // Only allow downloads when job status indicates completion/success + const isDownloadableStatus = (status) => { + if (!status) return false + const s = String(status).toLowerCase() + return s === 'succeeded' + } + + // Hooks must run unconditionally on every render, so compute the visible rows before any early return + const visibleData = useMemo(() => { + if (!sortedData || sortedData.length === 0) return [] + if (typeof filterFunction === 'function') { + try { + return sortedData.filter(filterFunction) + } catch (e) { + console.error('Error in filterFunction:', e) + return sortedData + } + } + return sortedData + }, [sortedData, filterFunction]) + + if (isLoading) { + return ( + + + + Loading fine-tuning jobs... + + + ) + } + + if (!data || data.length === 0) { + return ( + + + No fine-tuning jobs found + + + ) + } + + return ( + <> + {/* toolbar is provided by parent*/} + + + + + + + handleRequestSort('id')} + > + ID + + + Status + Model + Task + Dataset + Output + Logs + Actions + + handleRequestSort('createdDate')} + > + Created Date + + + + + + {visibleData.length === 0 ? ( + + + No fine-tuning jobs match the current filter + + + ) : ( + visibleData.map((job) => ( + + + + {job.id} + + + + {/* Status with blinking indicator when running; show Chip only for other statuses */} + {(() => { + const s = String(job.status || '').toLowerCase() + return (s === 'running' || s === 'pending') ? ( + + + {job.status} + + ) : ( + + ) + })()} + + + + + {job.model || 'N/A'} + + + + + {job.task || job.task_type || job.taskType || 'N/A'} + + + + + {job.dataset || 'N/A'} + + + + {(() => { + const jid = String(job.id) + const isPreparing = Boolean(downloadingJobs && downloadingJobs[jid]) + return ( + + + handleDownloadFinetuningOutput(job)} + disabled={ + actionLoading || + isPreparing || + !isDownloadableStatus(job.status) + } + title={isPreparing ? 'Preparing download' : 'Download fine-tuning output'} + > + {isPreparing ? ( + + ) : ( + + )} + + + + ) + })()} + + + + handleViewLogs(job)} title="View Logs"> + + + + + + handleMenuClick(e, job)} + > + + + + + + {job.createdDate ? formatDate(job.createdDate) : 'Unknown'} + + + + )) + )} + +
+
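A quick worked example makes the normalizeLogs pipeline above easier to verify (decode textual escapes, strip ANSI color codes, normalize carriage returns). The snippet below is illustrative only, not part of the patch, and the sample log string is invented:

    // Hypothetical sanity check for normalizeLogs (not part of the patch).
    // A doubly-escaped, ANSI-colored trainer line should come out as plain text:
    const raw = 'Epoch 1\\r\\n\\u001b[32mloss=0.42\\u001b[0m\\rstep 10/100'
    console.log(normalizeLogs(raw))
    // Expected output after escape decoding, ANSI stripping, and CR -> newline:
    // Epoch 1
    // loss=0.42
    // step 10/100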
+ + + + View Details + + + + Cancel Job + + { + if (!selectedJob) return + if (!window.confirm('Are you sure you want to delete this job?')) return + setActionLoading(true) + try { + await finetuningApi.deleteJob(selectedJob.id) + handleMenuClose() + if (onRefresh) onRefresh() + } catch (error) { + console.error('Error deleting job:', error) + alert('Failed to delete job: ' + (error.message || 'Unknown error')) + } finally { + setActionLoading(false) + } + }} + disabled={actionLoading} + > + + Delete Job + + + + {/* Preparing Download Dialog */} + setDownloadDialogOpen(false)} maxWidth="xs" fullWidth> + Preparing download + + + The server is preparing the job output for download. This may take a few moments for large outputs. + + 0 ? 'determinate' : 'indeterminate'} value={downloadProgress} /> + + + + + + + {/* Details Dialog */} + setDetailsOpen(false)} maxWidth="md" fullWidth> + Job Details + + {detailsData ? ( +
{JSON.stringify(detailsData, null, 2)}
+ ) : ( + No details available + )} +
+ + + +
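The pending-download bookkeeping used above (persist the job id to sessionStorage before the WebSocket round-trip, clear it on complete or error, replay on mount) is repeated across three call sites. A minimal sketch of the same pattern factored into helpers, assuming the same 'ft_pending_downloads' key; these helper names are hypothetical and not part of the patch:

    // Hypothetical helpers wrapping the sessionStorage bookkeeping shown above.
    const PENDING_KEY = 'ft_pending_downloads'
    const listPending = () => {
        try {
            return JSON.parse(sessionStorage.getItem(PENDING_KEY) || '[]')
        } catch (e) {
            return [] // corrupt or unavailable storage degrades to "no pending downloads"
        }
    }
    const addPending = (id) => {
        try {
            const pending = listPending()
            if (!pending.includes(id)) sessionStorage.setItem(PENDING_KEY, JSON.stringify([...pending, id]))
        } catch (e) { /* ignore storage errors, matching the code above */ }
    }
    const removePending = (id) => {
        try {
            sessionStorage.setItem(PENDING_KEY, JSON.stringify(listPending().filter((x) => x !== id)))
        } catch (e) { /* ignore storage errors */ }
    }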
+ + {/* Logs Dialog */} + { + setLogsOpen(false) + // clear selected job when dialog closes to avoid stale selection + setSelectedJob(null) + }} + maxWidth="lg" + fullWidth + > + Job Logs + + {logsLoading ? ( + Loading logs... + ) : ( +
+
{logsData || 'No logs available'}
+
+ )} +
+ + + +
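The 5-second log polling behind this dialog (immediate fetch on open, interval afterwards, cleanup on close) is a recurring shape that could be factored into a small reusable hook. A sketch under that assumption; usePolling is hypothetical and not part of the patch:

    // Hypothetical extraction of the auto-refresh effect into a reusable hook.
    import { useEffect, useRef } from 'react'

    function usePolling(callback, delayMs, enabled) {
        const saved = useRef(callback)
        // keep the latest callback without restarting the interval
        useEffect(() => { saved.current = callback }, [callback])
        useEffect(() => {
            if (!enabled) return undefined
            saved.current() // immediate first run, mirroring the dialog-open fetch
            const id = setInterval(() => saved.current(), delayMs)
            return () => clearInterval(id)
        }, [delayMs, enabled])
    }

    // usage inside the component: usePolling(fetchLogs, 5000, logsOpen && Boolean(selectedJob))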
+ + ) +} + +FinetuningJobsTable.propTypes = { + data: PropTypes.array.isRequired, + isLoading: PropTypes.bool, + onRefresh: PropTypes.func, + filterFunction: PropTypes.func +} + +export default FinetuningJobsTable \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/views/finetuning/index.jsx b/studio-frontend/packages/ui/src/views/finetuning/index.jsx new file mode 100644 index 0000000..617da61 --- /dev/null +++ b/studio-frontend/packages/ui/src/views/finetuning/index.jsx @@ -0,0 +1,311 @@ +import { useEffect, useState, useRef } from 'react' +import { useNavigate } from 'react-router-dom' + +// material-ui +import { Box, Skeleton, Stack, ToggleButton, ToggleButtonGroup, Typography, Input } from '@mui/material' +import { useTheme } from '@mui/material/styles' + +// project imports +import MainCard from '@/ui-component/cards/MainCard' +import ItemCard from '@/ui-component/cards/ItemCard' +import { gridSpacing } from '@/store/constant' +import WorkflowEmptySVG from '@/assets/images/workflow_empty.svg' +import LoginDialog from '@/ui-component/dialog/LoginDialog' +import ConfirmDialog from '@/ui-component/dialog/ConfirmDialog' +import { StyledButton } from '@/ui-component/button/StyledButton' +import ViewHeader from '@/layout/MainLayout/ViewHeader' +import ErrorBoundary from '@/ErrorBoundary' +import FinetuningJobsTable from './FinetuningJobsTable' +import FinetuningJobModal from './FinetuningJobModal' + +// API +import finetuningApi from '@/api/finetuning' + +// Hooks +import useApi from '@/hooks/useApi' + +// icons +import { IconPlus, IconLayoutGrid, IconList, IconSearch } from '@tabler/icons-react' + +//keycloak +import { useKeycloak } from '../../KeycloakContext' + +// ==============================|| Fine-tuning ||============================== // + +const Finetuning = () => { + const keycloak = useKeycloak() + const navigate = useNavigate() + const theme = useTheme() + + const [isLoading, setLoading] = useState(true) + const [error, setError] = useState(null) + const [search, setSearch] = useState('') + const [loginDialogOpen, setLoginDialogOpen] = useState(false) + const [loginDialogProps, setLoginDialogProps] = useState({}) + const [jobs, setJobs] = useState([]) + const [jobModalOpen, setJobModalOpen] = useState(false) + + let userRole = keycloak?.tokenParsed?.resource_access?.genaistudio?.roles?.[0] + + // useApi is a React hook, so it must be called unconditionally on every render; + // gate its result on authentication instead of wrapping the call in a conditional + const allJobsApi = useApi(finetuningApi.getAllJobs) + const getAllJobsApi = keycloak.authenticated ? allJobsApi : null + + // Store cleanup functions for each job's WebSocket connection + const jobSocketsRef = useRef({}) + + useEffect(() => { + loadJobs() + + // Cleanup all WebSocket connections on unmount + return () => { + Object.keys(jobSocketsRef.current).forEach(jobId => { + if (jobSocketsRef.current[jobId]) { + jobSocketsRef.current[jobId]() + } + }) + jobSocketsRef.current = {} + } + }, []) + + // Function to start monitoring a specific job + const startJobMonitoring = (jobId) => { + // Don't create duplicate connections + if (jobSocketsRef.current[jobId]) { + return + } + + // Subscribe to this job's status updates + const cleanup = finetuningApi.subscribeToJobStatus(jobId, { + onUpdate: (jobData) => { + // Update the job in state + setJobs(prev => prev.map(j => + j.id === jobData.id ?
{ ...j, ...jobData } : j + )) + }, + onError: (error) => { + console.error(`[Job ${jobId}] WebSocket error:`, error) + } + }) + + // Store cleanup function + jobSocketsRef.current[jobId] = cleanup + } + + // Function to stop monitoring a specific job + const stopJobMonitoring = (jobId) => { + if (jobSocketsRef.current[jobId]) { + jobSocketsRef.current[jobId]() + delete jobSocketsRef.current[jobId] + } + } + + const loadJobs = async () => { + if (!getAllJobsApi) return + + try { + setLoading(true) + const response = await getAllJobsApi.request() + // Normalize server objects (TypeORM entities or external API objects) + const normalizeJob = (j) => { + if (!j) return null + const id = j.id || j.job_id || j.fine_tuning_job_id || String(Date.now()) + const name = j.name || id + const status = j.status || j.state || 'pending' + const model = j.model || 'N/A' + const dataset = j.dataset || j.training_file || j.trainingFile || 'N/A' + const progress = typeof j.progress === 'number' ? `${j.progress}%` : (j.progress || '0%') + const createdDate = j.createdDate || j.created_at || j.createdAt || new Date().toISOString() + return { + ...j, + id, + name, + status, + model, + dataset, + progress, + createdDate + } + } + + const jobsData = Array.isArray(response) ? response.map(normalizeJob).filter(Boolean) : [] + setJobs(jobsData) + setLoading(false) + + // Start monitoring any active jobs + const activeStatuses = ['pending', 'validating_files', 'running'] + jobsData.forEach(job => { + const status = (job?.status || '').toString().toLowerCase() + if (activeStatuses.includes(status)) { + startJobMonitoring(job.id) + } + }) + } catch (error) { + console.error('Error loading fine-tuning jobs:', error) + setJobs([]) + setError(error) + setLoading(false) + } + } + + const handleCreateJob = () => { + try { + if (document.activeElement instanceof HTMLElement) { + document.activeElement.blur() + } + } catch (e) { + // ignore in non-browser environments + } + setTimeout(() => setJobModalOpen(true), 0) + } + + const handleJobCreated = (newJob) => { + setJobs(prev => [...prev, newJob]) + setJobModalOpen(false) + + // Start monitoring the new job if it's in an active state + const activeStatuses = ['pending', 'validating_files', 'running'] + const status = (newJob?.status || '').toString().toLowerCase() + if (activeStatuses.includes(status)) { + startJobMonitoring(newJob.id) + } + } + + const onSearchChange = (event) => { + setSearch(event.target.value) + } + + // Predicate function used by FinetuningJobsTable to show/hide rows + const filterJobs = (job) => { + if (!search || search.trim() === '') return true + const q = search.toLowerCase() + const id = (job?.id || '').toString().toLowerCase() + const name = (job?.name || '').toString().toLowerCase() + const model = (job?.model || '').toString().toLowerCase() + const dataset = (job?.dataset || job?.training_file || '').toString().toLowerCase() + const task = (job?.task || job?.task_type || job?.taskType || '').toString().toLowerCase() + const status = (job?.status || '').toString().toLowerCase() + return id.includes(q) || name.includes(q) || model.includes(q) || dataset.includes(q) || task.includes(q) || status.includes(q) + } + + return ( + <> + + + + + Fine-tuning Jobs + + + + } + sx={{ borderRadius: 2, height: 40 }} + > + Create New Job + + + + + + + } + type='search' + /> + +
+ + {isLoading ? ( + + + + + + ) : ( + + {jobs.length === 0 ? ( + + + WorkflowEmptySVG + +

No Fine-tuning Jobs Yet

+ Create your first fine-tuning job to get started! +
+
+
+ ) : ( + + )} +
+ )} + + + + setJobModalOpen(false)} + onJobCreated={handleJobCreated} + /> + + setLoginDialogOpen(false)} + onConfirm={() => setLoginDialogOpen(false)} + /> + + ) +} + +export default Finetuning \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/views/opeaflows/index.jsx b/studio-frontend/packages/ui/src/views/opeaflows/index.jsx index 1beefee..5e7e28f 100644 --- a/studio-frontend/packages/ui/src/views/opeaflows/index.jsx +++ b/studio-frontend/packages/ui/src/views/opeaflows/index.jsx @@ -2,7 +2,7 @@ import { useEffect, useState } from 'react' import { useNavigate } from 'react-router-dom' // material-ui -import { Box, Skeleton, Stack, ToggleButton, ToggleButtonGroup } from '@mui/material' +import { Box, Skeleton, Stack, Input, Typography } from '@mui/material' import { useTheme } from '@mui/material/styles' // project imports @@ -27,7 +27,7 @@ import useApi from '@/hooks/useApi' import { baseURL } from '@/store/constant' // icons -import { IconPlus, IconLayoutGrid, IconList } from '@tabler/icons-react' +import { IconPlus, IconLayoutGrid, IconList, IconSearch } from '@tabler/icons-react' //keycloak import { useKeycloak } from '../../KeycloakContext' @@ -161,47 +161,58 @@ const Opeaflows = () => { ) : ( - - {/* + - - - - - - - */} - - - } sx={{ borderRadius: 2, height: 40, width: 250 }}> - Create New Workflow - - } sx={{ borderRadius: 2, height: 40, width: 250 }}> - Import Sample Workflows - + Workflows + + + + } sx={{ borderRadius: 2, height: 40, width: 250 }}> + Create New Workflow + + } sx={{ borderRadius: 2, height: 40, width: 250 }}> + Import Sample Workflows + + + + + + + } + type='search' + /> +
{!view || view === 'card' ? ( <> diff --git a/studio-frontend/packages/ui/src/views/tracer/index.jsx b/studio-frontend/packages/ui/src/views/tracer/index.jsx index f6034f0..d3fa4e9 100644 --- a/studio-frontend/packages/ui/src/views/tracer/index.jsx +++ b/studio-frontend/packages/ui/src/views/tracer/index.jsx @@ -11,6 +11,7 @@ import { Paper, Button, Box, + Stack, Typography, Divider, TablePagination, @@ -170,15 +171,27 @@ export default function LLMTraces() { return ( - // - - - - {workflowName && ( - - Workflow name: {workflowName} + + + + + LLM Call Traces - )} + {workflowName && ( + + Workflow name: {workflowName} + + )} + {traceList.length > 0 ? ( <> Traces: @@ -215,7 +228,7 @@ export default function LLMTraces() { ) : ( No traces found )} - + {selectedTrace && ( diff --git a/studio-frontend/packages/ui/vite.config.js b/studio-frontend/packages/ui/vite.config.js index c987920..a2591b2 100644 --- a/studio-frontend/packages/ui/vite.config.js +++ b/studio-frontend/packages/ui/vite.config.js @@ -9,16 +9,17 @@ export default defineConfig(async ({ mode }) => { const serverEnv = dotenv.config({ processEnv: {}, path: '../server/.env' }).parsed const serverHost = serverEnv?.['HOST'] ?? 'localhost' const serverPort = parseInt(serverEnv?.['PORT'] ?? '3000') - if (!Number.isNaN(serverPort) && serverPort > 0 && serverPort < 65535) { - proxy = { - '/api': { - target: `http://${serverHost}:${serverPort}`, - changeOrigin: true - }, - '/socket.io': { - target: `http://${serverHost}:${serverPort}`, - changeOrigin: true - } + proxy = { + '/api': { + target: `http://${serverHost}:${serverPort}`, + changeOrigin: true, + secure: false + }, + '/socket.io': { + target: `http://${serverHost}:${serverPort}`, + changeOrigin: true, + ws: true, + secure: false } } }
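A closing note on the vite.config.js change above: ws: true is what lets the Vite dev server proxy socket.io's WebSocket upgrade rather than only its HTTP polling transport. A client connecting through that dev proxy would look roughly like the sketch below; this is illustrative only, and the actual client wiring in the app may differ:

    // Hypothetical same-origin socket.io client relying on the '/socket.io' proxy above.
    import { io } from 'socket.io-client'

    // In dev, Vite forwards both the polling transport and the WebSocket
    // upgrade to http://HOST:PORT as configured in the proxy block.
    const socket = io('/', { path: '/socket.io' })
    socket.on('connect', () => console.log('socket connected:', socket.id))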