Merge pull request #748 from vishnoianil/doc-conv-flag

nerdalert · web-flow · commit 4655f92f8a2c · 2025-04-26T07:22:33.000-04:00
Add flag to enable or disable knowledge document conversion
diff --git a/.env.github.example b/.env.github.example
@@ -1,5 +1,3 @@
-IL_UI_ADMIN_USERNAME=admin # user/pass for dev mode
-IL_UI_ADMIN_PASSWORD=password
 IL_UI_DEPLOYMENT=github # Start UI stack in github mode.
 OAUTH_GITHUB_ID=<OAUTH_APP_ID>
 OAUTH_GITHUB_SECRET=<OAUTH_APP_SECRET>
@@ -12,14 +10,14 @@ NEXT_PUBLIC_TAXONOMY_DOCUMENTS_REPO=github.com/instructlab-public/taxonomy-knowl
 NEXT_PUBLIC_AUTHENTICATION_ORG=<AUTHENTICATION_ORG>
 NEXT_PUBLIC_TAXONOMY_REPO_OWNER=<GITHUB_ACCOUNT>
 NEXT_PUBLIC_TAXONOMY_REPO=<REPO_NAME>
-
-IL_GRANITE_API=<GRANITE_HOST>
-IL_GRANITE_MODEL_NAME=<GRANITE_MODEL_NAME>
-IL_MERLINITE_API=<MERLINITE_HOST>
-IL_MERLINITE_MODEL_NAME=<MERLINITE_MODEL_NAME>
-
-IL_ENABLE_DEV_MODE=true #Enable this option if you want to enable UI features that helps in development, such as form Auto-Fill feature.
-
 NEXT_PUBLIC_EXPERIMENTAL_FEATURES=false
 
 SLACK_WEBHOOK_URL=
+
+# (Optional) Set to true to enable UI features that help in development, such as the form Auto-Fill feature.
+# Default: false
+IL_ENABLE_DEV_MODE=false
+
+# (Optional) Enable document conversion. Any non-Markdown file will be converted to a Markdown file.
+# Default: false
+IL_ENABLE_DOC_CONVERSION=false
diff --git a/.env.native.example b/.env.native.example
@@ -5,13 +5,25 @@ IL_UI_ADMIN_PASSWORD=password
 NEXTAUTH_SECRET=your_super_secret_random_string
 NEXTAUTH_URL=http://localhost:3000
 
-IL_UI_DEPLOYMENT=native # Two deployment modes are available: github and native
-IL_ENABLE_DEV_MODE=true #Enable this option if you want to enable UI features that helps in development, such as form Auto-Fill feature.
+# (Optional) Set to true to enable UI features that help in development, such as the form Auto-Fill feature.
+# Default: false
+IL_ENABLE_DEV_MODE=false
 
-NEXT_PUBLIC_TAXONOMY_ROOT_DIR= # Required value. Recommended /<INSERT_HOME_DIRECTORY_PATH>/.instructlab-ui
+# (Optional) Enable document conversion. Any non-Markdown file will be converted to a Markdown file.
+# Default: false
+IL_ENABLE_DOC_CONVERSION=false
 
+# (Optional) Document conversion requires the docling service to convert the documents.
+# Uncomment and fill in the http://host:port where the docling service is running.
+# By default, the docling service is assumed to be running on localhost, listening on port 5001.
+# IL_FILE_CONVERSION_SERVICE=http://localhost:5001
+
+# (Required) Set to the parent directory where taxonomy repo is cloned.
+NEXT_PUBLIC_TAXONOMY_ROOT_DIR=
+
+# (Optional) Enable experimental features such as synthetic data generation, training, and chat evaluation.
 NEXT_PUBLIC_EXPERIMENTAL_FEATURES=false
 
-# IL_FILE_CONVERSION_SERVICE=http://localhost:5001 # Uncomment and fill in the http://host:port if the docling conversion service is running.
-# NEXT_PUBLIC_API_SERVER=http://localhost:8080 # Uncomment and point to the URL the api-server is running on. Native mode only and needs to be running on the same host as the UI.
-# NEXT_PUBLIC_MODEL_SERVER_URL=http://x.x.x.x # Used for model chat evaluation vLLM instances. Currently, server side rendering is not supported so the client must have access to this address for model chat evaluation to function in the UI. Currently ports, 8000 & 8001 are hardcoded and why it is not an option to set.
+# (Optional) Uncomment and point to the URL the api-server is running on. Native mode only; the
+# api-server needs to be running on the same host as the UI.
+# NEXT_PUBLIC_API_SERVER=http://localhost:8080
diff --git a/deploy/podman/github/README.md b/deploy/podman/github/README.md
@@ -32,6 +32,7 @@ kubectl create secret generic ui-env \
   --from-literal=IL_MERLINITE_API="" \
   --from-literal=IL_MERLINITE_MODEL_NAME="" \
   --from-literal=IL_ENABLE_DEV_MODE=false \
+  --from-literal=IL_ENABLE_DOC_CONVERSION=false \
   --dry-run=client -o yaml > secret.yaml
 ```
 
diff --git a/deploy/podman/github/instructlab-ui.yaml b/deploy/podman/github/instructlab-ui.yaml
@@ -118,6 +118,11 @@ spec:
             secretKeyRef:
               name: ui-env
               key: IL_ENABLE_DEV_MODE
+        - name: IL_ENABLE_DOC_CONVERSION
+          valueFrom:
+            secretKeyRef:
+              name: ui-env
+              key: IL_ENABLE_DOC_CONVERSION
         ports:
         - containerPort: 3000
           hostPort: 3000
diff --git a/deploy/podman/github/secret.yaml.example b/deploy/podman/github/secret.yaml.example
@@ -16,6 +16,7 @@ data:
   IL_MERLINITE_API: ""
   IL_MERLINITE_MODEL_NAME: ""
   IL_ENABLE_DEV_MODE: ""
+  IL_ENABLE_DOC_CONVERSION: ""
 kind: Secret
 metadata:
   creationTimestamp: null
diff --git a/deploy/podman/native/README.md b/deploy/podman/native/README.md
@@ -25,6 +25,7 @@ kubectl create secret generic ui-env \
   --from-literal=NEXT_PUBLIC_TAXONOMY_ROOT_DIR="<TAXONOMY_ROOT_DIR>" \
   --from-literal=NEXT_PUBLIC_EXPERIMENTAL_FEATURES="false" \
   --from-literal=IL_ENABLE_DEV_MODE=false \
+  --from-literal=IL_ENABLE_DOC_CONVERSION=false \
   --dry-run=client -o yaml > secret.yaml
 ```
 
diff --git a/deploy/podman/native/instructlab-ui.yaml b/deploy/podman/native/instructlab-ui.yaml
@@ -83,6 +83,11 @@ spec:
             secretKeyRef:
               name: ui-env
               key: IL_ENABLE_DEV_MODE
+        - name: IL_ENABLE_DOC_CONVERSION
+          valueFrom:
+            secretKeyRef:
+              name: ui-env
+              key: IL_ENABLE_DOC_CONVERSION
         - name: NEXT_PUBLIC_API_SERVER
           valueFrom:
             secretKeyRef:
diff --git a/deploy/podman/native/secret.yaml.example b/deploy/podman/native/secret.yaml.example
@@ -1,14 +1,15 @@
 apiVersion: v1
 data:
-  IL_UI_ADMIN_PASSWORD: <PASSWORD>
-  IL_UI_ADMIN_USERNAME: <USERNAME>
-  IL_UI_DEPLOYMENT: <UI_DEPLOYMENT>
-  IL_ENABLE_DEV_MODE: <DEV_MODE>
-  NEXT_PUBLIC_EXPERIMENTAL_FEATURES: <EXPERIMENTAL_FEATURES>
-  NEXT_PUBLIC_TAXONOMY_ROOT_DIR: <TAXONOMY_ROOT_DIR>
-  NEXTAUTH_URL: <AUTH_URL>
-  NEXTAUTH_SECRET: <AUTH_SECRET>
-  NEXT_PUBLIC_API_SERVER: <API_SERVER_URL>
+  IL_UI_ADMIN_PASSWORD: ""
+  IL_UI_ADMIN_USERNAME: ""
+  IL_UI_DEPLOYMENT: ""
+  IL_ENABLE_DEV_MODE: ""
+  IL_ENABLE_DOC_CONVERSION: ""
+  NEXT_PUBLIC_EXPERIMENTAL_FEATURES: ""
+  NEXT_PUBLIC_TAXONOMY_ROOT_DIR: ""
+  NEXTAUTH_URL: ""
+  NEXTAUTH_SECRET: ""
+  NEXT_PUBLIC_API_SERVER: ""
 
 kind: Secret
 metadata:
diff --git a/src/app/api/envConfig/route.ts b/src/app/api/envConfig/route.ts
@@ -16,6 +16,7 @@ export async function GET() {
     UPSTREAM_REPO_NAME: process.env.NEXT_PUBLIC_TAXONOMY_REPO || '',
     DEPLOYMENT_TYPE: process.env.IL_UI_DEPLOYMENT || '',
     ENABLE_DEV_MODE: process.env.IL_ENABLE_DEV_MODE || 'false',
+    ENABLE_DOC_CONVERSION: process.env.IL_ENABLE_DOC_CONVERSION || 'false',
     EXPERIMENTAL_FEATURES: process.env.NEXT_PUBLIC_EXPERIMENTAL_FEATURES || '',
     TAXONOMY_ROOT_DIR: process.env.NEXT_PUBLIC_TAXONOMY_ROOT_DIR || '',
     TAXONOMY_KNOWLEDGE_DOCUMENT_REPO:
diff --git a/src/components/Contribute/Knowledge/UploadFile.tsx b/src/components/Contribute/Knowledge/UploadFile.tsx
@@ -45,13 +45,23 @@ export const UploadFile: React.FunctionComponent<UploadFileProps> = ({ existingF
   const [showExistingFilesStatus, setExistingFilesStatus] = useState(false);
   const [showOverwriteModal, setShowOverwriteModal] = useState(false);
   const [showFileDeleteModal, setShowFileDeleteModal] = useState(false);
+  const [enableDocConversion, setEnableDocConversion] = useState(false);
   const [fileToDelete, setFileToDelete] = useState<string[]>([]);
   const [filesToOverwrite, setFilesToOverwrite] = useState<File[]>([]);
   const [droppedFiles, setDroppedFiles] = React.useState<File[] | undefined>();
   const [statusIcon, setStatusIcon] = useState<'inProgress' | 'success' | 'danger'>('inProgress');
   const [modalText, setModalText] = useState('');
   React.useContext(MultipleFileUploadContext);
 
+  useEffect(() => {
+    const getEnvVariables = async () => {
+      const res = await fetch('/api/envConfig');
+      const envConfig = await res.json();
+      setEnableDocConversion(envConfig.ENABLE_DOC_CONVERSION === 'true');
+    };
+    getEnvVariables();
+  }, []);
+
   useEffect(() => {
     if (filesToUpload.length > 0) {
       setShowNewFilesStatus(true);
@@ -111,17 +121,19 @@ export const UploadFile: React.FunctionComponent<UploadFileProps> = ({ existingF
     setShowFileDeleteModal(false);
   };
 
-  // Define allowed file types
-  const allowedFileTypes: { [mime: string]: string[] } = {
-    'application/pdf': ['.pdf'],
-    'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx'],
-    'application/vnd.openxmlformats-officedocument.presentationml.presentation': ['.pptx'],
-    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'],
-    'image/*': ['.png', '.jpg', '.jpeg', '.gif', '.bmp', '.svg'],
-    'text/html': ['.html'],
-    'text/asciidoc': ['.adoc'],
-    'text/markdown': ['.md']
-  };
+  // Define allowed file types. If doc conversion is not enabled, only allow markdown files.
+  const allowedFileTypes: { [mime: string]: string[] } = enableDocConversion
+    ? {
+        'application/pdf': ['.pdf'],
+        'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx'],
+        'application/vnd.openxmlformats-officedocument.presentationml.presentation': ['.pptx'],
+        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'],
+        'image/*': ['.png', '.jpg', '.jpeg', '.gif', '.bmp', '.svg'],
+        'text/html': ['.html'],
+        'text/asciidoc': ['.adoc'],
+        'text/markdown': ['.md']
+      }
+    : { 'text/markdown': ['.md'] };
 
   // Handle drop (and re-drop) of files
   const handleFileDrop = (_event: DropEvent, files: File[]) => {
@@ -262,7 +274,11 @@ export const UploadFile: React.FunctionComponent<UploadFileProps> = ({ existingF
             <MultiFileUploadArea
               titleIcon={<UploadIcon />}
               titleText="Drag and drop files here or upload"
-              infoText="Accepted file types include PDF, DOCX, PPTX, XLSX, HTML, AsciiDoc, Markdown, and images. All files will be converted to Markdown."
+              infoText={
+                enableDocConversion
+                  ? 'Accepted file types include PDF, DOCX, PPTX, XLSX, HTML, AsciiDoc, Markdown, and images. All files will be converted to Markdown.'
+                  : 'Accepted file type: Markdown'
+              }
               uploadText="Upload from device"
               manualUploadText="Upload from git repository"
               onManualUpload={() => setShowUploadFromGitModal(true)}