From 62ffb1ed325ff0279bcfdb8dbea4a006c65ed3f6 Mon Sep 17 00:00:00 2001 From: Anil Vishnoi Date: Fri, 9 May 2025 12:24:05 -0700 Subject: [PATCH] Add API's for document add/replace/list/get/remove Signed-off-by: Anil Vishnoi --- src/app/api/documents/add/route.ts | 72 +++++++++++++ src/app/api/documents/get/route.ts | 93 ++++++++++++++++ src/app/api/documents/list/route.ts | 102 ++++++++++++++++++ src/app/api/documents/remove/route.ts | 62 +++++++++++ src/app/api/utils.ts | 3 +- .../Knowledge/KnowledgeWizard/UploadFile.tsx | 2 +- .../Contribute/Utils/documentUtils.ts | 4 +- src/types/index.ts | 2 +- 8 files changed, 335 insertions(+), 5 deletions(-) create mode 100644 src/app/api/documents/add/route.ts create mode 100644 src/app/api/documents/get/route.ts create mode 100644 src/app/api/documents/list/route.ts create mode 100644 src/app/api/documents/remove/route.ts diff --git a/src/app/api/documents/add/route.ts b/src/app/api/documents/add/route.ts new file mode 100644 index 00000000..5ce6ec04 --- /dev/null +++ b/src/app/api/documents/add/route.ts @@ -0,0 +1,72 @@ +// src/app/api/documents/add/route.ts + +'use server'; +import { NextRequest, NextResponse } from 'next/server'; +import * as git from 'isomorphic-git'; +import fs from 'fs'; +import path from 'path'; +import { cloneTaxonomyDocsRepo, DOC_POOL_DIR, TAXONOMY_DOCS_ROOT_DIR } from '@/app/api/utils'; +import { devLog } from '@/utils/devlog'; + +/** + * Handler to upload new files and replace existing files to the document pool of taxonomy knowledge doc repo. 
+ */
+export async function POST(req: NextRequest) {
+  try {
+    const body = await req.json();
+    const { newFiles } = body;
+    const docsRepoPath = await cloneTaxonomyDocsRepo();
+
+    // If the repository was not cloned, return an error
+    if (!docsRepoPath) {
+      return NextResponse.json({ error: 'Failed to clone taxonomy knowledge docs repository' }, { status: 500 });
+    }
+
+    // Checkout the main branch
+    await git.checkout({ fs, dir: docsRepoPath, ref: 'main' });
+
+    const subDirectory = DOC_POOL_DIR;
+
+    const newDocsDirPath = path.join(docsRepoPath, subDirectory);
+
+    if (!fs.existsSync(newDocsDirPath)) {
+      fs.mkdirSync(newDocsDirPath, { recursive: true });
+      devLog(`New sub directory ${newDocsDirPath} created successfully.`);
+    }
+
+    // Write the files to the doc pool; basename() drops any directory part of the client-supplied name
+    for (const file of newFiles) {
+      const filePath = path.join(newDocsDirPath, path.basename(file.fileName));
+      devLog(`Writing file to ${filePath} in taxonomy knowledge docs repository.`);
+      fs.writeFileSync(filePath, file.fileContent);
+    }
+
+    const filenames = newFiles.map((file: { fileName: string; fileContent: string }) => path.join(subDirectory, path.basename(file.fileName)));
+
+    // Stage the files
+    await git.add({ fs, dir: docsRepoPath, filepath: '.' });
+    await git.remove({ fs, dir: docsRepoPath, filepath: '.' });
+
+    // Commit the files
+    await git.commit({
+      fs,
+      dir: docsRepoPath,
+      author: { name: 'instructlab-ui', email: 'ui@instructlab.ai' },
+      message: `File uploaded: ${filenames}\n\nSigned-off-by: ui@instructlab.ai`
+    });
+
+    devLog(`Successfully uploaded following file to taxonomy knowledge docs repository: ${filenames}`);
+
+    const origTaxonomyDocsRepoDir = path.join(TAXONOMY_DOCS_ROOT_DIR, '/taxonomy-knowledge-docs');
+    return NextResponse.json(
+      {
+        repoUrl: origTaxonomyDocsRepoDir,
+        documentNames: filenames
+      },
+      { status: 201 }
+    );
+  } catch (error) {
+    console.error('Failed to upload knowledge file:', error);
+    return NextResponse.json({ error: 'Failed to upload knowledge file' }, { status: 500 });
+  }
+}
diff --git a/src/app/api/documents/get/route.ts b/src/app/api/documents/get/route.ts
new file mode 100644
index 00000000..5f4bea83
--- /dev/null
+++ b/src/app/api/documents/get/route.ts
@@ -0,0 +1,103 @@
+// src/app/api/documents/get/route.ts
+
+'use server';
+import { NextRequest, NextResponse } from 'next/server';
+import * as git from 'isomorphic-git';
+import fs from 'fs';
+import path from 'path';
+import { DOC_POOL_DIR, findTaxonomyDocRepoPath } from '@/app/api/utils';
+import { KnowledgeFile } from '@/types';
+
+const BASE_BRANCH = 'main';
+
+/**
+ * Function to retrieve a knowledge file, with its content and latest commit date, from the document pool.
+ * @param filename - Path of the file to retrieve, relative to the document pool directory
+ * @returns A KnowledgeFile object with the content
+ * @throws Error when the repository, branch or file is missing, or when filename resolves outside the document pool
+ */
+const getKnowledgeFiles = async (filename: string): Promise<KnowledgeFile> => {
+  const REPO_DIR = findTaxonomyDocRepoPath();
+
+  // Ensure the repository path exists
+  if (!fs.existsSync(REPO_DIR)) {
+    throw new Error('Taxonomy knowledge doc repository does not exist. No files present.');
+  }
+
+  // Check if the branch exists
+  const branches = await git.listBranches({ fs, dir: REPO_DIR });
+  if (!branches.includes(BASE_BRANCH)) {
+    throw new Error(`Branch "${BASE_BRANCH}" does not exist.`);
+  }
+
+  // Checkout the specified branch
+  await git.checkout({ fs, dir: REPO_DIR, ref: BASE_BRANCH });
+
+  // filename comes straight from the query string: resolve it and refuse anything outside the document pool
+  const docPoolPath = path.resolve(REPO_DIR, DOC_POOL_DIR);
+  const filePath = path.resolve(docPoolPath, filename);
+  if (!filePath.startsWith(docPoolPath + path.sep)) {
+    throw new Error(`File "${filename}" does not exist in document pool.`);
+  }
+
+  // Check if the file exists in the document pool
+  if (!fs.existsSync(filePath)) {
+    throw new Error(`File "${filename}" does not exist in document pool.`);
+  }
+
+  try {
+    // Retrieve the latest commit for the file on the specified branch
+    const logs = await git.log({
+      fs,
+      dir: REPO_DIR,
+      ref: BASE_BRANCH,
+      filepath: path.join(DOC_POOL_DIR, filename),
+      depth: 1 // Only the latest commit
+    });
+
+    if (logs.length === 0) {
+      throw new Error(`File "${filename}" exist, but has no related commit history.`);
+    }
+
+    const latestCommit = logs[0];
+
+    // The committer timestamp is multiplied by 1000 because Date takes milliseconds
+    const commitDate = new Date(latestCommit.commit.committer.timestamp * 1000).toISOString();
+
+    // Read the file content
+    const fileContent = fs.readFileSync(filePath, 'utf-8');
+
+    const knowledgeFile: KnowledgeFile = {
+      filename: path.basename(filename),
+      content: fileContent,
+      commitDate: commitDate
+    };
+    return knowledgeFile;
+  } catch (error) {
+    // Any failure above, including the missing-history error, reaches the caller as a missing-file error
+    console.error(`Failed to read file ${filename}:`, error);
+    throw new Error(`File "${filename}" does not exist in document pool.`);
+  }
+};
+
+/**
+ * Handler to retrieve knowledge file content.
+ * Responds 200 with `{ file }`, 400 when the `filename` query parameter is missing or empty, and 500 on any other failure.
+ */
+export async function GET(request: NextRequest) {
+  const url = new URL(request.url);
+  const filename = url.searchParams.get('filename');
+  if (filename) {
+    try {
+      const knowledgeFile = await getKnowledgeFiles(filename);
+      return NextResponse.json({ file: knowledgeFile }, { status: 200 });
+    } catch (error) {
+      console.error(`Failed to retrieve content of the file: ${filename}`, error);
+      return NextResponse.json({ error: (error as Error).message }, { status: 500 });
+    }
+  } else {
+    // A missing or empty file name is a client error, not a server failure
+    console.error(`File name must be not empty.`);
+    return NextResponse.json({ error: `File name must be not empty.` }, { status: 400 });
+  }
+}
diff --git a/src/app/api/documents/list/route.ts b/src/app/api/documents/list/route.ts
new file mode 100644
index 00000000..2b9cb3cb
--- /dev/null
+++ b/src/app/api/documents/list/route.ts
@@ -0,0 +1,105 @@
+// src/app/api/documents/list/route.ts
+
+'use server';
+import { NextResponse } from 'next/server';
+import * as git from 'isomorphic-git';
+import fs from 'fs';
+import path from 'path';
+import { DOC_POOL_DIR, findTaxonomyDocRepoPath } from '@/app/api/utils';
+import { KnowledgeFile } from '@/types';
+import { devLog } from '@/utils/devlog';
+
+const BASE_BRANCH = 'main';
+
+/**
+ * Function to retrieve the list of Markdown documents from the taxonomy-knowledge-docs document pool.
+ * @returns An array of KnowledgeFile entries (file name and latest commit date, no content); empty when the
+ *          repository or the document pool directory does not exist.
+ * @throws Error when the base branch does not exist or the commit history of a file cannot be read
+ */
+const getKnowledgeFiles = async (): Promise<KnowledgeFile[]> => {
+  const REPO_DIR = findTaxonomyDocRepoPath();
+  const knowledgeFiles: KnowledgeFile[] = [];
+
+  // Ensure the repository path exists
+  if (!fs.existsSync(REPO_DIR)) {
+    devLog("Taxonomy knowledge doc directory doesn't exist at :", REPO_DIR);
+    return knowledgeFiles;
+  }
+
+  // Check if the branch exists
+  const branches = await git.listBranches({ fs, dir: REPO_DIR });
+  if (!branches.includes(BASE_BRANCH)) {
+    throw new Error(`Branch "${BASE_BRANCH}" does not exist.`);
+  }
+
+  // Checkout the specified branch
+  await git.checkout({ fs, dir: REPO_DIR, ref: BASE_BRANCH });
+
+  // Locate the document pool directory inside the repository
+  const docPoolDir = path.join(REPO_DIR, DOC_POOL_DIR);
+
+  // Ensure the doc-pool directory exist
+  if (!fs.existsSync(docPoolDir)) {
+    devLog(`${DOC_POOL_DIR} directory doesn't exist.`);
+    return knowledgeFiles;
+  }
+
+  // Walk the document pool recursively; each entry is joined onto docPoolDir below
+  const allFiles = await fs.promises.readdir(docPoolDir, { recursive: true });
+
+  // Filter for Markdown files only
+  const markdownFiles = allFiles.filter((file) => path.extname(file).toLowerCase() === '.md');
+
+  for (const file of markdownFiles) {
+    const filePath = path.join(docPoolDir, file);
+
+    // Check if the file is a regular file
+    const stat = fs.statSync(filePath);
+    if (!stat.isFile()) {
+      continue;
+    }
+
+    try {
+      // Retrieve the latest commit for the file on the specified branch
+      const logs = await git.log({
+        fs,
+        dir: REPO_DIR,
+        ref: BASE_BRANCH,
+        filepath: path.join(DOC_POOL_DIR, file),
+        depth: 1 // Only the latest commit
+      });
+
+      if (logs.length === 0) {
+        // No commits found for this file; skip it
+        continue;
+      }
+
+      const latestCommit = logs[0];
+
+      const commitDate = new Date(latestCommit.commit.committer.timestamp * 1000).toISOString();
+      knowledgeFiles.push({
+        // NOTE(review): basename drops the sub directory of nested files, so two nested files can share a name - confirm this is intended
+        filename: path.basename(file),
+        commitDate: commitDate
+      });
+    } catch (error) {
+      console.error(`Failed to retrieve commit for file ${file}:`, error);
+      throw new Error(`Failed to retrieve commit for file: ${error}`);
+    }
+  }
+  return knowledgeFiles;
+};
+
+/**
+ * Handler to retrieve list of knowledge file from the taxonomy knowledge doc document pool.
+ */
+export async function GET() {
+  try {
+    const knowledgeFiles = await getKnowledgeFiles();
+    return NextResponse.json({ files: knowledgeFiles }, { status: 200 });
+  } catch (error) {
+    console.error('Failed to fetch list of files from document pool:', error);
+    return NextResponse.json({ error: (error as Error).message }, { status: 500 });
+  }
+}
diff --git a/src/app/api/documents/remove/route.ts b/src/app/api/documents/remove/route.ts
new file mode 100644
index 00000000..4b7f25aa
--- /dev/null
+++ b/src/app/api/documents/remove/route.ts
@@ -0,0 +1,79 @@
+// src/app/api/documents/remove/route.ts
+
+'use server';
+import { NextRequest, NextResponse } from 'next/server';
+import * as git from 'isomorphic-git';
+import fs from 'fs';
+import path from 'path';
+import { DOC_POOL_DIR, findTaxonomyDocRepoPath } from '@/app/api/utils';
+import { devLog } from '@/utils/devlog';
+
+/**
+ * Handler to delete a knowledge file from the taxonomy knowledge doc repo's document pool.
+ * Expects a JSON body `{ fileName }`, where fileName is relative to the document pool directory.
+ * Responds 201 on success, 400 for a missing or out-of-pool file name, and 500 on any other failure.
+ */
+export async function POST(req: NextRequest) {
+  try {
+    const body = await req.json();
+    const { fileName } = body;
+
+    // fileName is client input: require a non-empty string before it is used to build a path
+    if (typeof fileName !== 'string' || fileName === '') {
+      return NextResponse.json({ error: 'File name must be not empty.' }, { status: 400 });
+    }
+
+    const docsRepoPath = findTaxonomyDocRepoPath();
+
+    // If the repository was not cloned, return an error
+    if (!docsRepoPath) {
+      console.error('Taxonomy knowledge docs repository is not present on the host');
+      return NextResponse.json(
+        { error: 'Failed to delete the document from document pool. Taxonomy knowledge docs repository is not present.' },
+        { status: 500 }
+      );
+    }
+
+    // Checkout the main branch
+    await git.checkout({ fs, dir: docsRepoPath, ref: 'main' });
+
+    const newDocsDirPath = path.join(docsRepoPath, DOC_POOL_DIR);
+
+    if (!fs.existsSync(newDocsDirPath)) {
+      console.error(`Document pool directory doesn't exist: ${docsRepoPath}`);
+      return NextResponse.json({ error: 'Failed to delete the file from document pool. File does not exists.' }, { status: 500 });
+    }
+
+    // Resolve the target and refuse anything outside the document pool (e.g. "../other-file")
+    const docPoolPath = path.resolve(newDocsDirPath);
+    const filePath = path.resolve(docPoolPath, fileName);
+    if (!filePath.startsWith(docPoolPath + path.sep)) {
+      console.error(`Refusing to delete a file outside the document pool: ${fileName}`);
+      return NextResponse.json({ error: 'Failed to delete the file from document pool. File does not exists.' }, { status: 400 });
+    }
+
+    devLog(`Deleting file ${filePath} from document pool.`);
+    fs.rmSync(filePath);
+
+    // Stage the change. remove() takes the path of the index entry to drop, so pass the deleted
+    // file's repo-relative, forward-slash path to get the deletion into the commit.
+    const repoRelativePath = path.relative(path.resolve(docsRepoPath), filePath).split(path.sep).join('/');
+    await git.add({ fs, dir: docsRepoPath, filepath: '.' });
+    await git.remove({ fs, dir: docsRepoPath, filepath: repoRelativePath });
+
+    // Commit the files
+    await git.commit({
+      fs,
+      dir: docsRepoPath,
+      author: { name: 'instructlab-ui', email: 'ui@instructlab.ai' },
+      message: `File deleted from document pool: ${fileName}\n\nSigned-off-by: ui@instructlab.ai`
+    });
+
+    devLog(`Successfully deleted file ${fileName} from document pool.`);
+
+    return NextResponse.json({ message: `Successfully deleted file ${fileName} from document pool.` }, { status: 201 });
+  } catch (error) {
+    console.error('Failed to delete knowledge file:', error);
+    return NextResponse.json({ error: 'Failed to delete knowledge file' }, { status: 500 });
+  }
+}
diff --git a/src/app/api/utils.ts b/src/app/api/utils.ts
index 6a22be95..2a6a470f 100644
--- a/src/app/api/utils.ts
+++ b/src/app/api/utils.ts
@@ -4,6 +4,7 @@ import fs from 'fs';
 import * as git from 'isomorphic-git';
 import http from 'isomorphic-git/http/node';
 
+export const DOC_POOL_DIR = 'doc-pool';
 export const TAXONOMY_DOCS_ROOT_DIR = process.env.NEXT_PUBLIC_TAXONOMY_ROOT_DIR || '';
 export const TAXONOMY_DOCS_CONTAINER_MOUNT_DIR = '/tmp/.instructlab-ui';
 export const TAXONOMY_KNOWLEDGE_DOCS_REPO_URL =
@@ -24,7 +25,7 @@ export const cloneTaxonomyDocsRepo = async (): Promise => {
     return null;
   }
 
-  const taxonomyDocsDirectoryPath = path.join(remoteTaxonomyRepoDirFinal, '/taxonomy-knowledge-docs');
+  const taxonomyDocsDirectoryPath = path.join(path.dirname(remoteTaxonomyRepoDirFinal), '/taxonomy-knowledge-docs');
 
   if (fs.existsSync(taxonomyDocsDirectoryPath)) {
     console.log(`Using existing taxonomy knowledge docs repository at ${TAXONOMY_DOCS_ROOT_DIR}/taxonomy-knowledge-docs.`);
diff --git a/src/components/Contribute/Knowledge/KnowledgeWizard/UploadFile.tsx
b/src/components/Contribute/Knowledge/KnowledgeWizard/UploadFile.tsx index c9248bb8..bacace22 100644 --- a/src/components/Contribute/Knowledge/KnowledgeWizard/UploadFile.tsx +++ b/src/components/Contribute/Knowledge/KnowledgeWizard/UploadFile.tsx @@ -302,7 +302,7 @@ export const UploadFile: React.FunctionComponent = ({ existingF {existingFiles.map((file: KnowledgeFile) => { - const fileObject = new File([file.content], file.filename, { type: 'text/plain' }); + const fileObject = new File([file.content ? file.content : ''], file.filename, { type: 'text/plain' }); return ( { - updatedExistingFiles.push({ fileName: file.filename, fileContent: file.content }); + knowledgeFormData.uploadedFiles.map((file: { filename: string; content?: string }) => { + updatedExistingFiles.push({ fileName: file.filename, fileContent: file.content ? file.content : '' }); }); // Trigger the upload only if all the newly uploaded files were read successfully and there are existing uploaded files. if (newFiles.length === knowledgeFormData.filesToUpload.length && (newFiles.length !== 0 || updatedExistingFiles.length !== 0)) { diff --git a/src/types/index.ts b/src/types/index.ts index 3253f99e..88bafe2e 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -142,7 +142,7 @@ export interface SkillSeedExample extends SeedExample { export interface KnowledgeFile { filename: string; - content: string; + content?: string; commitSha?: string; commitDate?: string; }