diff --git a/src/lib/smart-search-plugin.mjs b/src/lib/smart-search-plugin.mjs index 145d15cc..a6ca5fc8 100644 --- a/src/lib/smart-search-plugin.mjs +++ b/src/lib/smart-search-plugin.mjs @@ -4,26 +4,31 @@ import { cwd } from "node:process"; import { htmlToText } from "html-to-text"; function smartSearchPlugin({ endpoint, accessToken }) { + let isPluginExecuted = false; + return { apply: (compiler) => { compiler.hooks.done.tapPromise("SmartSearchPlugin", async () => { + if (isPluginExecuted) { + return; + } + + isPluginExecuted = true; + + if (compiler.options.mode !== "production") { + console.log("Skipping indexing in non-production mode."); + return; + } + try { const pages = await collectPages(path.join(cwd(), "src/pages/docs")); - pages.push({ - id: "test-document", - data: { - title: "Test Document", - content: "This is a test document for indexing.", - path: "/test-path", - }, - }); - console.log("Docs Pages collected for indexing:", pages.length); + await deleteExistingDocs(endpoint, accessToken); await sendPagesToEndpoint(pages, endpoint, accessToken); } catch (error) { - console.error("Error sending pages:", error); + console.error("Error in smartSearchPlugin:", error); } }); }, @@ -32,45 +37,54 @@ function smartSearchPlugin({ endpoint, accessToken }) { async function collectPages(directory) { const pages = []; - const files = await fs.readdir(directory); + const entries = await fs.readdir(directory, { withFileTypes: true }); - for (const file of files) { - const filePath = path.join(directory, file); - const stat = await fs.stat(filePath); + for (const entry of entries) { + const entryPath = path.join(directory, entry.name); - if (stat.isDirectory()) { - const subPages = await collectPages(filePath); + if (entry.isDirectory()) { + const subPages = await collectPages(entryPath); pages.push(...subPages); - } else if (file.endsWith(".mdx")) { - const content = await fs.readFile(filePath, "utf8"); + } else if (entry.isFile() && entry.name.endsWith(".mdx")) { 
+ const content = await fs.readFile(entryPath, "utf8"); - // Safely extract metadata using regex const metadataMatch = content.match( - /export const metadata = (?<metadata>{[\S\s]+?});/, + /export\s+const\s+metadata\s*=\s*(?<metadata>{[\S\s]*?});/, ); + let metadata = {}; - if (metadataMatch) { + if ( + metadataMatch && + metadataMatch.groups && + metadataMatch.groups.metadata + ) { try { - // eslint-disable-next-line no-eval - metadata = eval(`(${metadataMatch.groups.metadata})`); // Parse the metadata block + metadata = eval(`(${metadataMatch.groups.metadata})`); } catch (error) { console.error("Error parsing metadata:", error); + continue; } + } else { + console.warn(`No metadata found in ${entryPath}. Skipping.`); + continue; } const textContent = htmlToText(content); - const id = filePath - .replace(cwd(), "") - .replaceAll("/", "-") - .replace(".mdx", ""); + + const cleanedPath = cleanPath(entryPath); + + const id = `mdx:${cleanedPath}`; + + console.log(`Indexing document with ID: ${id}, path: ${cleanedPath}`); pages.push({ id, data: { - title: metadata.title || undefined, // No fallback to "Untitled Document" + title: metadata.title, content: textContent, - path: filePath.replace(cwd(), ""), + path: cleanedPath, + content_type: "mdx_doc", }, }); } @@ -79,13 +93,61 @@ async function collectPages(directory) { return pages; } -const query = ` - mutation CreateIndexDocument($input: DocumentInput!) 
{ - index(input: $input) { - success +function cleanPath(filePath) { + const relativePath = path.relative(cwd(), filePath); + return ( + "/" + + relativePath + .replace(/^src\/pages\//, "") + .replace(/^pages\//, "") + .replace(/\/index\.mdx$/, "") + .replace(/\.mdx$/, "") + ); +} + +async function deleteExistingDocs(endpoint, accessToken) { + const variables = { + filter: { + content_type: "mdx_doc", + }, + }; + + const deleteQuery = ` + mutation DeleteDocs($filter: DocumentFilterInput) { + deleteMany(filter: $filter) { + code + message + success + } + } + `; + + try { + const response = await fetch(endpoint, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${accessToken}`, + }, + body: JSON.stringify({ query: deleteQuery, variables }), + }); + + const result = await response.json(); + if (result.errors) { + console.error("GraphQL deletion error:", result.errors); + } else { + console.log("Existing MDX documents deleted:", result.data.deleteMany); + } + } catch (error) { + console.error("Error deleting existing documents:", error); + } +} + +const bulkIndexQuery = ` + mutation BulkIndex($documents: [DocumentInput!]!) 
{ + bulkIndex(input: { documents: $documents }) { code - message - document { + documents { id data } @@ -99,40 +161,39 @@ async function sendPagesToEndpoint(pages, endpoint, accessToken) { return; } - for (const page of pages) { - const documentId = `mdx:${page.id}`; - const variables = { - input: { - id: documentId, - data: { - content: page.data.content, - path: page.data.path, - title: page.data.title || undefined, // No fallback to "Untitled Document" - }, + const documents = pages.map((page) => ({ + id: page.id, + data: page.data, + })); + + const variables = { documents }; + + try { + const response = await fetch(endpoint, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${accessToken}`, }, - }; - - try { - const response = await fetch(endpoint, { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${accessToken}`, - }, - body: JSON.stringify({ query, variables }), - }); + body: JSON.stringify({ query: bulkIndexQuery, variables }), + }); - const result = await response.json(); - if (result.errors) { - console.error("GraphQL indexing error:", result.errors); - } - } catch (error) { + if (!response.ok) { console.error( - "Error indexing document:", - page.data.title || "No title", - error, + `Error during bulk indexing: ${response.status} ${response.statusText}`, ); + return; + } + + const result = await response.json(); + + if (result.errors) { + console.error("GraphQL bulk indexing error:", result.errors); + } else { + console.log(`Indexed ${documents.length} documents successfully.`); } + } catch (error) { + console.error("Error during bulk indexing:", error); } } diff --git a/src/pages/api/search.js b/src/pages/api/search.js index 078e06be..28c6b55f 100644 --- a/src/pages/api/search.js +++ b/src/pages/api/search.js @@ -1,9 +1,14 @@ import process from "node:process"; -// Example input: /src/pages/docs/how-to/authentication/index.mdx -// Example output: 
/docs/how-to/authentication -const generateDocPath = (filePath) => - filePath.replace(/^\/src\/pages/, "").replace(/\/index\.mdx$/, ""); +function cleanPath(filePath) { + return ( + filePath + .replace(/^\/?src\/pages/, "") + .replace(/^\/?pages/, "") + .replace(/\/index\.mdx$/, "") + .replace(/\.mdx$/, "") || "/" + ); +} export default async function handler(req, res) { const endpoint = process.env.NEXT_PUBLIC_SEARCH_ENDPOINT; @@ -15,16 +20,16 @@ export default async function handler(req, res) { } const graphqlQuery = ` - query FindDocuments($query: String!) { - find(query: $query) { - total - documents { - id - data - } + query FindDocuments($query: String!) { + find(query: $query) { + total + documents { + id + data } } - `; + } + `; try { const response = await fetch(endpoint, { @@ -42,33 +47,59 @@ export default async function handler(req, res) { const result = await response.json(); if (result.errors) { - console.error("Elasticsearch errors:", result.errors); + console.error("Search errors:", result.errors); return res.status(500).json({ errors: result.errors }); } - const formattedResults = result.data.find.documents.map((content) => { - const contentType = content.data.post_type ?? "doc"; + const formattedResults = result.data.find.documents + .map((content) => { + const contentType = content.data.content_type || content.data.post_type; + let item; // Initialize the variable to hold the result + + if (contentType === "mdx_doc" && content.data.title) { + // MDX Document + const path = content.data.path ? 
cleanPath(content.data.path) : "/"; + + item = { + id: content.id, + title: content.data.title, + path, + type: "mdx_doc", + }; + } else if ( + (contentType === "wp_post" || contentType === "post") && + content.data.post_title && + content.data.post_name + ) { + // WordPress Post + item = { + id: content.id, + title: content.data.post_title, + path: `/blog/${content.data.post_name}`, + type: "post", + }; + } else { + item = undefined; + } + + return item; + }) + .filter((item) => item !== undefined); - if (contentType === "doc") { - return { - id: content.id, - title: content.data.title || "Untitled", - path: content.data.path ? generateDocPath(content.data.path) : "#", - type: contentType, - }; + // Remove duplicates based on ID + const seenIds = new Set(); + const uniqueResults = formattedResults.filter((item) => { + if (seenIds.has(item.id)) { + return false; // Skip if already in the Set } - return { - id: content.id, - title: content.data.post_title || "Untitled", - path: `/blog/${content.data.post_name}`, - type: contentType, - }; + seenIds.add(item.id); // Add new ID to the Set + return true; // Keep this item }); - return res.status(200).json(formattedResults); + return res.status(200).json(uniqueResults); } catch (error) { - console.error("Error fetching MDX data:", error); + console.error("Error fetching search data:", error); return res.status(500).json({ error: error.message }); } }