Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 125 additions & 64 deletions src/lib/smart-search-plugin.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,31 @@
import { htmlToText } from "html-to-text";

function smartSearchPlugin({ endpoint, accessToken }) {
let isPluginExecuted = false;

return {
apply: (compiler) => {
compiler.hooks.done.tapPromise("SmartSearchPlugin", async () => {
if (isPluginExecuted) {
return;
}

isPluginExecuted = true;

if (compiler.options.mode !== "production") {
console.log("Skipping indexing in non-production mode.");
return;
}

try {
const pages = await collectPages(path.join(cwd(), "src/pages/docs"));

pages.push({
id: "test-document",
data: {
title: "Test Document",
content: "This is a test document for indexing.",
path: "/test-path",
},
});

console.log("Docs Pages collected for indexing:", pages.length);

await deleteExistingDocs(endpoint, accessToken);
await sendPagesToEndpoint(pages, endpoint, accessToken);
} catch (error) {
console.error("Error sending pages:", error);
console.error("Error in smartSearchPlugin:", error);
}
});
},
Expand All @@ -32,45 +37,54 @@

async function collectPages(directory) {
const pages = [];
const files = await fs.readdir(directory);
const entries = await fs.readdir(directory, { withFileTypes: true });

for (const file of files) {
const filePath = path.join(directory, file);
const stat = await fs.stat(filePath);
for (const entry of entries) {
const entryPath = path.join(directory, entry.name);

if (stat.isDirectory()) {
const subPages = await collectPages(filePath);
if (entry.isDirectory()) {
const subPages = await collectPages(entryPath);
pages.push(...subPages);
} else if (file.endsWith(".mdx")) {
const content = await fs.readFile(filePath, "utf8");
} else if (entry.isFile() && entry.name.endsWith(".mdx")) {
const content = await fs.readFile(entryPath, "utf8");

// Safely extract metadata using regex
const metadataMatch = content.match(
/export const metadata = (?<metadata>{[\S\s]+?});/,
/export\s+const\s+metadata\s*=\s*(?<metadata>{[\S\s]*?});/,
);

let metadata = {};

if (metadataMatch) {
if (
metadataMatch &&
metadataMatch.groups &&
metadataMatch.groups.metadata
) {
try {
// eslint-disable-next-line no-eval
metadata = eval(`(${metadataMatch.groups.metadata})`); // Parse the metadata block
metadata = eval(`(${metadataMatch.groups.metadata})`);

Check failure on line 63 in src/lib/smart-search-plugin.mjs

View workflow job for this annotation

GitHub Actions / Validate linting with ES Lint

eval can be harmful
} catch (error) {
console.error("Error parsing metadata:", error);
continue;
}
} else {
console.warn(`No metadata found in ${entryPath}. Skipping.`);
continue;
}

const textContent = htmlToText(content);
const id = filePath
.replace(cwd(), "")
.replaceAll("/", "-")
.replace(".mdx", "");

const cleanedPath = cleanPath(entryPath);

const id = `mdx:${cleanedPath}`;

console.log(`Indexing document with ID: ${id}, path: ${cleanedPath}`);

pages.push({
id,
data: {
title: metadata.title || undefined, // No fallback to "Untitled Document"
title: metadata.title,
content: textContent,
path: filePath.replace(cwd(), ""),
path: cleanedPath,
content_type: "mdx_doc",
},
});
}
Expand All @@ -79,13 +93,61 @@
return pages;
}

const query = `
mutation CreateIndexDocument($input: DocumentInput!) {
index(input: $input) {
success
/**
 * Convert an absolute path to an `.mdx` page into the site-relative route
 * that is stored alongside the indexed document.
 *
 * The path is made relative to the current working directory, a leading
 * `src/pages/` or `pages/` prefix is dropped, and the `.mdx` extension (or a
 * trailing `index.mdx`) is removed.
 *
 * @param {string} filePath - Path to an `.mdx` file.
 * @returns {string} Route that always starts with "/", e.g. "/docs/how-to".
 */
function cleanPath(filePath) {
  // path.relative() uses the platform separator; normalise to "/" so the
  // patterns below also match on Windows.
  const relativePath = path
    .relative(cwd(), filePath)
    .split(path.sep)
    .join("/");

  const route = relativePath
    .replace(/^src\/pages\//, "")
    .replace(/^pages\//, "")
    // `(^|\/)` also covers an index file directly under the pages root,
    // which has no leading slash left once the prefix is stripped.
    .replace(/(^|\/)index\.mdx$/, "")
    .replace(/\.mdx$/, "");

  return `/${route}`;
}

/**
 * Ask the search endpoint to delete every document whose `content_type` is
 * "mdx_doc" by POSTing a `deleteMany` GraphQL mutation.
 *
 * This is best-effort: HTTP failures, GraphQL errors and network/parsing
 * exceptions are logged to the console and never thrown to the caller.
 *
 * @param {string} endpoint - URL of the GraphQL endpoint.
 * @param {string} accessToken - Token sent as a Bearer `Authorization` header.
 * @returns {Promise<void>}
 */
async function deleteExistingDocs(endpoint, accessToken) {
  const variables = {
    filter: {
      content_type: "mdx_doc",
    },
  };

  const deleteQuery = `
    mutation DeleteDocs($filter: DocumentFilterInput) {
      deleteMany(filter: $filter) {
        code
        message
        success
      }
    }
  `;

  try {
    const response = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${accessToken}`,
      },
      body: JSON.stringify({ query: deleteQuery, variables }),
    });

    // Report HTTP-level failures (401, 500, ...) directly instead of trying
    // to parse an error page as JSON and logging a misleading parse error.
    if (!response.ok) {
      console.error(
        `Error deleting existing documents: ${response.status} ${response.statusText}`,
      );
      return;
    }

    const result = await response.json();

    if (result.errors) {
      console.error("GraphQL deletion error:", result.errors);
    } else {
      // `data` can be absent on a malformed response; avoid a TypeError.
      console.log("Existing MDX documents deleted:", result.data?.deleteMany);
    }
  } catch (error) {
    console.error("Error deleting existing documents:", error);
  }
}

const bulkIndexQuery = `
mutation BulkIndex($documents: [DocumentInput!]!) {
bulkIndex(input: { documents: $documents }) {
code
message
document {
documents {
id
data
}
Expand All @@ -99,40 +161,39 @@
return;
}

for (const page of pages) {
const documentId = `mdx:${page.id}`;
const variables = {
input: {
id: documentId,
data: {
content: page.data.content,
path: page.data.path,
title: page.data.title || undefined, // No fallback to "Untitled Document"
},
const documents = pages.map((page) => ({
id: page.id,
data: page.data,
}));

const variables = { documents };

try {
const response = await fetch(endpoint, {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${accessToken}`,
},
};

try {
const response = await fetch(endpoint, {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${accessToken}`,
},
body: JSON.stringify({ query, variables }),
});
body: JSON.stringify({ query: bulkIndexQuery, variables }),
});

const result = await response.json();
if (result.errors) {
console.error("GraphQL indexing error:", result.errors);
}
} catch (error) {
if (!response.ok) {
console.error(
"Error indexing document:",
page.data.title || "No title",
error,
`Error during bulk indexing: ${response.status} ${response.statusText}`,
);
return;
}

const result = await response.json();

if (result.errors) {
console.error("GraphQL bulk indexing error:", result.errors);
} else {
console.log(`Indexed ${documents.length} documents successfully.`);
}
} catch (error) {
console.error("Error during bulk indexing:", error);
}
}

Expand Down
91 changes: 61 additions & 30 deletions src/pages/api/search.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
import process from "node:process";

// Example input: /src/pages/docs/how-to/authentication/index.mdx
// Example output: /docs/how-to/authentication
const generateDocPath = (filePath) =>
filePath.replace(/^\/src\/pages/, "").replace(/\/index\.mdx$/, "");
/**
 * Normalise a stored document path into a site route.
 *
 * Strips a leading `src/pages` or `pages` directory (with or without a
 * leading slash), then a trailing `/index.mdx` or `.mdx` extension. Paths
 * that are already clean pass through unchanged; an empty result becomes "/".
 *
 * @param {string} filePath - Path as stored on the indexed document.
 * @returns {string} Cleaned route, or "/" when nothing is left.
 */
function cleanPath(filePath) {
  const route = filePath
    // The lookahead limits the match to a whole path segment, so routes such
    // as "/pagespeed/..." or "/src/pagesetup/..." are not truncated.
    .replace(/^\/?src\/pages(?=\/|$)/, "")
    .replace(/^\/?pages(?=\/|$)/, "")
    .replace(/\/index\.mdx$/, "")
    .replace(/\.mdx$/, "");

  return route || "/";
}

export default async function handler(req, res) {
const endpoint = process.env.NEXT_PUBLIC_SEARCH_ENDPOINT;
Expand All @@ -15,16 +20,16 @@ export default async function handler(req, res) {
}

const graphqlQuery = `
query FindDocuments($query: String!) {
find(query: $query) {
total
documents {
id
data
}
query FindDocuments($query: String!) {
find(query: $query) {
total
documents {
id
data
}
}
`;
}
`;

try {
const response = await fetch(endpoint, {
Expand All @@ -42,33 +47,59 @@ export default async function handler(req, res) {
const result = await response.json();

if (result.errors) {
console.error("Elasticsearch errors:", result.errors);
console.error("Search errors:", result.errors);
return res.status(500).json({ errors: result.errors });
}

const formattedResults = result.data.find.documents.map((content) => {
const contentType = content.data.post_type ?? "doc";
const formattedResults = result.data.find.documents
.map((content) => {
const contentType = content.data.content_type || content.data.post_type;
let item; // Initialize the variable to hold the result

if (contentType === "mdx_doc" && content.data.title) {
// MDX Document
const path = content.data.path ? cleanPath(content.data.path) : "/";

item = {
id: content.id,
title: content.data.title,
path,
type: "mdx_doc",
};
} else if (
(contentType === "wp_post" || contentType === "post") &&
content.data.post_title &&
content.data.post_name
) {
// WordPress Post
item = {
id: content.id,
title: content.data.post_title,
path: `/blog/${content.data.post_name}`,
type: "post",
};
} else {
item = undefined;
}

return item;
})
.filter((item) => item !== undefined);

if (contentType === "doc") {
return {
id: content.id,
title: content.data.title || "Untitled",
path: content.data.path ? generateDocPath(content.data.path) : "#",
type: contentType,
};
// Remove duplicates based on ID
const seenIds = new Set();
const uniqueResults = formattedResults.filter((item) => {
if (seenIds.has(item.id)) {
return false; // Skip if already in the Set
}

return {
id: content.id,
title: content.data.post_title || "Untitled",
path: `/blog/${content.data.post_name}`,
type: contentType,
};
seenIds.add(item.id); // Add new ID to the Set
return true; // Keep this item
});

return res.status(200).json(formattedResults);
return res.status(200).json(uniqueResults);
} catch (error) {
console.error("Error fetching MDX data:", error);
console.error("Error fetching search data:", error);
return res.status(500).json({ error: error.message });
}
}
Loading