Skip to content

Commit d931a9c

Browse files
committed
rename page to chunk
1 parent 9eec452 commit d931a9c

File tree

1 file changed

+11
-11
lines changed

1 file changed

+11
-11
lines changed

src/tools/get-html-skeleton.ts

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -30,18 +30,18 @@ const getHtmlSkeletonArgs = z.object({
3030
.optional()
3131
.default(false)
3232
.describe('Whether to enable JavaScript rendering. Enabling this may increase the time taken to retrieve the HTML skeleton.'),
33-
page: z.number()
33+
chunk: z.number()
3434
.optional()
3535
.default(1)
36-
.describe('Page number to retrieve when paginating through content. Used to prevent exceeding the maximum tool output length.'),
36+
.describe('Chunk number to retrieve when getting the content. The content is split into chunks to prevent exceeding the maximum tool output length.'),
3737
});
3838

3939
export const getHtmlSkeleton: ToolEntry = {
4040
type: 'internal',
4141
tool: {
4242
name: HelperTools.GET_HTML_SKELETON,
4343
actorFullName: HelperTools.GET_HTML_SKELETON,
44-
description: `Retrieves the HTML skeleton (clean structure) from a given URL by stripping unwanted elements like scripts, styles, and non-essential attributes. This tool keeps only the core HTML structure, links, images, and data attributes for analysis. Supports optional JavaScript rendering for dynamic content and provides paginated output to handle large pages. This tool is useful for building web scrapers and data extraction tasks where a clean HTML structure is needed for writing concrete selectors or parsers.`,
44+
description: `Retrieves the HTML skeleton (clean structure) from a given URL by stripping unwanted elements like scripts, styles, and non-essential attributes. This tool keeps only the core HTML structure, links, images, and data attributes for analysis. Supports optional JavaScript rendering for dynamic content and provides chunked output to handle large HTML. This tool is useful for building web scrapers and data extraction tasks where a clean HTML structure is needed for writing concrete selectors or parsers.`,
4545
inputSchema: zodToJsonSchema(getHtmlSkeletonArgs),
4646
ajvValidate: ajv.compile(zodToJsonSchema(getHtmlSkeletonArgs)),
4747
call: async (toolArgs) => {
@@ -86,16 +86,16 @@ export const getHtmlSkeleton: ToolEntry = {
8686

8787
// Chunking logic
8888
const totalLength = strippedHtml.length;
89-
const pageSize = TOOL_MAX_OUTPUT_CHARS;
90-
const totalPages = Math.ceil(totalLength / pageSize);
91-
const startIndex = (parsed.page - 1) * pageSize;
92-
const endIndex = Math.min(startIndex + pageSize, totalLength);
93-
const pageContent = strippedHtml.slice(startIndex, endIndex);
94-
const hasNextPage = parsed.page < totalPages;
89+
const chunkSize = TOOL_MAX_OUTPUT_CHARS;
90+
const totalChunks = Math.ceil(totalLength / chunkSize);
91+
const startIndex = (parsed.chunk - 1) * chunkSize;
92+
const endIndex = Math.min(startIndex + chunkSize, totalLength);
93+
const chunkContent = strippedHtml.slice(startIndex, endIndex);
94+
const hasNextChunk = parsed.chunk < totalChunks;
9595

96-
const paginationInfo = `\n\n--- Page ${parsed.page} of ${totalPages} ---\n${hasNextPage ? `Next page: ${parsed.page + 1}` : 'End of content'}`;
96+
const chunkInfo = `\n\n--- Chunk ${parsed.chunk} of ${totalChunks} ---\n${hasNextChunk ? `Next chunk: ${parsed.chunk + 1}` : 'End of content'}`;
9797

98-
return buildMCPResponse([pageContent + paginationInfo]);
98+
return buildMCPResponse([chunkContent + chunkInfo]);
9999
},
100100
} as InternalTool,
101101
};

0 commit comments

Comments
 (0)