Skip to content

Commit b2d5e6e

Browse files
committed
refactor(@angular/cli): add logging and HTML removal to doc search tool
This commit enhances the `search_documentation` MCP tool by improving its error handling and the quality of the data it returns. The key changes are:

- **Error Logging:** The content fetching logic now logs a warning if it fails to retrieve or parse the documentation page, providing better visibility for debugging without crashing the tool.
- **HTML Removal:** All tags are now removed from the fetched HTML content, providing the AI with clean, plain-text content. This reduces noise, lowers the token count, and improves the quality of the input for the language model.

(cherry picked from commit bd1aa7b)
1 parent 5c2abff commit b2d5e6e

File tree

1 file changed

+27
-9
lines changed

1 file changed

+27
-9
lines changed

packages/angular/cli/src/commands/mcp/tools/doc-search.ts

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import type { LegacySearchMethodProps, SearchResponse } from 'algoliasearch';
1010
import { createDecipheriv } from 'node:crypto';
1111
import { z } from 'zod';
1212
import { at, iv, k1 } from '../constants';
13-
import { declareTool } from './tool-registry';
13+
import { McpToolContext, declareTool } from './tool-registry';
1414

1515
const ALGOLIA_APP_ID = 'L1XWT2UJ7F';
1616
// https://www.algolia.com/doc/guides/security/api-keys/#search-only-api-key
@@ -84,7 +84,7 @@ tutorials, concepts, and best practices.
8484
factory: createDocSearchHandler,
8585
});
8686

87-
function createDocSearchHandler() {
87+
function createDocSearchHandler({ logger }: McpToolContext) {
8888
let client: import('algoliasearch').SearchClient | undefined;
8989

9090
return async ({ query, includeTopContent }: DocSearchInput) => {
@@ -124,21 +124,23 @@ function createDocSearchHandler() {
124124
const { title: topTitle, breadcrumb: topBreadcrumb } = formatHitToParts(topHit);
125125
let topContent: string | undefined;
126126

127-
try {
128-
if (includeTopContent && typeof topHit.url === 'string') {
129-
const url = new URL(topHit.url);
130-
127+
if (includeTopContent && typeof topHit.url === 'string') {
128+
const url = new URL(topHit.url);
129+
try {
131130
// Only fetch content from angular.dev
132131
if (url.hostname === 'angular.dev' || url.hostname.endsWith('.angular.dev')) {
133132
const response = await fetch(url);
134133
if (response.ok) {
135134
const html = await response.text();
136-
topContent = extractMainContent(html);
135+
const mainContent = extractMainContent(html);
136+
if (mainContent) {
137+
topContent = stripHtml(mainContent);
138+
}
137139
}
138140
}
141+
} catch (e) {
142+
logger.warn(`Failed to fetch or parse content from ${url}: ${e}`);
139143
}
140-
} catch {
141-
// Ignore errors fetching content
142144
}
143145

144146
structuredResults.push({
@@ -175,6 +177,22 @@ function createDocSearchHandler() {
175177
};
176178
}
177179

180+
/**
 * Converts an HTML fragment into plain text.
 *
 * This is a lightweight, regex-based conversion (not a full HTML parser) intended
 * to produce readable text from documentation markup. It:
 *  - drops HTML comments and the entire contents of `<script>`/`<style>` elements,
 *  - turns `<br>` and the closing tags of common block elements into newlines so
 *    that text from adjacent blocks does not run together,
 *  - removes all remaining tags,
 *  - decodes common named entities and numeric character references in a single
 *    pass, so already-escaped text such as `&amp;lt;` is decoded exactly once.
 *
 * @param html The HTML string to strip.
 * @returns The text content of the HTML with surrounding whitespace trimmed.
 */
function stripHtml(html: string): string {
  // A Map (rather than an object literal) avoids accidental hits on inherited
  // keys such as `constructor` when looking up arbitrary entity names.
  const namedEntities = new Map<string, string>([
    ['lt', '<'],
    ['gt', '>'],
    ['amp', '&'],
    ['quot', '"'],
    ['apos', "'"],
    // Non-breaking spaces are normalized to regular spaces for plain-text output.
    ['nbsp', ' '],
  ]);

  return (
    html
      // Comments may legally contain `>`, so remove them before the generic tag pass.
      .replace(/<!--[\s\S]*?-->/g, '')
      // Script and style bodies are not human-readable content.
      .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, '')
      // Preserve block boundaries as line breaks.
      .replace(
        /<br\b[^>]*>|<\/(?:p|div|li|tr|h[1-6]|pre|blockquote|section|article)\s*>/gi,
        '\n',
      )
      // Remove every remaining tag.
      .replace(/<[^>]*>/g, '')
      // Decode entities only after tags are gone so escaped markup (e.g. code
      // samples containing `&lt;div&gt;`) survives as literal text.
      .replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (entity: string, body: string) => {
        if (!body.startsWith('#')) {
          // Unknown named entities are left untouched.
          return namedEntities.get(body) ?? entity;
        }

        const isHex = body.charAt(1).toLowerCase() === 'x';
        const codePoint = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);

        // `String.fromCodePoint` throws on out-of-range values; keep such
        // references as-is instead of failing the whole conversion.
        return codePoint > 0 && codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
      })
      .trim()
  );
}
195+
178196
/**
179197
* Extracts the content of the `<main>` element from an HTML string.
180198
*

0 commit comments

Comments
 (0)