Skip to content

Commit 10d46da

Browse files
authored
fix: capture tools (#40)
1 parent 73dc8b1 commit 10d46da

File tree

9 files changed

+38
-28
lines changed

9 files changed

+38
-28
lines changed

apps/m2/src/manifest.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
"$schema": "https://json.schemastore.org/webextension.json",
33
"manifest_version": 2,
44
"name": "MCP Browser Kit M2",
5-
"version": "5.0.0",
5+
"version": "6.0.0",
66

77
"description": "Allow MCP Browser Kit to interact with the browser.",
88
"permissions": [

apps/m3/src/manifest.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
"$schema": "https://json.schemastore.org/chrome-manifest.json",
33
"manifest_version": 3,
44
"name": "MCP Browser Kit M3",
5-
"version": "5.0.0",
5+
"version": "6.0.0",
66
"description": "Allow MCP Browser Kit to interact with this browser.",
77
"permissions": [
88
"tabs",

apps/server/package.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@mcp-browser-kit/server",
3-
"version": "5.0.2",
3+
"version": "6.0.0",
44
"packageManager": "yarn@4.8.0",
55
"homepage": "https://github.com/ndthanhdev/mcp-browser-kit",
66
"bin": {

apps/server/src/services/mcp-server/browser-tools.ts

Lines changed: 17 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ export class BrowserTools {
4141
*/
4242
register(server: McpServer): void {
4343
this.registerGetBasicBrowserContext(server);
44-
this.registerCaptureActiveTab(server);
44+
this.registerCaptureTab(server);
4545
this.registerInvokeJsFn(server);
4646
this.registerOpenTab(server);
4747
this.registerCloseTab(server);
@@ -81,20 +81,25 @@ export class BrowserTools {
8181
}
8282

8383
/**
84-
* Registers the captureActiveTab tool
84+
* Registers the captureTab tool
8585
*/
86-
private registerCaptureActiveTab(server: McpServer): void {
87-
this.logger.verbose("Registering tool: captureActiveTab");
86+
private registerCaptureTab(server: McpServer): void {
87+
this.logger.verbose("Registering tool: captureTab");
8888
server.tool(
89-
"captureActiveTab",
90-
this.toolDescriptionsInputPort.captureActiveTabInstruction(),
91-
{},
92-
async () => {
93-
this.logger.info("Executing captureActiveTab");
94-
const overScreenshot = await over(this.toolsInputPort.captureTab);
89+
"captureTab",
90+
this.toolDescriptionsInputPort.captureTabInstruction(),
91+
tabKeySchema,
92+
async ({ tabKey }) => {
93+
this.logger.info("Executing captureTab", {
94+
tabKey,
95+
});
96+
const overScreenshot = await over(() =>
97+
this.toolsInputPort.captureTab(tabKey),
98+
);
9599

96100
if (!overScreenshot.ok) {
97-
this.logger.error("Failed to capture active tab screenshot", {
101+
this.logger.error("Failed to capture tab screenshot", {
102+
tabKey,
98103
reason: overScreenshot.reason,
99104
});
100105
return createErrorResponse(
@@ -105,6 +110,7 @@ export class BrowserTools {
105110

106111
const screenshot = overScreenshot.value;
107112
this.logger.verbose("Screenshot captured", {
113+
tabKey,
108114
width: screenshot.width,
109115
height: screenshot.height,
110116
});

docs/architecture.md

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,6 @@ flowchart TD
5252
end
5353
5454
subgraph ServerDriven["Driven"]
55-
ExtensionDriver
5655
ConfigProvider["ConfigProvider"]
5756
ExtensionChannelProvider
5857
LoggerProvider["LoggerProvider"]
@@ -63,7 +62,6 @@ flowchart TD
6362
ToolDescriptions --> ToolDescriptionUseCases
6463
ManageChannels --> ManageChannelUseCases
6564
%% From Core
66-
ServerToolCallUseCases --"x"--> ExtensionDriver
6765
ServerToolCallUseCases--> ExtensionChannelManager
6866
ManageChannelUseCases --> ExtensionChannelManager
6967
ExtensionChannelManager --> ExtensionChannelProvider

packages/core-server/src/core/tool-descriptions.ts

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -18,22 +18,23 @@ export class ToolDescriptionsUseCases implements ToolDescriptionsInputPort {
1818
" 1) getBasicBrowserContext → get browser state and tabKey",
1919
" 2) Analyze page content based on your goal and manifest version:",
2020
" - If interaction is required (clicking, filling forms, etc.):",
21-
" · For Manifest Version 2: Use captureActiveTab for visual context or getReadableElements for element identification",
21+
" · For Manifest Version 2: Use captureTab for visual context or getReadableElements for element identification",
2222
" · For other Manifest Versions: Use only getReadableElements for element identification",
2323
" - If no interaction is required (just reading page content):",
2424
" · Use getReadableText to extract all visible text from the page",
2525
" 3) Interact using click/fill/enter tools with the obtained tabKey",
2626
].join("\n");
2727
};
2828

29-
captureActiveTabInstruction = (): string => {
29+
captureTabInstruction = (): string => {
3030
return [
31-
"📷 Captures a screenshot of the active browser tab",
32-
"* Use this tool after calling getBasicBrowserContext to obtain visual context of the current page",
31+
"📷 Captures a screenshot of a browser tab",
32+
"* Use this tool after calling getBasicBrowserContext to obtain visual context of the page",
3333
"* The screenshot helps you see what the browser is displaying to the user",
34-
"* No parameters are needed as it automatically captures the active tab",
34+
"* Requires tabKey from getBasicBrowserContext",
3535
"* Returns an image with width, height, and data in base64 format",
36-
"* Workflow: 1) getBasicBrowserContext → 2) captureActiveTab → 3) interact with elements",
36+
"* Workflow: 1) getBasicBrowserContext → 2) captureTab → 3) interact with elements",
37+
"* Parameters: tabKey",
3738
"* NOTE: This feature is only available in browsers supporting Manifest Version 2",
3839
].join("\n");
3940
};
@@ -65,7 +66,7 @@ export class ToolDescriptionsUseCases implements ToolDescriptionsInputPort {
6566
"👆 Clicks on an element at specific X,Y coordinates",
6667
"* Use this to click on elements by their position on the screen",
6768
"* Requires tabKey from getBasicBrowserContext and x,y coordinates from the screenshot",
68-
"* Coordinates are based on the captureActiveTab screenshot dimensions",
69+
"* Coordinates are based on the captureTab screenshot dimensions",
6970
"* Useful when you know the visual position of an element",
7071
"* Parameters: tabKey, x, y",
7172
].join("\n");
@@ -76,7 +77,7 @@ export class ToolDescriptionsUseCases implements ToolDescriptionsInputPort {
7677
"⌨️ Types text into an input field at specific X,Y coordinates",
7778
"* Use this to enter text into form fields by their position",
7879
"* Requires tabKey from getBasicBrowserContext, x,y coordinates, and the text to enter",
79-
"* Coordinates are based on the captureActiveTab screenshot dimensions",
80+
"* Coordinates are based on the captureTab screenshot dimensions",
8081
"* First clicks at the specified position, then types the provided text",
8182
"* After filling text, check for associated submit-like buttons (submit, search, send, etc.)",
8283
"* If submit button is visible, use clickOnViewableElement with that button",
@@ -90,7 +91,7 @@ export class ToolDescriptionsUseCases implements ToolDescriptionsInputPort {
9091
"↵ Hits the Enter key on an element at specific X,Y coordinates",
9192
"* Use this to trigger actions like form submission or button clicks",
9293
"* Requires tabKey from getBasicBrowserContext and x,y coordinates from the screenshot",
93-
"* Coordinates are based on the captureActiveTab screenshot dimensions",
94+
"* Coordinates are based on the captureTab screenshot dimensions",
9495
"* Parameters: tabKey, x, y",
9596
].join("\n");
9697
};

packages/core-server/src/input-ports/tool-descriptions.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,8 @@ export interface ToolDescriptionsInputPort {
55
// GetTabs
66
getBasicBrowserContextInstruction(): string;
77

8-
// CaptureActiveTab
9-
captureActiveTabInstruction(): string;
8+
// CaptureTab
9+
captureTabInstruction(): string;
1010

1111
// GetReadableText
1212
getReadableTextInstruction(): string;

packages/server-driven-trpc-channel-provider/src/routers/defer.ts

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,8 +65,9 @@ export const createDeferRouter = (container: Container) => {
6565
signal.onabort = () => {
6666
logger.info(`Subscription aborted for channel: ${channelId}`);
6767
stopped = true;
68-
defer.reject(new Error("Client closed subscription"));
6968
unsubscribe();
69+
defer.reject(new Error("Client closed subscription"));
70+
extensionChannelProvider.closeChannel(channelId);
7071
};
7172
}
7273
while (!stopped) {

packages/server-driven-trpc-channel-provider/src/services/server-driven-trpc-channel-provider.ts

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,10 @@ export class ServerDrivenTrpcChannelProvider
5757
return this.baseExtensionChannelProvider.openChannel(id);
5858
};
5959

60+
closeChannel = (id: string) => {
61+
return this.baseExtensionChannelProvider.closeChannel(id);
62+
};
63+
6064
public async start() {
6165
this.logger.verbose("Starting HTTP Server");
6266
const httpServer = this.createHttpServer();

0 commit comments

Comments
 (0)