watercrawl
diff --git a/README.md b/README.md
Lines changed: 1 addition & 17 deletions
diff --git a/package-lock.json b/package-lock.json
Lines changed: 914 additions & 865 deletions
diff --git a/package.json b/package.json
Lines changed: 3 additions & 3 deletions
diff --git a/src/tools/crawl-manager.ts b/src/tools/crawl-manager.ts
Lines changed: 23 additions & 9 deletions
diff --git a/src/tools/crawl.ts b/src/tools/crawl.ts
Lines changed: 17 additions & 5 deletions
diff --git a/src/tools/index.ts b/src/tools/index.ts
Lines changed: 1 addition & 5 deletions
diff --git a/src/tools/monitor.ts b/src/tools/monitor.ts
Lines changed: 0 additions & 105 deletions
diff --git a/src/tools/scrape.ts b/src/tools/scrape.ts
Lines changed: 21 additions & 15 deletions
diff --git a/src/tools/search-manager.ts b/src/tools/search-manager.ts
Lines changed: 0 additions & 63 deletions
@@ -92,25 +92,9 @@ npm run build
 3. Link the package for local development:
 
 ```bash
-npm run dev:link
+npm link @watercrawl/mcp
 ```
 
-### Testing
-
-The project includes tests for both SSE and npx modes:
-
-```bash
-# Run all tests
-npm test
-
-# Run only SSE tests
-npm run test:sse
-
-# Run only npx tests
-npm run test:npx
-```
-
-Tests require a valid WaterCrawl API key to be set in the `.env` file or passed as an environment variable.
 
 ### Contribution Guidelines
 
 
@@ -9,8 +9,8 @@
   "scripts": {
     "clean": "rm -rf dist",
     "build": "npm run clean && node scripts/build.js",
-    "start": "tsx --require tsconfig-paths/register index.ts",
-    "cli": "tsx --require tsconfig-paths/register cli.ts",
+    "start": "tsx --require tsconfig-paths/register src/index.ts",
+    "cli": "tsx --require tsconfig-paths/register src/cli.ts",
     "fix-paths": "tsc-alias",
     "lint": "eslint . --ext .ts",
     "lint:fix": "eslint --fix . --ext .ts",
@@ -36,7 +36,7 @@
     "access": "public"
   },
   "dependencies": {
-    "@watercrawl/nodejs": "^1.1.0",
+    "@watercrawl/nodejs": "^1.2.1",
     "commander": "^13.1.0",
     "dotenv": "^16.5.0",
     "fastmcp": "^1.23.2",
 
@@ -3,7 +3,7 @@ import { Context, ToolParameters, UserError, Tool } from 'fastmcp';
 import { getClient } from '@utils/client';
 
 interface CrawlManagerArgs {
-  action: 'list' | 'get' | 'stop' | 'download';
+  action: 'list' | 'get' | 'get_results' | 'stop';
   crawlRequestId?: string;
   page?: number;
   pageSize?: number;
@@ -22,18 +22,24 @@ const manageCrawl = async (args: CrawlManagerArgs | any, { session }: Context<an
         }
         const getResult = await client.getCrawlRequest(args.crawlRequestId);
         return JSON.stringify(getResult);
+      case 'get_results':
+        if (!args.crawlRequestId) {
+          throw new UserError("crawlRequestId is required for 'get_results' action");
+        }
+
+        const results = await client.getCrawlRequestResults(
+          args.crawlRequestId,
+          args.page || 1,
+          args.pageSize || 10,
+          args.download !== false,
+        );
+        return JSON.stringify(results);
       case 'stop':
         if (!args.crawlRequestId) {
           throw new UserError("crawlRequestId is required for 'stop' action");
         }
         await client.stopCrawlRequest(args.crawlRequestId);
         return JSON.stringify({ success: true, message: 'Crawl request stopped successfully' });
-      case 'download':
-        if (!args.crawlRequestId) {
-          throw new UserError("crawlRequestId is required for 'download' action");
-        }
-        const downloadResult = await client.downloadCrawlRequest(args.crawlRequestId);
-        return JSON.stringify(downloadResult);
       default:
         throw new UserError(`Unknown action: ${args.action}`);
     }
@@ -50,8 +56,16 @@ const parameters = z.object({
     .string()
     .optional()
     .describe('UUID of the crawl request (required for get, stop, and download actions)'),
-  page: z.number().optional().default(1).describe('Page number for listing (1-indexed)'),
-  pageSize: z.number().optional().default(10).describe('Number of items per page for listing'),
+  page: z
+    .number()
+    .optional()
+    .default(1)
+    .describe('Page number for listing (1-indexed), can use for get_results and list actions'),
+  pageSize: z
+    .number()
+    .optional()
+    .default(10)
+    .describe('Number of items per page for listing, can use for get_results and list actions'),
 });
 
 export const CrawlManagerTool: Tool<any, ToolParameters> = {
 
@@ -29,10 +29,21 @@ const parameters = z.object({
     .object({
       max_depth: z.number().optional().describe('Maximum depth to crawl'),
       page_limit: z.number().optional().describe('Maximum number of pages to crawl'),
-      allowed_domains: z.string().array().optional().describe('Allowed domains to crawl example: ["*.example.com"]'),
-      exclude_paths: z.string().array().optional().describe('Paths to exclude from crawling example: ["/path/*"]'),
-      include_paths: z.string().array().optional().describe('Paths to include in crawling example: ["/path/*"]'),
-
+      allowed_domains: z
+        .string()
+        .array()
+        .optional()
+        .describe('Allowed domains to crawl example: ["*.example.com"]'),
+      exclude_paths: z
+        .string()
+        .array()
+        .optional()
+        .describe('Paths to exclude from crawling example: ["/path/*"]'),
+      include_paths: z
+        .string()
+        .array()
+        .optional()
+        .describe('Paths to include in crawling example: ["/path/*"]'),
     })
     .optional()
     .describe('Spider options'),
@@ -66,7 +77,8 @@ const parameters = z.object({
 
 export const CrawlTool: Tool<any, ToolParameters> = {
   name: 'crawl',
-  description: 'Crawl a URL and its subpages with customizable depth and spider limitations. This is an async operation, with crawl manager you can get status and results.',
+  description:
+    'Crawl a URL and its subpages with customizable depth and spider limitations. This is an async operation, with crawl manager you can get status and results.',
   parameters: parameters,
   execute: crawlUrl,
 };
@@ -3,16 +3,12 @@ import { Tool, ToolParameters } from 'fastmcp';
 import { SearchTool } from './search';
 import { SitemapTool } from './sitemap';
 import { CrawlManagerTool } from './crawl-manager';
-import { SearchManagerTool } from './search-manager';
-import { MonitorTool } from './monitor';
 import { CrawlTool } from './crawl';
 
 export const tools: Tool<any, ToolParameters>[] = [
   ScrapeTool,
   SearchTool,
   SitemapTool,
-  CrawlManagerTool,
-  SearchManagerTool,
-  MonitorTool,
   CrawlTool,
+  CrawlManagerTool,
 ];
@@ -1,33 +1,38 @@
 import { z } from 'zod';
 import { Context, ToolParameters, UserError, Tool } from 'fastmcp';
 import { getClient } from '@utils/client';
-import type { PageOptions } from '@watercrawl/nodejs/dist/types';
+import type { CrawlRequest, PageOptions } from '@watercrawl/nodejs/dist/types';
 
 interface ScrapeArgs {
-  url: string;
+  urls: string[];
   pageOptions?: PageOptions;
-  sync?: boolean;
-  download?: boolean;
 }
 
 const scrapeUrl = async (args: ScrapeArgs | any, { session }: Context<any>) => {
   const client = getClient(session?.apiKey);
   try {
-    const req = await client.scrapeUrl(
-      args.url,
-      args.pageOptions || {},
-      {},
-      args.sync === false ? false : true,
-      args.download === false ? false : true,
-    );
-    return JSON.stringify(req);
+    const req = await client.createBatchCrawlRequest(args.urls, {}, args.pageOptions || {});
+    const results = [];
+    for await (const data of client.monitorCrawlRequest(req.uuid, true)) {
+      if (data.type === 'result') {
+        results.push(data.data);
+      }
+      if (data.type === 'state' && (data.data as CrawlRequest).status === 'finished') {
+        break;
+      }
+    }
+
+    return JSON.stringify({
+      ...req,
+      results,
+    });
   } catch (e) {
     throw new UserError(String(e));
   }
 };
 
 const parameters = z.object({
-  url: z.string().describe('URL to scrape'),
+  urls: z.string().array().describe('List of URLs to scrape'),
   pageOptions: z
     .object({
       exclude_tags: z.string().array().optional().describe('HTML tags to exclude'),
@@ -59,8 +64,9 @@ const parameters = z.object({
 });
 
 export const ScrapeTool: Tool<any, ToolParameters> = {
-  name: 'scrape-url',
-  description: 'Scrape a URL with optional configuration for page options, and more',
+  name: 'scrape-urls',
+  description:
+    'Scrape multiple(or single) URL(s) with optional configuration for page options, and more',
   parameters: parameters,
   execute: scrapeUrl,
 };