chore: update readme.md

BatLeDev · BatLeDev · commit c6da5b722329 · 2025-08-12T11:42:07.000+02:00
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -1,22 +1,24 @@
 # Contribution guidelines
 
-## Development environment
+## Setup
 
-Switch to the appropriate nodejs version:
+Switch to the appropriate Node.js version:
 
     nvm use
 
 Install dependencies:
 
     npm install
 
-Run in development mode and stdio transport:
+## Development
 
-    npx @modelcontextprotocol/inspector -e DATA_FAIR_URL=https://koumoul.com/data-fair npm run dev
+Available scripts in `package.json`:
 
-## Docker image
+- `npm run dev-stdio`: run the MCP server in stdio transport (requires building the Docker image)
+- `npm run dev-http`: run the MCP server in HTTP transport
+- `npm run dev-inspector`: launch the Inspector (uses `dev/resources/inspector.json`)
+- `npm run dev-zellij`: launch both the Inspector and the MCP server in HTTP mode using Zellij
 
-Test building and running the docker image:
+To build the Docker image locally:
 
-    docker build -t mcp-dev .
-    npx @modelcontextprotocol/inspector docker run -i --rm -e "DATA_FAIR_URL=https://koumoul.com/data-fair" mcp-dev
+    npm run build-image
diff --git a/README.md b/README.md
@@ -1,2 +1,10 @@
 # mcp
-MCP servers to help AI agents interact with the data-fair APIs
+
+Model Context Protocol (MCP) servers to allow AI agents to interact with the Data Fair ecosystem.
+
+## Environment Variables
+
+- `DATA_FAIR_URL`: URL of the data-fair portal (e.g., `https://opendata.koumoul.com` or `https://data.ademe.fr`)
+- `OBSERVER_ACTIVE`: enable observer (true/false)
+- `PORT`: port for the server to listen on
+- `TRANSPORT`: transport mode (`stdio` or `http`)
diff --git a/package.json b/package.json
@@ -5,10 +5,11 @@
   "type": "module",
   "main": "index.ts",
   "scripts": {
+    "build-image": "docker build -t mcp-dev .",
     "build-types": "df-build-types .",
     "dev-deps": "docker compose up -d --wait",
-    "dev-stdio": "TRANSPORT=stdio node --watch index.ts",
     "dev-http": "DEBUG=datasets-tools TRANSPORT=http node --watch index.ts",
+    "dev-stdio": "TRANSPORT=stdio node --watch index.ts",
     "dev-inspector": "npx @modelcontextprotocol/inspector --config dev/resources/inspector.json",
     "dev-zellij": "zellij --layout .zellij.kdl",
     "lint": "eslint .",
diff --git a/src/mcp-servers/datasets/tools.ts b/src/mcp-servers/datasets/tools.ts
@@ -229,11 +229,11 @@ const registerTools = (server: McpServer) => {
     'search_data',
     {
       title: 'Search data from a dataset',
-      description: 'Search for data rows in a specific dataset using either :\n- Full-text search across all columns (query) for quick, broad matches,\n- Precise filtering (filters) to apply exact conditions, comparisons, or column-specific searches.\nUse filters whenever your question involves multiple criteria or numerical/date ranges, as they yield more relevant and targeted results. The query parameter is better suited for simple, one-keyword searches across the entire dataset. Returns matching rows with relevance scores and a direct link to view filtered results in the dataset table interface. Always include dataset license, direct link and source information when presenting results to users. Use describe_dataset first to understand the data structure and available column keys.',
+      description: 'Search for data rows in a specific dataset using either :\n- Full-text search across all columns (query) for quick, broad matches,\n- Precise filtering (filters) to apply exact conditions, comparisons, or column-specific searches.\nUse filters whenever your question involves multiple criteria or numerical/date ranges, as they yield more relevant and targeted results. The query parameter is better suited for simple, one-keyword searches across the entire dataset. Returns matching rows with relevance scores and some metadata. ALWAYS include both the filtered view link AND the dataset source with license information when presenting results to users. Use describe_dataset first to understand the data structure and available column keys.',
       inputSchema: {
         datasetId: z.string().describe('The unique dataset ID obtained from search_datasets tool'),
         query: z.string().optional().describe('French keywords for full-text search across all dataset columns (simple keywords, not sentences). Do not use with filters parameter. Examples: "Jean Dupont", "Paris", "2025"'),
-        select: z.string().optional().describe('Optional comma-separated list of specific column keys to include in the results. Useful when the dataset has many columns to reduce output size. If not provided, all columns are returned. Use column keys from describe_dataset. Example: "nom,age,ville"'),
+        select: z.string().optional().describe('Optional comma-separated list of column keys to include in the results. Useful when the dataset has many columns to reduce output size. If not provided, all columns are returned. Use column keys from describe_dataset. Format: column1,column2,column3 (NO SPACES after commas). Example: "nom,age,ville"'),
         filters: z.record(
           z.string().regex(/^.+_(search|eq|gte|lte)$/, {
             message: 'Filter key must follow pattern: column_key + suffix (_eq, _search, _gte, _lte)'
@@ -246,7 +246,7 @@ const registerTools = (server: McpServer) => {
       outputSchema: {
         totalCount: z.number().describe('Total number of data rows matching the search criteria and filters'),
         datasetId: z.string().describe('The dataset ID that was searched'),
-        sourceUrl: z.string().describe('Direct URL to view the filtered dataset results in table format (must be included in responses for citation and direct access to filtered view)'),
+        filteredViewUrl: z.string().describe('Direct URL to view the filtered dataset results in table format (must be included in responses for citation and direct access to filtered view)'),
         lines: z.array(
           z.record(z.any()).describe('Data row object containing column keys as object keys with their values, plus _score field indicating search relevance (higher score = more relevant)')
         ).describe('Array of matching data rows (top 10 results). Each row contains dataset columns (using column keys) plus _score field for search relevance ranking')
@@ -260,7 +260,6 @@ const registerTools = (server: McpServer) => {
 
       // Build common search parameters for both fetch and source URLs
       const searchParams = new URLSearchParams()
-      searchParams.append('size', '10')
       if (params.query) {
         searchParams.append('q', params.query)
         searchParams.append('q_mode', 'complete')
@@ -274,10 +273,11 @@ const registerTools = (server: McpServer) => {
         }
       }
 
-      const fetchUrl = new URL(`/datasets/${params.datasetId}/lines`)
+      const filteredViewUrlObj = new URL(`${config.dataFairUrl}/data-fair/next-ui/embed/dataset/${params.datasetId}/table`)
+      filteredViewUrlObj.search = searchParams.toString()
+      const fetchUrl = new URL(`${config.dataFairUrl}/data-fair/api/v1/datasets/${params.datasetId}/lines`)
+      searchParams.append('size', '10')
       fetchUrl.search = searchParams.toString()
-      const sourceUrlObj = new URL(`${config.dataFairUrl}/data-fair/next-ui/embed/dataset/${params.datasetId}/table`)
-      sourceUrlObj.search = searchParams.toString()
 
       // Fetch detailed dataset information
       const response = (await axios.get(
@@ -289,7 +289,7 @@ const registerTools = (server: McpServer) => {
       const structuredContent = {
         totalCount: response.total,
         datasetId: params.datasetId,
-        sourceUrl: sourceUrlObj.toString(),
+        filteredViewUrl: filteredViewUrlObj.toString(),
         lines: response.results
       }