Skip to content

Commit c6da5b7

Browse files
committed
chore: update readme.md
1 parent c151d19 commit c6da5b7

File tree

4 files changed

+29
-18
lines changed

4 files changed

+29
-18
lines changed

CONTRIBUTING.md

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,24 @@
11
# Contribution guidelines
22

3-
## Development environment
3+
## Setup
44

5-
Switch to the appropriate nodejs version:
5+
Switch to the appropriate Node.js version:
66

77
nvm use
88

99
Install dependencies:
1010

1111
npm install
1212

13-
Run in development mode and stdio transport:
13+
## Development
1414

15-
npx @modelcontextprotocol/inspector -e DATA_FAIR_URL=https://koumoul.com/data-fair npm run dev
15+
Available scripts in `package.json`:
1616

17-
## Docker image
17+
- `npm run dev-stdio`: run the MCP server in stdio transport (requires building the Docker image)
18+
- `npm run dev-http`: run the MCP server in HTTP transport
19+
- `npm run dev-inspector`: launch the Inspector (uses `dev/resources/inspector.json`)
20+
- `npm run dev-zellij`: launch both Inspector and MCP in HTTP mode using Zellij
1821

19-
Test building and running the docker image:
22+
To build the Docker image locally:
2023

21-
docker build -t mcp-dev .
22-
npx @modelcontextprotocol/inspector docker run -i --rm -e "DATA_FAIR_URL=https://koumoul.com/data-fair" mcp-dev
24+
npm run build-image

README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,10 @@
11
# mcp
2-
MCP servers to help AI agents interact with the data-fair APIs
2+
3+
Model Context Protocol (MCP) servers to allow AI agents to interact with the Data Fair ecosystem.
4+
5+
## Environment Variables
6+
7+
- `DATA_FAIR_URL`: URL of the data-fair portal (e.g., `https://opendata.koumoul.com` or `https://data.ademe.fr`)
8+
- `OBSERVER_ACTIVE`: enable observer (true/false)
9+
- `PORT`: port for the server to listen on
10+
- `TRANSPORT`: transport mode (`stdio` or `http`)

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,11 @@
55
"type": "module",
66
"main": "index.ts",
77
"scripts": {
8+
"build-image": "docker build -t mcp-dev .",
89
"build-types": "df-build-types .",
910
"dev-deps": "docker compose up -d --wait",
10-
"dev-stdio": "TRANSPORT=stdio node --watch index.ts",
1111
"dev-http": "DEBUG=datasets-tools TRANSPORT=http node --watch index.ts",
12+
"dev-stdio": "TRANSPORT=stdio node --watch index.ts",
1213
"dev-inspector": "npx @modelcontextprotocol/inspector --config dev/resources/inspector.json",
1314
"dev-zellij": "zellij --layout .zellij.kdl",
1415
"lint": "eslint .",

src/mcp-servers/datasets/tools.ts

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -229,11 +229,11 @@ const registerTools = (server: McpServer) => {
229229
'search_data',
230230
{
231231
title: 'Search data from a dataset',
232-
description: 'Search for data rows in a specific dataset using either :\n- Full-text search across all columns (query) for quick, broad matches,\n- Precise filtering (filters) to apply exact conditions, comparisons, or column-specific searches.\nUse filters whenever your question involves multiple criteria or numerical/date ranges, as they yield more relevant and targeted results. The query parameter is better suited for simple, one-keyword searches across the entire dataset. Returns matching rows with relevance scores and a direct link to view filtered results in the dataset table interface. Always include dataset license, direct link and source information when presenting results to users. Use describe_dataset first to understand the data structure and available column keys.',
232+
description: 'Search for data rows in a specific dataset using either :\n- Full-text search across all columns (query) for quick, broad matches,\n- Precise filtering (filters) to apply exact conditions, comparisons, or column-specific searches.\nUse filters whenever your question involves multiple criteria or numerical/date ranges, as they yield more relevant and targeted results. The query parameter is better suited for simple, one-keyword searches across the entire dataset. Returns matching rows with relevance scores and some metadata. ALWAYS include both the filtered view link AND the dataset source with license information when presenting results to users. Use describe_dataset first to understand the data structure and available column keys.',
233233
inputSchema: {
234234
datasetId: z.string().describe('The unique dataset ID obtained from search_datasets tool'),
235235
query: z.string().optional().describe('French keywords for full-text search across all dataset columns (simple keywords, not sentences). Do not use with filters parameter. Examples: "Jean Dupont", "Paris", "2025"'),
236-
select: z.string().optional().describe('Optional comma-separated list of specific column keys to include in the results. Useful when the dataset has many columns to reduce output size. If not provided, all columns are returned. Use column keys from describe_dataset. Example: "nom,age,ville"'),
236+
select: z.string().optional().describe('Optional comma-separated list of column keys to include in the results. Useful when the dataset has many columns to reduce output size. If not provided, all columns are returned. Use column keys from describe_dataset. Format: column1,column2,column3 (NO SPACES after commas). Example: "nom,age,ville"'),
237237
filters: z.record(
238238
z.string().regex(/^.+_(search|eq|gte|lte)$/, {
239239
message: 'Filter key must follow pattern: column_key + suffix (_eq, _search, _gte, _lte)'
@@ -246,7 +246,7 @@ const registerTools = (server: McpServer) => {
246246
outputSchema: {
247247
totalCount: z.number().describe('Total number of data rows matching the search criteria and filters'),
248248
datasetId: z.string().describe('The dataset ID that was searched'),
249-
sourceUrl: z.string().describe('Direct URL to view the filtered dataset results in table format (must be included in responses for citation and direct access to filtered view)'),
249+
filteredViewUrl: z.string().describe('Direct URL to view the filtered dataset results in table format (must be included in responses for citation and direct access to filtered view)'),
250250
lines: z.array(
251251
z.record(z.any()).describe('Data row object containing column keys as object keys with their values, plus _score field indicating search relevance (higher score = more relevant)')
252252
).describe('Array of matching data rows (top 10 results). Each row contains dataset columns (using column keys) plus _score field for search relevance ranking')
@@ -260,7 +260,6 @@ const registerTools = (server: McpServer) => {
260260

261261
// Build common search parameters shared by the fetch URL and the filtered view URL
262262
const searchParams = new URLSearchParams()
263-
searchParams.append('size', '10')
264263
if (params.query) {
265264
searchParams.append('q', params.query)
266265
searchParams.append('q_mode', 'complete')
@@ -274,10 +273,11 @@ const registerTools = (server: McpServer) => {
274273
}
275274
}
276275

277-
const fetchUrl = new URL(`/datasets/${params.datasetId}/lines`)
276+
const filteredViewUrlObj = new URL(`${config.dataFairUrl}/data-fair/next-ui/embed/dataset/${params.datasetId}/table`)
277+
filteredViewUrlObj.search = searchParams.toString()
278+
const fetchUrl = new URL(`${config.dataFairUrl}/data-fair/api/v1/datasets/${params.datasetId}/lines`)
279+
searchParams.append('size', '10')
278280
fetchUrl.search = searchParams.toString()
279-
const sourceUrlObj = new URL(`${config.dataFairUrl}/data-fair/next-ui/embed/dataset/${params.datasetId}/table`)
280-
sourceUrlObj.search = searchParams.toString()
281281

282282
// Fetch detailed dataset information
283283
const response = (await axios.get(
@@ -289,7 +289,7 @@ const registerTools = (server: McpServer) => {
289289
const structuredContent = {
290290
totalCount: response.total,
291291
datasetId: params.datasetId,
292-
sourceUrl: sourceUrlObj.toString(),
292+
filteredViewUrl: filteredViewUrlObj.toString(),
293293
lines: response.results
294294
}
295295

0 commit comments

Comments
 (0)