Skip to content

Commit afd971e

Browse files
committed
feat: add filters like _eq, _search, _lte, _gte
1 parent 6e86bd1 commit afd971e

File tree

3 files changed

+72
-44
lines changed

3 files changed

+72
-44
lines changed

package-lock.json

Lines changed: 12 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/mcp-servers/datasets/index.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
import packageJson from '../../../package.json' with { type: 'json' }
22
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
33

4-
import registerResources from './resources.ts'
4+
// import registerResources from './resources.ts'
55
import registerTools from './tools.ts'
6-
import registerPrompts from './prompts.ts'
6+
// import registerPrompts from './prompts.ts'
77

88
/**
99
* The MCP server instance for datasets
@@ -26,8 +26,8 @@ const server = new McpServer({
2626
}
2727
})
2828

29-
registerResources(server)
29+
// registerResources(server)
3030
registerTools(server)
31-
registerPrompts(server)
31+
// registerPrompts(server)
3232

3333
export default server

src/mcp-servers/datasets/tools.ts

Lines changed: 56 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -21,33 +21,34 @@ const axiosOptions: AxiosRequestConfig = {
2121

2222
const registerTools = (server: McpServer) => {
2323
/**
24-
* Tool to search for datasets in DataFair.
25-
* This tool allows users to search for datasets using simple French keywords.
26-
* It returns essential dataset information for discovery purposes including ID, title,
27-
* description (if available), and source URL.
24+
* Tool to search for datasets in DataFair using full-text search.
25+
* This tool performs full-text search across dataset titles, descriptions, and metadata
26+
* using simple French keywords. It returns essential dataset information for discovery
27+
* purposes including ID, title, description (if available), and source URL.
2828
* Use this tool for dataset discovery, then use describe_dataset for detailed metadata
2929
* or search_data to query within a specific dataset.
30-
* @param {string} query - Simple French keywords to search for datasets (not full sentences).
31-
* Examples: "élus", "DPE", "entreprises"
30+
* @param {string} query - French keywords for full-text search (not full sentences).
31+
* The search looks across titles, descriptions, and metadata.
32+
* Examples: "élus", "DPE", "entreprises", "logement social"
3233
*/
3334
server.registerTool(
3435
'search_datasets',
3536
{
3637
title: 'Search Datasets',
37-
description: 'Search for datasets by topic, domain, or content in DataFair. Use simple French keywords (not full sentences). Returns a preview with essential metadata: a list of datasets containing ID, title, description, and link to the source URL that must be included in responses. Then use describe_dataset to get detailed metadata. Examples: "élus", "DPE", "entreprises"',
38+
description: 'Full-text search for datasets in DataFair. Uses French keywords to search across dataset titles, descriptions, and metadata (not full sentences). Returns a preview with essential metadata: a list of datasets containing ID, title, description, and link to the source URL that must be included in responses. Then use describe_dataset to get detailed metadata.',
3839
inputSchema: {
39-
query: z.string().min(3, 'Search term must be at least 3 characters long').describe('Search terms in French (simple keywords, not sentences). Examples: "élus", "DPE", "entreprises"')
40+
query: z.string().min(3, 'Search term must be at least 3 characters long').describe('French keywords for full-text search across dataset titles, descriptions, and metadata (simple keywords, not sentences). Examples: "élus", "DPE", "entreprises", "logement social"')
4041
},
4142
outputSchema: {
42-
totalCount: z.number().describe('Total number of datasets matching the search criteria'),
43+
totalCount: z.number().describe('Total number of datasets matching the full-text search criteria'),
4344
datasets: z.array(
4445
z.object({
4546
id: z.string().describe('Unique dataset ID (required for describe_dataset and search_data tools)'),
4647
title: z.string().describe('Dataset title'),
4748
description: z.string().optional().describe('A markdown description of the dataset content'),
4849
source: z.string().describe('Direct URL to the dataset page (must be included in AI responses as citation source)'),
4950
})
50-
).describe('Array of datasets matching the search criteria (top 10 results)')
51+
).describe('Array of datasets matching the full-text search criteria (top 10 results)')
5152
},
5253
annotations: { // https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations
5354
readOnlyHint: true
@@ -198,50 +199,77 @@ const registerTools = (server: McpServer) => {
198199
)
199200

200201
/**
201-
* Tool to search for specific data rows within a dataset.
202-
* This tool allows users to search for data within a specific dataset using simple French keywords.
203-
* It returns matching rows with their relevance scores and provides a direct link to view
204-
* the filtered results in the dataset's table interface.
205-
* Use this after describe_dataset to understand the dataset structure.
202+
* Tool to search for specific data rows within a dataset using either full-text search OR precise filters.
203+
* This tool can search data in two ways:
204+
* 1) Full-text search across all columns using keywords (quick and broad search)
205+
* 2) Precise filtering on specific columns with exact matches, comparisons, or column-specific searches (ideal for structured queries)
206+
*
207+
* Returns matching rows with their relevance scores and provides a direct link to view the filtered results in the dataset's table interface.
208+
* Use this after describe_dataset to understand the dataset structure and column keys.
206209
* @param {string} datasetId - The unique ID of the dataset to search in (obtained from search_datasets)
207-
* @param {string} query - Simple French keywords to search for within the dataset data
210+
* @param {string} query - French keywords for full-text search across all dataset columns
211+
* @param {string} select - Optional comma-separated list of column keys to reduce output size
212+
* @param {Object} filters - Optional precise filters on specific columns (alternative to query)
208213
*/
209214
server.registerTool(
210215
'search_data',
211216
{
212217
title: 'Search data from a dataset',
213-
description: 'Search for data rows within a specific dataset using simple French keywords. Returns matching rows with relevance scores and a direct link to view filtered results in the dataset table interface. Always include dataset license and source information when presenting results to users. Use describe_dataset first to understand the data structure.',
218+
description: 'Search for data rows in a specific dataset using either : - Full-text search across all columns (query) for quick, broad matches, - Precise filtering (filters) to apply exact conditions, comparisons, or column-specific searches. Use filters whenever your question involves multiple criteria or numerical/date ranges, as they yield more relevant and targeted results. The query parameter is better suited for simple, one-keyword searches across the entire dataset. Returns matching rows with relevance scores and a direct link to view filtered results in the dataset table interface. Always include dataset license, direct link and source information when presenting results to users. Use describe_dataset first to understand the data structure and available column keys.',
214219
inputSchema: {
215-
datasetId: z.string().describe('The unique dataset ID obtained from search_datasets'),
216-
query: z.string().min(1, 'Search query cannot be empty').describe('Simple French keywords to search within the dataset (not full sentences). Examples: "Jean Dupont", "Paris"'),
220+
datasetId: z.string().describe('The unique dataset ID obtained from search_datasets tool'),
221+
query: z.string().optional().describe('French keywords for full-text search across all dataset columns (simple keywords, not sentences). Do not use with filters parameter. Examples: "Jean Dupont", "Paris", "2025"'),
222+
select: z.string().optional().describe('Optional comma-separated list of specific column keys to include in the results. Useful when the dataset has many columns to reduce output size. If not provided, all columns are returned. Use column keys from describe_dataset. Example: "nom,age,ville"'),
223+
filters: z.record(
224+
z.string().regex(/^.+_(search|eq|gte|lte)$/, {
225+
message: 'Filter key must follow pattern: column_key + suffix (_eq, _search, _gte, _lte)'
226+
}),
227+
z.string()
228+
)
229+
.optional()
230+
.describe('Precise filters on specific columns. Ideal for multi-condition queries or range searches. Each filter key must be: column_key + suffix. Available suffixes: _eq (strictly equal - exact match), _search (full-text search within that column), _gte (greater than or equal), _lte (less than or equal). Use column keys from describe_dataset. Example: { "nom_search": "Jean", "age_lte": "30", "ville_eq": "Paris" } searches for people whose names contain "Jean", who are 30 years old or younger, and who live in Paris.')
217231
},
218232
outputSchema: {
219-
totalCount: z.number().describe('Total number of data rows matching the search criteria'),
233+
totalCount: z.number().describe('Total number of data rows matching the search criteria and filters'),
220234
datasetId: z.string().describe('The dataset ID that was searched'),
221-
searchQuery: z.string().describe('The search query that was used'),
222-
sourceUrl: z.string().describe('Direct URL to view the filtered dataset results in table format (for citation and direct access to filtered view)'),
235+
sourceUrl: z.string().describe('Direct URL to view the filtered dataset results in table format (must be included in responses for citation and direct access to filtered view)'),
223236
lines: z.array(
224-
z.record(z.any()).describe('Data row object with column keys and values, plus _score field indicating relevance')
225-
).describe('Array of matching data rows (top 10 results). Each row contains dataset columns plus _score for search relevance')
237+
z.record(z.any()).describe('Data row object containing column keys as object keys with their values, plus _score field indicating search relevance (higher score = more relevant)')
238+
).describe('Array of matching data rows (top 10 results). Each row contains dataset columns (using column keys) plus _score field for search relevance ranking')
226239
},
227240
annotations: {
228241
readOnlyHint: true
229242
}
230243
},
231-
async (params: { datasetId: string, query: string }) => {
232-
debug('Executing search_data tool with dataset:', params.datasetId, 'and query:', params.query)
244+
async (params: { datasetId: string, query?: string, select?: string, filters?: Record<string, any> }) => {
245+
debug('Executing search_data tool with dataset:', params.datasetId, 'query:', params.query, 'select:', params.select, 'filters:', params.filters)
246+
247+
// Build the url
248+
const url = new URL(`${config.dataFairUrl}/data-fair/api/v1/datasets/${params.datasetId}/lines`)
249+
url.searchParams.append('size', '10')
250+
if (params.query) {
251+
url.searchParams.append('q', params.query)
252+
url.searchParams.append('q_mode', 'complete')
253+
}
254+
if (params.select) {
255+
url.searchParams.append('select', params.select)
256+
}
257+
if (params.filters) {
258+
for (const [key, value] of Object.entries(params.filters)) {
259+
url.searchParams.append(key, value)
260+
}
261+
}
233262

234263
// Fetch the matching data rows from the dataset's lines endpoint
235264
const response = (await axios.get(
236-
`/datasets/${params.datasetId}/lines?q=${params.query}&q_mode=complete&size=10`,
265+
url.toString(),
237266
axiosOptions
238267
)).data
239268

240269
// Format the fetched data into a structured content object
241270
const structuredContent = {
242271
totalCount: response.total,
243272
datasetId: params.datasetId,
244-
searchQuery: params.query,
245273
sourceUrl: `${config.dataFairUrl}/data-fair/next-ui/embed/dataset/${params.datasetId}/table?q=${params.query}&q_mode=complete`,
246274
lines: response.results
247275
}

0 commit comments

Comments
 (0)