Skip to content

Commit 4eda469

Browse files
fix: smart configuration path resolution for response-filter.json
- Automatically locate response-filter.json in the same directory as the server config. - Fix 'Response filter configuration not found' errors. - Add a global DATAPROC_CONFIG_DIR for consistent path resolution. - Enhance logging for configuration path debugging. - Maintain backward compatibility with a fallback to process.cwd(). Resolves configuration path issues when the MCP server runs from different directories.
1 parent 342a283 commit 4eda469

File tree

9 files changed

+337
-3
lines changed

9 files changed

+337
-3
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,3 +112,4 @@ SERVICE_ACCOUNT_AUTHENTICATION_GUIDE.md
112112
examples/web-apps/*
113113

114114
state/*
115+
scripts/copy-config-to-desktop.sh

config/response-filter.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,10 @@
4949
"url": "http://localhost:6334",
5050
"collectionName": "dataproc_knowledge",
5151
"vectorSize": 384,
52-
"distance": "Cosine"
52+
"distance": "Cosine",
53+
"timeout": 30000,
54+
"retryAttempts": 3,
55+
"healthCheckInterval": 60000
5356
},
5457
"formatting": {
5558
"useEmojis": true,
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
{
2+
"profileManager": {
3+
"rootConfigPath": "/Users/srivers/Documents/Cline/MCP/dataproc-server/profiles",
4+
"profileScanInterval": 300000
5+
},
6+
"clusterTracker": {
7+
"stateFilePath": "./state/dataproc-state.json",
8+
"stateSaveInterval": 60000
9+
},
10+
"authentication": {
11+
"impersonateServiceAccount": "grpn-sa-terraform-data-science@prj-grp-central-sa-prod-0b25.iam.gserviceaccount.com",
12+
"preferImpersonation": true,
13+
"useApplicationDefaultFallback": false
14+
},
15+
"defaultParameters": {
16+
"defaultEnvironment": "production",
17+
"parameters": [],
18+
"environments": [
19+
{
20+
"environment": "production",
21+
"parameters": {
22+
"machineType": "n1-standard-8",
23+
"numWorkers": 4,
24+
"projectId": "prj-grp-data-sci-prod-b425",
25+
"region": "us-central1"
26+
}
27+
}
28+
]
29+
},
30+
"semanticSearch": {
31+
"enabled": true,
32+
"qdrant": {
33+
"url": "http://localhost:6334",
34+
"collectionName": "dataproc_knowledge",
35+
"vectorSize": 384,
36+
"distance": "Cosine",
37+
"timeout": 30000,
38+
"retryAttempts": 3,
39+
"healthCheckInterval": 60000
40+
},
41+
"knowledgeBase": {
42+
"autoIndexing": true,
43+
"indexingInterval": 300000,
44+
"maxDocuments": 10000,
45+
"confidenceThreshold": 0.7
46+
}
47+
},
48+
"responseOptimization": {
49+
"enabled": true,
50+
"tokenLimits": {
51+
"list_clusters": 500,
52+
"get_cluster": 300,
53+
"default": 400
54+
},
55+
"caching": {
56+
"enabled": true,
57+
"ttlSeconds": 300,
58+
"maxCacheSize": 100
59+
}
60+
},
61+
"logging": {
62+
"level": "info",
63+
"enableConsole": true,
64+
"enableFile": false,
65+
"semanticSearchLogs": true
66+
}
67+
}

docker-compose.qdrant.yml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
version: '3.8'
2+
3+
services:
4+
qdrant:
5+
image: qdrant/qdrant:latest
6+
container_name: dataproc-qdrant
7+
ports:
8+
- "6334:6333" # HTTP API
9+
- "6335:6334" # gRPC API (optional)
10+
volumes:
11+
- qdrant_storage:/qdrant/storage
12+
environment:
13+
- QDRANT__SERVICE__HTTP_PORT=6333
14+
- QDRANT__SERVICE__GRPC_PORT=6334
15+
- QDRANT__LOG_LEVEL=INFO
16+
restart: unless-stopped
17+
healthcheck:
18+
test: ["CMD", "curl", "-f", "http://localhost:6333/health"]
19+
interval: 30s
20+
timeout: 10s
21+
retries: 3
22+
start_period: 40s
23+
24+
volumes:
25+
qdrant_storage:
26+
driver: local

example_dataproc_opp.yml

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
customModes:
2+
- slug: dataproc-ops
3+
name: 🔧 DataprocOps
4+
roleDefinition: >-
5+
You are Roo, a Dataproc operations specialist with enhanced MCP capabilities, intelligent parameter management, and advanced semantic search. Your expertise includes:
6+
- Managing Google Cloud Dataproc clusters with smart default parameters
7+
- Executing and monitoring Hive/Spark jobs with minimal parameter requirements
8+
- Leveraging MCP resources for configuration access (dataproc://config/defaults, dataproc://profile/*)
9+
- Using memory tools to store and retrieve operational insights
10+
- Optimizing cluster and job configurations based on historical usage
11+
- Utilizing the enhanced Dataproc MCP server with 60-80% reduced parameter requirements
12+
- Performing semantic searches with natural language queries (e.g., "clusters with machine learning packages")
13+
- Extracting intelligent insights from cluster configurations using vector embeddings
14+
- Providing graceful degradation when optional semantic features are unavailable
15+
whenToUse: >-
16+
Use this mode when working with Google Cloud Dataproc operations, including:
17+
- Creating or managing Dataproc clusters (now requires minimal parameters)
18+
- Submitting and monitoring Hive/Spark jobs (simplified with smart defaults)
19+
- Managing cluster profiles and configurations via MCP resources
20+
- Analyzing job performance and cluster utilization
21+
- Leveraging the enhanced MCP server with intelligent default parameter injection
22+
- Accessing cluster configurations through dataproc:// resource URIs
23+
- Performing semantic searches for cluster discovery and analysis
24+
- Extracting insights from configurations using natural language queries
25+
- Analyzing infrastructure patterns and optimization opportunities
26+
groups:
27+
- read
28+
- - edit
29+
- fileRegex: \.(yaml|json|sql|hql)$
30+
description: sql/hql query files and YAML and JSON configuration files
31+
- mcp
32+
- command
33+
customInstructions: |-
34+
ENHANCED WORKFLOW (Updated for Smart Defaults, Resources & Semantic Search):
35+
36+
1. **Smart Parameter Management**:
37+
- Leverage default parameter injection (projectId/region auto-filled)
38+
- Use minimal parameters for tool calls (e.g., get_job_status with just jobId)
39+
- Access default configuration via 'dataproc://config/defaults' resource
40+
- Store custom parameters in memory only when they differ from defaults
41+
42+
2. **Resource-Enhanced Operations**:
43+
- Use 'dataproc://profile/{id}' resources to access cluster profiles
44+
- Leverage 'dataproc://config/defaults' for current environment settings
45+
- Access tracked clusters via dataproc:// resource URIs
46+
- Store resource URIs in memory for quick access
47+
48+
3. **Simplified Cluster Operations**:
49+
- Use 'start_dataproc_cluster' with just clusterName (defaults auto-inject)
50+
- Use 'list_clusters' with no parameters (uses configured defaults)
51+
- Apply profile-based configurations via 'create_cluster_from_profile'
52+
- Monitor cluster health with simplified parameter sets
53+
54+
4. **Streamlined Job Execution**:
55+
- Use 'get_job_status' with only jobId (projectId/region from defaults)
56+
- Submit jobs with minimal required parameters
57+
- Track job performance with simplified monitoring calls
58+
- Store successful job patterns with reduced parameter sets
59+
60+
5. **Enhanced Configuration Management**:
61+
- Access profiles via MCP resources instead of file system
62+
- Update default-params.json for environment-specific settings
63+
- Version control configurations with smart parameter awareness
64+
- Maintain profile templates accessible via dataproc:// URIs
65+
66+
6. **🧠 Semantic Search & Knowledge Base**:
67+
- Use 'query_cluster_data' for natural language infrastructure queries
68+
- Add semanticQuery parameter to 'list_clusters' and 'get_cluster' for intelligent filtering
69+
- Query with natural language: "clusters with machine learning packages", "high-memory configurations"
70+
- Store semantic insights in memory for pattern recognition and optimization
71+
- Leverage confidence scoring to prioritize relevant results
72+
- Use 'query_knowledge' for comprehensive knowledge base searches across clusters, jobs, and errors
73+
74+
7. **🎯 Intelligent Data Extraction**:
75+
- Extract meaningful insights from cluster configurations automatically
76+
- Identify patterns in machine types, pip packages, network configurations
77+
- Analyze component installations and optimization opportunities
78+
- Store extracted knowledge for future reference and comparison
79+
- Use vector embeddings for semantic similarity matching
80+
81+
8. **🔄 Graceful Degradation Handling**:
82+
- Provide helpful setup guidance when Qdrant is unavailable
83+
- Maintain full functionality with standard queries when semantic search is offline
84+
- Guide users through semantic search setup: "docker run -p 6334:6333 qdrant/qdrant"
85+
- Explain benefits of semantic search while providing standard alternatives
86+
87+
ALWAYS:
88+
- **Leverage smart defaults**: Use minimal parameters, let server inject defaults
89+
- **Access MCP resources**: Use dataproc:// URIs for configuration access
90+
- **Store operational insights**: Use memory for patterns, not basic parameters
91+
- **Optimize with defaults**: Configure default-params.json for your environment
92+
- **Maintain audit trail**: Track operations with simplified parameter logging
93+
- **Test resource access**: Verify dataproc:// resources are available before operations
94+
- **🧠 Use semantic search**: Leverage natural language queries for intelligent data discovery
95+
- **📊 Extract insights**: Store meaningful patterns and configurations in memory
96+
- **🎯 Provide guidance**: Help users understand semantic search benefits and setup
97+
98+
KEY ENHANCEMENTS:
99+
- 60-80% fewer parameters required for most operations
100+
- Direct access to configurations via MCP resources
101+
- Environment-independent authentication with service account impersonation
102+
- 53-58% faster operations with authentication caching
103+
- 🧠 **Natural language cluster discovery** with semantic search
104+
- 🎯 **Intelligent data extraction** from configurations and responses
105+
- 📊 **Confidence-scored results** for better decision making
106+
- 🔄 **Graceful degradation** maintaining functionality without dependencies
107+
108+
SEMANTIC SEARCH EXAMPLES:
109+
```javascript
110+
// Natural language cluster discovery
111+
query_cluster_data: { "query": "pip packages for data science" }
112+
list_clusters: { "semanticQuery": "high-memory instances with SSD" }
113+
get_cluster: { "clusterName": "ml-cluster", "semanticQuery": "Python libraries and packages" }
114+
115+
// Knowledge base queries
116+
query_knowledge: { "query": "machine learning configurations", "type": "clusters" }
117+
query_knowledge: { "query": "failed jobs with memory errors", "type": "errors" }
118+
```
119+
120+
SETUP GUIDANCE FOR SEMANTIC FEATURES:
121+
1. 🐳 Start Qdrant: `docker run -p 6334:6333 qdrant/qdrant`
122+
2. ✅ Verify: `curl http://localhost:6334/health`
123+
3. 🔄 Restart MCP server to enable semantic features
124+
4. 📖 Full docs: docs/KNOWLEDGE_BASE_SEMANTIC_SEARCH.md
125+
126+
127+
128+

scripts/setup-qdrant.sh

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#!/bin/bash
2+
3+
# Qdrant Setup Script for Dataproc MCP Server
4+
# This script sets up Qdrant vector database for semantic search
5+
6+
set -e
7+
8+
echo "🐳 Setting up Qdrant for Dataproc MCP Server..."
9+
10+
# Check if Docker is running
11+
if ! docker info > /dev/null 2>&1; then
12+
echo "❌ Docker is not running. Please start Docker first."
13+
exit 1
14+
fi
15+
16+
# Stop existing Qdrant containers
17+
echo "🛑 Stopping existing Qdrant containers..."
18+
docker stop dataproc-qdrant 2>/dev/null || true
19+
docker rm dataproc-qdrant 2>/dev/null || true
20+
21+
# Start Qdrant using Docker Compose
22+
echo "🚀 Starting Qdrant with Docker Compose..."
23+
docker-compose -f docker-compose.qdrant.yml up -d
24+
25+
# Wait for Qdrant to be ready
26+
echo "⏳ Waiting for Qdrant to be ready..."
27+
for i in {1..30}; do
28+
if curl -s http://localhost:6334/health > /dev/null 2>&1; then
29+
echo "✅ Qdrant is ready!"
30+
break
31+
fi
32+
if [ $i -eq 30 ]; then
33+
echo "❌ Qdrant failed to start after 30 seconds"
34+
docker-compose -f docker-compose.qdrant.yml logs
35+
exit 1
36+
fi
37+
sleep 1
38+
done
39+
40+
# Verify Qdrant health
41+
echo "🔍 Verifying Qdrant health..."
42+
HEALTH_RESPONSE=$(curl -s http://localhost:6334/health)
43+
echo "Health check response: $HEALTH_RESPONSE"
44+
45+
# Check collections
46+
echo "📊 Checking existing collections..."
47+
curl -s http://localhost:6334/collections | jq '.' || echo "No collections yet (this is normal)"
48+
49+
echo ""
50+
echo "🎉 Qdrant setup complete!"
51+
echo ""
52+
echo "📋 Configuration Summary:"
53+
echo " • Qdrant URL: http://localhost:6334"
54+
echo " • Container: dataproc-qdrant"
55+
echo " • Storage: Persistent volume (qdrant_storage)"
56+
echo " • Health endpoint: http://localhost:6334/health"
57+
echo ""
58+
echo "🔧 Next Steps:"
59+
echo " 1. Restart your MCP server to enable semantic search"
60+
echo " 2. Test with: query_cluster_data or list_clusters with semanticQuery"
61+
echo " 3. Check logs: docker-compose -f docker-compose.qdrant.yml logs"
62+
echo ""
63+
echo "🛑 To stop: docker-compose -f docker-compose.qdrant.yml down"
64+
echo "🗑️ To reset: docker-compose -f docker-compose.qdrant.yml down -v"

src/config/server.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@ import * as path from 'path';
77
import { fileURLToPath } from 'url';
88
import { ProfileManagerConfig, ClusterTrackerConfig } from '../types/profile.js';
99

10+
// Global type declaration for config directory
11+
declare global {
12+
// eslint-disable-next-line no-var
13+
var DATAPROC_CONFIG_DIR: string;
14+
}
15+
1016
// Determine the application root directory
1117
const __filename = fileURLToPath(import.meta.url);
1218
const __dirname = path.dirname(__filename);
@@ -124,6 +130,12 @@ export async function getServerConfig(configPath?: string): Promise<ServerConfig
124130
console.error(`[DIAGNOSTIC] Server Config: Current working directory: ${process.cwd()}`);
125131
console.error(`[DIAGNOSTIC] Server Config: Absolute config path: ${filePath}`);
126132

133+
// Store the config directory for other modules to use
134+
// eslint-disable-next-line no-undef
135+
global.DATAPROC_CONFIG_DIR = path.dirname(filePath);
136+
// eslint-disable-next-line no-undef
137+
console.error(`[DIAGNOSTIC] Server Config: Config directory: ${global.DATAPROC_CONFIG_DIR}`);
138+
127139
try {
128140
// Check if the config file exists
129141
try {

src/index.ts

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,23 @@ try {
126126
// Initialize response filter and Qdrant manager (async initialization)
127127
async function initializeResponseOptimization() {
128128
try {
129-
const responseFilterConfigPath = path.join(process.cwd(), 'config', 'response-filter.json');
129+
// Try to use the same directory as the main server config
130+
let responseFilterConfigPath: string;
131+
132+
if (global.DATAPROC_CONFIG_DIR) {
133+
// Use the same directory as server_main.json
134+
responseFilterConfigPath = path.join(global.DATAPROC_CONFIG_DIR, 'response-filter.json');
135+
console.log(
136+
`[INFO] Looking for response-filter.json in server config directory: ${responseFilterConfigPath}`
137+
);
138+
} else {
139+
// Fallback to the old behavior
140+
responseFilterConfigPath = path.join(process.cwd(), 'config', 'response-filter.json');
141+
console.log(
142+
`[INFO] Fallback: Looking for response-filter.json in: ${responseFilterConfigPath}`
143+
);
144+
}
145+
130146
if (fs.existsSync(responseFilterConfigPath)) {
131147
const responseFilterConfig = JSON.parse(fs.readFileSync(responseFilterConfigPath, 'utf8'));
132148

src/services/response-filter.ts

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,13 +124,30 @@ export class ResponseFilter {
124124
*/
125125
private static async loadConfig(): Promise<ResponseFilterConfig> {
126126
try {
127-
const configPath = path.join(process.cwd(), 'config', 'response-filter.json');
127+
// Try to use the same directory as the main server config
128+
let configPath: string;
129+
130+
// eslint-disable-next-line no-undef
131+
if (global.DATAPROC_CONFIG_DIR) {
132+
// Use the same directory as server_main.json
133+
// eslint-disable-next-line no-undef
134+
configPath = path.join(global.DATAPROC_CONFIG_DIR, 'response-filter.json');
135+
console.log(
136+
`[INFO] Looking for response-filter.json in server config directory: ${configPath}`
137+
);
138+
} else {
139+
// Fallback to the old behavior
140+
configPath = path.join(process.cwd(), 'config', 'response-filter.json');
141+
console.log(`[INFO] Fallback: Looking for response-filter.json in: ${configPath}`);
142+
}
143+
128144
const configData = await fs.readFile(configPath, 'utf-8');
129145
const config = JSON.parse(configData) as ResponseFilterConfig;
130146

131147
// Validate required fields
132148
ResponseFilter.validateConfig(config);
133149

150+
console.log(`[SUCCESS] Loaded response filter configuration from: ${configPath}`);
134151
return config;
135152
} catch (error) {
136153
console.warn('Failed to load response filter config, using defaults:', error);

0 commit comments

Comments (0)