From 70ee03bc647e0581b045759ade86e8134a49b845 Mon Sep 17 00:00:00 2001 From: Igor Rekun Date: Tue, 25 Nov 2025 19:42:53 +0000 Subject: [PATCH 01/18] Refactor apps-mcp to use CLI-based approach --- experimental/apps-mcp/README.md | 264 +++++++----------- experimental/apps-mcp/cmd/apps_mcp.go | 17 +- experimental/apps-mcp/lib/common/common.go | 9 + experimental/apps-mcp/lib/config.go | 38 --- experimental/apps-mcp/lib/prompts/apps.tmpl | 33 +++ .../apps-mcp/lib/prompts/auth_error.tmpl | 7 +- .../apps-mcp/lib/prompts/explore.tmpl | 150 ++++++++++ .../lib/prompts/initialization_message.tmpl | 27 +- .../lib/providers/clitools/explore.go | 197 +++++++++++++ .../clitools/invoke_databricks_cli.go | 40 +++ .../lib/providers/clitools/provider.go | 85 ++++++ experimental/apps-mcp/lib/providers/doc.go | 2 +- experimental/apps-mcp/lib/server/health.go | 9 +- experimental/apps-mcp/lib/server/server.go | 98 ++----- experimental/apps-mcp/lib/session/session.go | 9 +- .../apps-mcp/lib/trajectory/tracker.go | 12 - 16 files changed, 664 insertions(+), 333 deletions(-) create mode 100644 experimental/apps-mcp/lib/common/common.go create mode 100644 experimental/apps-mcp/lib/prompts/apps.tmpl create mode 100644 experimental/apps-mcp/lib/prompts/explore.tmpl create mode 100644 experimental/apps-mcp/lib/providers/clitools/explore.go create mode 100644 experimental/apps-mcp/lib/providers/clitools/invoke_databricks_cli.go create mode 100644 experimental/apps-mcp/lib/providers/clitools/provider.go diff --git a/experimental/apps-mcp/README.md b/experimental/apps-mcp/README.md index 93bf817035..b024943f02 100644 --- a/experimental/apps-mcp/README.md +++ b/experimental/apps-mcp/README.md @@ -1,26 +1,24 @@ # Databricks MCP Server -A Model Context Protocol (MCP) server for generating production-ready Databricks applications with testing, -linting and deployment setup from a single prompt. This agent relies heavily on scaffolding and -extensive validation to ensure high-quality outputs. +A Model Context Protocol (MCP) server for working with Databricks through natural language. This server provides tools for data exploration, workspace management, and executing Databricks CLI commands through AI-powered conversations. ## TL;DR -**Primary Goal:** Create and deploy production-ready Databricks applications from a single natural language prompt. This MCP server combines scaffolding, validation, and deployment into a seamless workflow that goes from idea to running application. +**Primary Goal:** Interact with Databricks workspaces, manage Databricks Asset Bundles (DABs), deploy Databricks Apps, and query data through natural language conversations. **How it works:** -1. **Explore your data** - Query Databricks catalogs, schemas, and tables to understand your data -2. **Generate the app** - Scaffold a full-stack TypeScript application (tRPC + React) with proper structure -3. **Customize with AI** - Use workspace tools to read, write, and edit files naturally through conversation -4. **Validate rigorously** - Run builds, type checks, and tests to ensure quality -5. **Deploy confidently** - Push validated apps directly to Databricks Apps platform +1. **Explore your workspace** - Discover workspace resources, get CLI command examples, and workflow recommendations +2. **Query your data** - Browse catalogs, schemas, and tables; execute SQL queries via CLI commands +3. **Manage bundles** - Initialize, validate, deploy, and run Databricks Asset Bundles +4. **Deploy apps** - Deploy and manage Databricks Apps through CLI commands +5. 
**Execute any CLI command** - Run the full Databricks CLI through the `invoke_databricks_cli` tool **Why use it:** -- **Speed**: Go from concept to deployed Databricks app in minutes, not hours or days -- **Quality**: Extensive validation ensures your app builds, passes tests, and is production-ready -- **Simplicity**: One natural language conversation handles the entire workflow +- **Conversational interface**: Work with Databricks using natural language instead of memorizing CLI commands +- **Context-aware**: Get relevant command suggestions based on your workspace configuration +- **Unified workflow**: Combine data exploration, bundle management, and app deployment in one tool -Perfect for data engineers and developers who want to build Databricks apps without the manual overhead of project setup, configuration, testing infrastructure, and deployment pipelines. +Perfect for data engineers and developers who want to streamline their Databricks workflows with AI-powered assistance. --- @@ -54,229 +52,164 @@ Perfect for data engineers and developers who want to build Databricks apps with } ``` -3. **Create your first Databricks app:** +3. **Start using Databricks with natural language:** Restart your MCP client and try: ``` - Create a Databricks app that shows sales data from main.sales.transactions - with a chart showing revenue by region. Deploy it as "sales-dashboard". + Explore my Databricks workspace and show me what catalogs are available ``` - The AI will: - - Explore your Databricks tables - - Generate a full-stack application - - Customize it based on your requirements - - Validate it passes all tests - - Deploy it to Databricks Apps - ---- - -## Features - -All features are designed to support the end-to-end workflow of creating production-ready Databricks applications: - -### 1. Data Exploration (Foundation) - -Understand your Databricks data before building: - -- **`databricks_list_catalogs`** - Discover available data catalogs -- **`databricks_list_schemas`** - Browse schemas in a catalog -- **`databricks_find_tables`** - Find tables in a schema -- **`databricks_describe_table`** - Get table details, columns, and sample data -- **`databricks_execute_query`** - Test queries and preview data - -*These tools help the AI understand your data structure so it can generate relevant application code.* - -### 2. Application Generation (Core) - -Create the application structure: - -- **`scaffold_data_app`** - Generate a full-stack TypeScript application - - Modern stack: Node.js, TypeScript, React, tRPC - - Pre-configured build system, linting, and testing - - Production-ready project structure - - Databricks SDK integration + ``` + Initialize a new Databricks Asset Bundle for a data pipeline project + ``` -*This is the foundation of your application - a working, tested template ready for customization.* + ``` + Query the main.sales.transactions table and show me the top 10 customers by revenue + ``` -### 3. Validation (Quality Assurance) + The AI will use the appropriate Databricks tools to help you complete these tasks. -Ensure production-readiness before deployment: +--- -- **`validate_data_app`** - Comprehensive validation - - Build verification (npm build) - - Type checking (TypeScript compiler) - - Test execution (full test suite) +## Features -*This step guarantees your application is tested and ready for production before deployment.* +The Databricks MCP server provides CLI-based tools for workspace interaction: -### 4. 
Deployment (Production Release) +Execute Databricks CLI commands and explore workspace resources: -Deploy validated applications to Databricks (enable with `--allow-deployment`): +- **`explore`** - Discover workspace resources and get CLI command recommendations + - Lists workspace URL, SQL warehouse details, and authentication profiles + - Provides command examples for jobs, clusters, catalogs, tables, and workspace files + - Gives workflow guidance for Databricks Asset Bundles and Apps -- **`deploy_databricks_app`** - Push to Databricks Apps platform - - Automatic deployment configuration - - Environment management - - Production-grade setup +- **`invoke_databricks_cli`** - Execute any Databricks CLI command + - Run bundle commands: `bundle init`, `bundle validate`, `bundle deploy`, `bundle run` + - Run apps commands: `apps deploy`, `apps list`, `apps get`, `apps start`, `apps stop` + - Run workspace commands: `workspace list`, `workspace export`, `jobs list`, `clusters list` + - Run catalog commands: `catalogs list`, `schemas list`, `tables list` + - Supports all Databricks CLI functionality with proper user allowlisting -*The final step: your validated application running on Databricks.* +*These tools provide a conversational interface to the full Databricks CLI, including Unity Catalog exploration and SQL query execution.* --- ## Example Usage -Here are example conversations showing the end-to-end workflow for creating Databricks applications: +Here are example conversations showing common workflows: -### Complete Workflow: Analytics Dashboard +### Data Exploration -This example shows how to go from data exploration to deployed application: - -**User:** +**Explore workspace resources:** ``` -I want to create a Databricks app that visualizes customer purchases. The data is -in the main.sales catalog. Show me what tables are available and create a dashboard -with charts for total revenue by region and top products. Deploy it as "sales-insights". +Explore my Databricks workspace and show me what's available ``` -**What happens:** -1. **Data Discovery** - AI lists schemas and tables in main.sales -2. **Data Inspection** - AI describes the purchases table structure -3. **App Generation** - AI scaffolds a TypeScript application -4. **Customization** - AI adds visualization components and queries -5. **Validation** - AI runs build, type check, and tests in container -6. **Deployment** - AI deploys to Databricks Apps as "sales-insights" - -**Result:** A production-ready Databricks app running in minutes with proper testing. - ---- - -### Quick Examples for Specific Use Cases - -#### Data App from Scratch - +**Query data:** ``` -Create a Databricks app in ~/projects/user-analytics that shows daily active users -from main.analytics.events. Include a line chart and data table. +Show me the schema of the main.sales.transactions table and give me a sample of 10 rows ``` -#### Real-Time Monitoring Dashboard - +**Find specific tables:** ``` -Build a monitoring dashboard for the main.logs.system_metrics table. Show CPU, -memory, and disk usage over time. Add alerts for values above thresholds. +Find all tables in the main catalog that contain the word "customer" ``` -#### Report Generator +### Databricks Asset Bundles (DABs) +**Create a new bundle project:** ``` -Create an app that generates weekly reports from main.sales.transactions. -Include revenue trends, top customers, and product performance. Add export to CSV. 
+Initialize a new Databricks Asset Bundle for a data pipeline project ``` -#### Data Quality Dashboard - +**Deploy a bundle:** ``` -Build a data quality dashboard for main.warehouse.inventory. Check for nulls, -duplicates, and out-of-range values. Show data freshness metrics. +Validate and deploy my Databricks bundle to the dev environment ``` ---- - -### Working with Existing Applications - -Once an app is scaffolded, you can continue development through conversation: - +**Run a job from a bundle:** ``` -Add a filter to show only transactions from the last 30 days +Run the data_processing job from my bundle ``` -``` -Update the chart to use a bar chart instead of line chart -``` +### Databricks Apps +**Initialize an app from template:** ``` -Add a new API endpoint to fetch customer details +Initialize a new Streamlit app using the Databricks bundle template ``` +**Deploy an app:** ``` -Run the tests and fix any failures +Deploy my app in the current directory to Databricks Apps as "sales-dashboard" ``` +**Manage apps:** ``` -Add error handling for failed database queries +List all my Databricks Apps and show me their status ``` ---- - -### Iterative Development Workflow +### Working with Jobs and Clusters -**Initial Request:** +**List and inspect jobs:** ``` -Create a simple dashboard for main.sales.orders +Show me all jobs in the workspace and their recent run status ``` -**Refinement:** +**Get cluster details:** ``` -Add a date range picker to filter orders +List all clusters and show me the configuration of the production cluster ``` -**Enhancement:** -``` -Include a summary card showing total orders and revenue -``` +### Complex Workflows -**Quality Check:** +**End-to-end data pipeline:** ``` -Validate the app and show me any test failures +1. Show me what tables are in the main.raw catalog +2. Create a new bundle for an ETL pipeline +3. Deploy it to the dev environment +4. Run the pipeline and show me the results ``` -**Production:** +**Multi-environment deployment:** ``` -Deploy the app to Databricks as "orders-dashboard" +Validate my bundle, then deploy it to dev, staging, and production environments ``` --- -## Why This Approach Works +## Benefits -### Traditional Development vs. Databricks MCP +### Natural Language Interface -| Traditional Approach | With Databricks MCP | -|---------------------|-------------| -| Manual project setup (hours) | Instant scaffolding (seconds) | -| Configure build tools manually | Pre-configured and tested | -| Set up testing infrastructure | Built-in test suite | -| Manual code changes and debugging | AI-powered development with validation | -| Local testing only | Containerized validation (reproducible) | -| Manual deployment setup | Automated deployment to Databricks | -| **Time to production: days/weeks** | **Time to production: minutes** | +Instead of memorizing complex CLI commands and flags, you can: +- Ask questions in plain English +- Get context-aware command suggestions +- Execute commands through conversation +- Receive explanations of results -### Key Advantages +### Workspace Awareness -**1. Scaffolding + Validation = Quality** -- Start with a working, tested template -- Every change is validated before deployment -- No broken builds reach production +The `explore` tool provides: +- Automatic workspace configuration detection +- SQL warehouse information +- Authentication profile details +- Relevant command examples based on your setup -**2. 
Natural Language = Productivity** -- Describe what you want, not how to build it -- AI handles implementation details -- Focus on requirements, not configuration +### Unified Workflow -**3. End-to-End Workflow = Simplicity** -- Single tool for entire lifecycle -- No context switching between tools -- Seamless progression from idea to deployment +Work with all Databricks functionality from one place: +- **Data exploration**: Query catalogs, schemas, and tables +- **Bundle management**: Create, validate, and deploy DABs +- **App deployment**: Deploy and manage Databricks Apps +- **Workspace operations**: Manage jobs, clusters, and notebooks -### What Makes It Production-Ready +### Safe Command Execution -The Databricks MCP server doesn't just generate code—it ensures quality: - -- ✅ **TypeScript** - Type safety catches errors early -- ✅ **Build verification** - Ensures code compiles -- ✅ **Test suite** - Validates functionality -- ✅ **Linting** - Enforces code quality -- ✅ **Databricks integration** - Native SDK usage +The `invoke_databricks_cli` tool: +- Allows users to allowlist specific commands +- Provides better tracking of executed operations +- Maintains audit trail of AI actions +- Prevents unauthorized operations --- @@ -290,18 +223,14 @@ databricks experimental apps-mcp --warehouse-id # Enable workspace tools databricks experimental apps-mcp --warehouse-id --with-workspace-tools - -# Enable deployment -databricks experimental apps-mcp --warehouse-id --allow-deployment ``` ### CLI Flags | Flag | Description | Default | |------|-------------|---------| -| `--warehouse-id` | Databricks SQL Warehouse ID (required) | - | +| `--warehouse-id` | Databricks SQL Warehouse ID (required for SQL queries) | - | | `--with-workspace-tools` | Enable workspace file operations | `false` | -| `--allow-deployment` | Enable deployment operations | `false` | | `--help` | Show help | - | ### Environment Variables @@ -312,7 +241,6 @@ databricks experimental apps-mcp --warehouse-id --allow-deploymen | `DATABRICKS_TOKEN` | Databricks personal access token | `dapi...` | | `WAREHOUSE_ID` | Databricks SQL warehouse ID (preferred) | `abc123def456` | | `DATABRICKS_WAREHOUSE_ID` | Alternative name for warehouse ID | `abc123def456` | -| `ALLOW_DEPLOYMENT` | Enable deployment operations | `true` or `false` | | `WITH_WORKSPACE_TOOLS` | Enable workspace tools | `true` or `false` | ### Authentication diff --git a/experimental/apps-mcp/cmd/apps_mcp.go b/experimental/apps-mcp/cmd/apps_mcp.go index 17a5f0ac3c..8e408aba01 100644 --- a/experimental/apps-mcp/cmd/apps_mcp.go +++ b/experimental/apps-mcp/cmd/apps_mcp.go @@ -9,7 +9,6 @@ import ( func NewMcpCmd() *cobra.Command { var warehouseID string - var allowDeployment bool var withWorkspaceTools bool cmd := &cobra.Command{ @@ -19,29 +18,22 @@ func NewMcpCmd() *cobra.Command { Long: `Start and manage an MCP server that provides AI agents with tools to interact with Databricks. 
The MCP server exposes the following capabilities: -- Databricks integration (query catalogs, schemas, tables, execute SQL) -- Project scaffolding (generate full-stack TypeScript applications) -- Sandboxed execution (isolated file/command execution) +- Data exploration (query catalogs, schemas, tables, execute SQL) +- CLI command execution (bundle, apps, workspace operations) +- Workspace resource discovery The server communicates via stdio using the Model Context Protocol.`, Example: ` # Start MCP server with required warehouse databricks experimental apps-mcp --warehouse-id abc123 # Start with workspace tools enabled - databricks experimental apps-mcp --warehouse-id abc123 --with-workspace-tools - - # Start with deployment tools enabled - databricks experimental apps-mcp --warehouse-id abc123 --allow-deployment`, + databricks experimental apps-mcp --warehouse-id abc123 --with-workspace-tools`, RunE: func(cmd *cobra.Command, args []string) error { ctx := cmd.Context() // Build MCP config from flags cfg := &mcplib.Config{ - AllowDeployment: allowDeployment, WithWorkspaceTools: withWorkspaceTools, - IoConfig: &mcplib.IoConfig{ - Validation: &mcplib.ValidationConfig{}, - }, } log.Infof(ctx, "Starting MCP server") @@ -62,7 +54,6 @@ The server communicates via stdio using the Model Context Protocol.`, // Define flags cmd.Flags().StringVar(&warehouseID, "warehouse-id", "", "Databricks SQL Warehouse ID") - cmd.Flags().BoolVar(&allowDeployment, "allow-deployment", false, "Enable deployment tools") cmd.Flags().BoolVar(&withWorkspaceTools, "with-workspace-tools", false, "Enable workspace tools (file operations, bash, grep, glob)") cmd.AddCommand(newInstallCmd()) diff --git a/experimental/apps-mcp/lib/common/common.go b/experimental/apps-mcp/lib/common/common.go new file mode 100644 index 0000000000..1b24ea5635 --- /dev/null +++ b/experimental/apps-mcp/lib/common/common.go @@ -0,0 +1,9 @@ +package common + +import "os" + +// GetCLIPath returns the path to the current CLI executable. +// This supports development testing with ./cli. +func GetCLIPath() string { + return os.Args[0] +} diff --git a/experimental/apps-mcp/lib/config.go b/experimental/apps-mcp/lib/config.go index 50c431c3d8..f816e1b48f 100644 --- a/experimental/apps-mcp/lib/config.go +++ b/experimental/apps-mcp/lib/config.go @@ -5,50 +5,12 @@ package mcp // Config holds MCP server configuration. // Configuration is populated from CLI flags and Databricks client context. type Config struct { - AllowDeployment bool WithWorkspaceTools bool - IoConfig *IoConfig -} - -// IoConfig configures the IO provider for project scaffolding and validation. -type IoConfig struct { - Template *TemplateConfig - Validation *ValidationConfig -} - -// TemplateConfig specifies which template to use for scaffolding new projects. -type TemplateConfig struct { - Name string - Path string -} - -// ValidationConfig defines custom validation commands for project validation. -type ValidationConfig struct { - Command string - Timeout int -} - -// SetDefaults applies default values to ValidationConfig if not explicitly set. -func (v *ValidationConfig) SetDefaults() { - if v.Timeout == 0 { - v.Timeout = 600 - } } // DefaultConfig returns a Config with sensible default values. 
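 // Workspace tools remain off unless enabled with the --with-workspace-tools flag.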
 func DefaultConfig() *Config {
-	validationCfg := &ValidationConfig{}
-	validationCfg.SetDefaults()
-
 	return &Config{
-		AllowDeployment:    false,
 		WithWorkspaceTools: false,
-		IoConfig: &IoConfig{
-			Template: &TemplateConfig{
-				Name: "default",
-				Path: "",
-			},
-			Validation: validationCfg,
-		},
 	}
 }
diff --git a/experimental/apps-mcp/lib/prompts/apps.tmpl b/experimental/apps-mcp/lib/prompts/apps.tmpl
new file mode 100644
index 0000000000..12d6c3ad6d
--- /dev/null
+++ b/experimental/apps-mcp/lib/prompts/apps.tmpl
@@ -0,0 +1,33 @@
+{{- /*
+ * Guidance for Databricks Apps development.
+ *
+ * This guidance provides instructions for initializing and working with
+ * Databricks Apps using bundle templates.
+ *
+ */ -}}
+
+DATABRICKS APPS DEVELOPMENT
+============================
+
+Initialize a New App Bundle:
+  To create a new Databricks app using the Streamlit template:
+    invoke_databricks_cli 'bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/streamlit-app --config-file /dev/stdin <'
+
+Deploying Apps:
+  If you have an app with app.yaml configuration:
+    invoke_databricks_cli 'bundle deploy'
diff --git a/experimental/apps-mcp/lib/prompts/auth_error.tmpl b/experimental/apps-mcp/lib/prompts/auth_error.tmpl
index 96f2be6162..e7972fec7e 100644
--- a/experimental/apps-mcp/lib/prompts/auth_error.tmpl
+++ b/experimental/apps-mcp/lib/prompts/auth_error.tmpl
@@ -8,8 +8,9 @@ Not authenticated to Databricks
 I need to know either the Databricks workspace URL or the Databricks profile name.
 You can list the available profiles by running `databricks auth profiles`.
 
-ASK the user which of the configured profiles or databricks workspace URL they want to use.
-Only then call the `databricks_configure_auth` tool to configure the authentication.
+Please configure your authentication using one of these methods:
+1. Set environment variables: DATABRICKS_HOST and DATABRICKS_TOKEN
+2. Use Databricks CLI profiles: Set DATABRICKS_PROFILE or use the default profile
+3. Provide authentication details in your MCP client configuration
 
-Do not run anything else before authenticating successfully.
 Once authenticated, you can use this tool again
diff --git a/experimental/apps-mcp/lib/prompts/explore.tmpl b/experimental/apps-mcp/lib/prompts/explore.tmpl
new file mode 100644
index 0000000000..6bed77c18a
--- /dev/null
+++ b/experimental/apps-mcp/lib/prompts/explore.tmpl
@@ -0,0 +1,150 @@
+{{- /*
+ * Guidance for exploring Databricks workspaces and resources.
+ *
+ * This guidance is offered by the explore tool to provide comprehensive
+ * instructions for discovering and querying workspace resources like
+ * jobs, clusters, catalogs, tables, and SQL warehouses.
+ *
+ */ -}}
+
+Databricks Data Exploration Guide
+=====================================
+
+{{.WorkspaceInfo}}{{if .WarehouseName}}
+Default SQL Warehouse: {{.WarehouseName}} ({{.WarehouseID}}){{else}}
+Note: No SQL warehouse detected. SQL queries will require warehouse_id to be specified manually.{{end}}{{.ProfilesInfo}}
+
+IMPORTANT: Use the invoke_databricks_cli tool to run all commands below!
+
+
+1. EXECUTING SQL QUERIES
+   Run queries with auto-wait (max 50s):
+     invoke_databricks_cli 'api post /api/2.0/sql/statements --json {"warehouse_id":"{{if .WarehouseID}}{{.WarehouseID}}{{else}}<warehouse-id>{{end}}","statement":"SELECT * FROM <catalog>.<schema>.<table> LIMIT 10","wait_timeout":"50s"}'
+
+   Response has status.state:
+   - "SUCCEEDED" → Results in result.data_array (you're done!)
+   - "PENDING" → Warehouse starting or query slow. Poll with:
+       invoke_databricks_cli 'api get /api/2.0/sql/statements/<statement-id>'
+     Repeat every 5-10s until "SUCCEEDED"
+
+   Note: First query on stopped warehouse takes 60-120s startup time
+
+
+2. EXPLORING JOBS AND WORKFLOWS
+   List all jobs:
+     invoke_databricks_cli 'jobs list'
+
+   Get job details:
+     invoke_databricks_cli 'jobs get <job-id>'
+
+   List job runs:
+     invoke_databricks_cli 'jobs list-runs --job-id <job-id>'
+
+
+3. EXPLORING CLUSTERS
+   List all clusters:
+     invoke_databricks_cli 'clusters list'
+
+   Get cluster details:
+     invoke_databricks_cli 'clusters get <cluster-id>'
+
+
+4. EXPLORING UNITY CATALOG DATA
+   Unity Catalog uses a three-level namespace: catalog.schema.table
+
+   List all catalogs:
+     invoke_databricks_cli 'catalogs list'
+
+   List schemas in a catalog:
+     invoke_databricks_cli 'schemas list <catalog>'
+
+   List tables in a schema:
+     invoke_databricks_cli 'tables list <catalog> <schema>'
+
+   Get table details (schema, columns, properties):
+     invoke_databricks_cli 'tables get <catalog>.<schema>.<table>'
+
+
+5. EXPLORING WORKSPACE FILES
+   List workspace files and notebooks:
+     invoke_databricks_cli 'workspace list <path>'
+
+   Export a notebook:
+     invoke_databricks_cli 'workspace export <path>'
+
+
+Getting Started:
+- Use the commands above to explore what resources exist in the workspace
+- All commands support --output json for programmatic access
+- Remember to add --profile <profile> when working with non-default workspaces
+
+
+DATABRICKS ASSET BUNDLES (DABs) WORKFLOW
+=========================================
+
+Creating a New Bundle Project:
+  When to use: Building a new project from scratch with deployment to multiple environments
+
+  1. Initialize a new bundle (creates proper project structure):
+     invoke_databricks_cli 'bundle init'
+
+  2. Validate the bundle configuration:
+     invoke_databricks_cli 'bundle validate'
+
+  3. Deploy to a target environment (dev/staging/prod):
+     invoke_databricks_cli 'bundle deploy --target dev'
+
+Working with Existing Bundle Project:
+  When to use: databricks.yml file already exists in the directory
+
+  1. Validate changes:
+     invoke_databricks_cli 'bundle validate'
+
+  2. Deploy to environment:
+     invoke_databricks_cli 'bundle deploy --target <target>'
+
+  3. Run a resource (job/pipeline):
+     invoke_databricks_cli 'bundle run <resource-name>'
+
+  4. Destroy deployed resources:
+     invoke_databricks_cli 'bundle destroy --target <target>'
+
+Bundle Commands Reference:
+  - bundle init      # Initialize new bundle from template
+  - bundle validate  # Validate bundle configuration
+  - bundle deploy    # Deploy bundle to workspace
+  - bundle run       # Run a job or pipeline
+  - bundle destroy   # Remove deployed resources
+  - bundle schema    # Show bundle configuration schema
+
+💡 Tip: Use 'invoke_databricks_cli bundle init' to see available templates
+
+
+COMMON PATTERNS
+===============
+
+Multi-environment deployment:
+  Deploy to different environments using targets in databricks.yml:
+    invoke_databricks_cli 'bundle deploy --target dev'
+    invoke_databricks_cli 'bundle deploy --target prod'
+
+Working with pipelines/jobs in bundles:
+  Add resources to databricks.yml, then:
+    invoke_databricks_cli 'bundle validate'
+    invoke_databricks_cli 'bundle deploy'
+    invoke_databricks_cli 'bundle run <job-name>'
+
+
+BEST PRACTICES
+==============
+
+✅ DO use invoke_databricks_cli for all Databricks CLI commands
+   (Better for user allowlisting and tracking)
+
+✅ DO validate bundles before deploying:
+   invoke_databricks_cli 'bundle validate'
+
+✅ DO use bundle templates for new projects:
+   invoke_databricks_cli 'bundle init'
+
+✅ DO call explore during planning to get workspace context
diff --git a/experimental/apps-mcp/lib/prompts/initialization_message.tmpl b/experimental/apps-mcp/lib/prompts/initialization_message.tmpl
index f0990f9641..7529ad6a33 100644
--- a/experimental/apps-mcp/lib/prompts/initialization_message.tmpl
+++ b/experimental/apps-mcp/lib/prompts/initialization_message.tmpl
@@ -1,18 +1,15 @@
-Your session in Databricks MCP has been successfully initialized. Here are the guidelines to follow while working on projects using databricks_mcp tools:
+Your session in Databricks MCP has been successfully initialized.
 
-## Project State Management:
-This project uses a state file (`.edda_state`) managed by edda MCP to enforce the correct workflow order:
-1. **Scaffolded**: `scaffold_data_app` creates project structure from template (starts in this state)
-2. **Validated**: `validate_data_app` runs build + tests, computes BLAKE3 checksum of package.json and all core source files
-3. 
**Deployed**: `deploy_databricks_app` deploys to Databricks Apps, but ONLY if checksum hasn't changed since validation +## Available Tools: +- **explore**: Discover workspace resources, get CLI command examples, and workflow recommendations +- **invoke_databricks_cli**: Execute any Databricks CLI command (bundle, apps, workspace, etc.) +- **Databricks API tools**: Query catalogs, schemas, tables, and execute SQL queries -Re-validation is allowed (Deployed → Validated) to update the checksum after intentional changes. The databricks_mcp tools enforce these state transitions and prevent invalid state changes. - -## Workflow: -- Projects MUST end with validate_project to verify build + tests pass -- Bias towards backend code when the task allows implementation in multiple places -- Always add tests for what you're implementing, put them next to the code (e.g. src/*.test.ts) -- When working with Databricks or other services, use real API calls in tests (no mocks) to verify end-to-end functionality, unless explicitly instructed otherwise. It can be done on subset of data if applicable. +## Workflow Best Practices: +- Use `explore` at the start of tasks to understand workspace context and get relevant commands +- Use `invoke_databricks_cli` for all Databricks CLI operations (better for tracking and allowlisting) +- For Databricks Asset Bundles: Use `invoke_databricks_cli 'bundle validate'` before deploying +- For Databricks Apps: Use `invoke_databricks_cli 'apps deploy'` for deployment +- Always validate before deploying to ensure configuration is correct +- When not sure about the user's intent, ask clarifying questions before proceeding - Do NOT create summary files, reports, or README unless explicitly requested -- When not sure about the user's intent, ask clarifying questions before proceeding. For example, if user asks for "a data app to analyze sales data", ask for more details on data sources and analysis goals. Do not make assumptions regarding their needs and data sources. -- However, stick to the technical stack initialized by the `scaffold_data_app` as it has been approved by the management and battle-tested in production. diff --git a/experimental/apps-mcp/lib/providers/clitools/explore.go b/experimental/apps-mcp/lib/providers/clitools/explore.go new file mode 100644 index 0000000000..89ec5cd36b --- /dev/null +++ b/experimental/apps-mcp/lib/providers/clitools/explore.go @@ -0,0 +1,197 @@ +package clitools + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "strings" + + "github.com/databricks/cli/experimental/apps-mcp/lib/common" + "github.com/databricks/cli/experimental/apps-mcp/lib/prompts" + "github.com/databricks/cli/experimental/apps-mcp/lib/session" + "github.com/databricks/cli/libs/databrickscfg/profile" + "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/exec" + "github.com/databricks/cli/libs/log" +) + +type warehouse struct { + ID string `json:"id"` + Name string `json:"name"` + State string `json:"state"` +} + +// Explore provides guidance on exploring Databricks workspaces and resources. +func Explore(ctx context.Context) (string, error) { + warehouse, err := GetDefaultWarehouse(ctx) + if err != nil { + log.Debugf(ctx, "Failed to get default warehouse (non-fatal): %v", err) + warehouse = nil + } + + currentProfile := getCurrentProfile(ctx) + profiles := getAvailableProfiles(ctx) + + return generateExploreGuidance(ctx, warehouse, currentProfile, profiles), nil +} + +// GetDefaultWarehouse finds a suitable SQL warehouse for queries. 
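+// The selected warehouse feeds the warehouse_id shown in Explore's SQL examples.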
+// It filters out warehouses the user cannot access and prefers RUNNING warehouses, +// then falls back to STOPPED ones (which auto-start). +func GetDefaultWarehouse(ctx context.Context) (*warehouse, error) { + executor, err := exec.NewCommandExecutor("") + if err != nil { + return nil, fmt.Errorf("failed to create command executor: %w", err) + } + + cliPath := common.GetCLIPath() + output, err := executor.Exec(ctx, fmt.Sprintf(`"%s" api get "/api/2.0/sql/warehouses?skip_cannot_use=true" --output json`, cliPath)) + if err != nil { + return nil, fmt.Errorf("failed to list warehouses: %w\nOutput: %s", err, output) + } + + var response struct { + Warehouses []warehouse `json:"warehouses"` + } + if err := json.Unmarshal(output, &response); err != nil { + return nil, fmt.Errorf("failed to parse warehouses: %w", err) + } + warehouses := response.Warehouses + + if len(warehouses) == 0 { + return nil, errors.New("no SQL warehouses found in workspace") + } + + // Prefer RUNNING warehouses + for i := range warehouses { + if strings.ToUpper(warehouses[i].State) == "RUNNING" { + return &warehouses[i], nil + } + } + + // Fall back to STOPPED warehouses (they auto-start when queried) + for i := range warehouses { + if strings.ToUpper(warehouses[i].State) == "STOPPED" { + return &warehouses[i], nil + } + } + + // Return first available warehouse regardless of state + return &warehouses[0], nil +} + +// getCurrentProfile returns the currently active profile name. +func getCurrentProfile(ctx context.Context) string { + // Check DATABRICKS_CONFIG_PROFILE env var + profileName := env.Get(ctx, "DATABRICKS_CONFIG_PROFILE") + if profileName == "" { + return "DEFAULT" + } + return profileName +} + +// getAvailableProfiles returns all available profiles from ~/.databrickscfg. +func getAvailableProfiles(ctx context.Context) profile.Profiles { + profiles, err := profile.DefaultProfiler.LoadProfiles(ctx, profile.MatchAllProfiles) + if err != nil { + // If we can't load profiles, return empty list (config file might not exist) + return profile.Profiles{} + } + return profiles +} + +// generateExploreGuidance creates comprehensive guidance for data exploration. 
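+// The guidance is assembled from explore.tmpl plus any capability-specific
+// templates (such as apps.tmpl) enabled on the session.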
+func generateExploreGuidance(ctx context.Context, warehouse *warehouse, currentProfile string, profiles profile.Profiles) string { + // Build workspace/profile information + workspaceInfo := "Current Workspace Profile: " + currentProfile + if len(profiles) > 0 { + // Find current profile details + var currentHost string + for _, p := range profiles { + if p.Name == currentProfile { + currentHost = p.Host + if cloud := p.Cloud(); cloud != "" { + currentHost = fmt.Sprintf("%s (%s)", currentHost, cloud) + } + break + } + } + if currentHost != "" { + workspaceInfo = fmt.Sprintf("Current Workspace Profile: %s - %s", currentProfile, currentHost) + } + } + + // Build available profiles list + profilesInfo := "" + if len(profiles) > 1 { + profilesInfo = "\n\nAvailable Workspace Profiles:\n" + for _, p := range profiles { + marker := "" + if p.Name == currentProfile { + marker = " (current)" + } + cloud := p.Cloud() + if cloud != "" { + profilesInfo += fmt.Sprintf(" - %s: %s (%s)%s\n", p.Name, p.Host, cloud, marker) + } else { + profilesInfo += fmt.Sprintf(" - %s: %s%s\n", p.Name, p.Host, marker) + } + } + profilesInfo += "\n To use a different workspace, add --profile to any command:\n" + profilesInfo += " invoke_databricks_cli '--profile prod catalogs list'\n" + } + + // Handle warehouse information (may be nil if lookup failed) + warehouseName := "" + warehouseID := "" + if warehouse != nil { + warehouseName = warehouse.Name + warehouseID = warehouse.ID + } + + // Prepare template data + data := map[string]string{ + "WorkspaceInfo": workspaceInfo, + "WarehouseName": warehouseName, + "WarehouseID": warehouseID, + "ProfilesInfo": profilesInfo, + } + + // Render base explore template + result := prompts.MustExecuteTemplate("explore.tmpl", data) + + // Get session and check for enabled capabilities + sess, err := session.GetSession(ctx) + if err != nil { + log.Debugf(ctx, "No session found, skipping capability-based instructions: %v", err) + return result + } + + capabilities, ok := sess.Get(session.CapabilitiesDataKey) + if !ok { + log.Debugf(ctx, "No capabilities set in session") + return result + } + + capList, ok := capabilities.([]string) + if !ok { + log.Warnf(ctx, "Capabilities is not a string slice, skipping") + return result + } + + // Inject additional templates based on enabled capabilities + for _, cap := range capList { + switch cap { + case "apps": + // Render and append apps template + appsContent := prompts.MustExecuteTemplate("apps.tmpl", data) + result = result + "\n\n" + appsContent + log.Debugf(ctx, "Injected apps instructions based on capability") + default: + log.Debugf(ctx, "Unknown capability: %s", cap) + } + } + + return result +} diff --git a/experimental/apps-mcp/lib/providers/clitools/invoke_databricks_cli.go b/experimental/apps-mcp/lib/providers/clitools/invoke_databricks_cli.go new file mode 100644 index 0000000000..150f295afa --- /dev/null +++ b/experimental/apps-mcp/lib/providers/clitools/invoke_databricks_cli.go @@ -0,0 +1,40 @@ +package clitools + +import ( + "context" + "errors" + "fmt" + + "github.com/databricks/cli/experimental/apps-mcp/lib/common" + "github.com/databricks/cli/libs/exec" +) + +// InvokeDatabricksCLI runs a Databricks CLI command and returns the output. +func InvokeDatabricksCLI(ctx context.Context, command string, workingDirectory *string) (string, error) { + if command == "" { + return "", errors.New("command is required") + } + + workDir := "." 
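+	// Run in the caller-supplied directory when provided; otherwise use the current directory.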
+ if workingDirectory != nil && *workingDirectory != "" { + workDir = *workingDirectory + } + + executor, err := exec.NewCommandExecutor(workDir) + if err != nil { + return "", fmt.Errorf("failed to create command executor: %w", err) + } + + // GetCLIPath returns the path to the current CLI executable + cliPath := common.GetCLIPath() + fullCommand := fmt.Sprintf(`"%s" %s`, cliPath, command) + output, err := executor.Exec(ctx, fullCommand) + + result := string(output) + if err != nil { + result += fmt.Sprintf("\n\nCommand failed with error: %v", err) + return result, nil + } + + return result, nil +} diff --git a/experimental/apps-mcp/lib/providers/clitools/provider.go b/experimental/apps-mcp/lib/providers/clitools/provider.go new file mode 100644 index 0000000000..b119a97b3e --- /dev/null +++ b/experimental/apps-mcp/lib/providers/clitools/provider.go @@ -0,0 +1,85 @@ +package clitools + +import ( + "context" + + mcp "github.com/databricks/cli/experimental/apps-mcp/lib" + mcpsdk "github.com/databricks/cli/experimental/apps-mcp/lib/mcp" + "github.com/databricks/cli/experimental/apps-mcp/lib/providers" + "github.com/databricks/cli/experimental/apps-mcp/lib/session" + "github.com/databricks/cli/libs/log" +) + +func init() { + providers.Register("clitools", func(ctx context.Context, cfg *mcp.Config, sess *session.Session) (providers.Provider, error) { + return NewProvider(ctx, cfg, sess) + }, providers.ProviderConfig{ + Always: true, + }) +} + +// Provider represents the CLI provider that registers MCP tools for CLI operations +type Provider struct { + config *mcp.Config + session *session.Session + ctx context.Context +} + +// NewProvider creates a new CLI provider +func NewProvider(ctx context.Context, cfg *mcp.Config, sess *session.Session) (*Provider, error) { + return &Provider{ + config: cfg, + session: sess, + ctx: ctx, + }, nil +} + +// Name returns the name of the provider. +func (p *Provider) Name() string { + return "clitools" +} + +// RegisterTools registers all CLI tools with the MCP server +func (p *Provider) RegisterTools(server *mcpsdk.Server) error { + log.Info(p.ctx, "Registering CLI tools") + + // Register explore tool + mcpsdk.AddTool(server, + &mcpsdk.Tool{ + Name: "explore", + Description: "**REQUIRED DURING PLAN MODE** - Call this FIRST when planning ANY Databricks work. Use this to discover available workspaces, warehouses, and get workflow recommendations for your specific task. Even if you're just reading an assignment document, call this first. Especially important when task involves: creating Databricks projects/apps/pipelines/jobs, SQL pipelines or data transformation workflows, deploying code to multiple environments (dev/prod), or working with databricks.yml files. You DON'T need a workspace name - call this when starting ANY Databricks planning to understand workspace capabilities and recommended tooling before you create your plan.", + }, + func(ctx context.Context, req *mcpsdk.CallToolRequest, args struct{}) (*mcpsdk.CallToolResult, any, error) { + log.Debug(ctx, "explore called") + result, err := Explore(session.WithSession(ctx, p.session)) + if err != nil { + return nil, nil, err + } + return mcpsdk.CreateNewTextContentResult(result), nil, nil + }, + ) + + // Register invoke_databricks_cli tool + type InvokeDatabricksCLIInput struct { + Command string `json:"command" jsonschema:"required" jsonschema_description:"The full Databricks CLI command to run, e.g. 'bundle deploy' or 'bundle validate'. 
Do not include the 'databricks' prefix."` + WorkingDirectory *string `json:"working_directory,omitempty" jsonschema_description:"Optional. The directory to run the command in. Defaults to the current directory."` + } + + mcpsdk.AddTool(server, + &mcpsdk.Tool{ + Name: "invoke_databricks_cli", + Description: "Run any Databricks CLI command. Use this tool whenever you need to run databricks CLI commands like 'bundle deploy', 'bundle validate', 'bundle run', 'auth login', etc. The reason this tool exists (instead of invoking the databricks CLI directly) is to make it easier for users to allow-list commands.", + }, + func(ctx context.Context, req *mcpsdk.CallToolRequest, args InvokeDatabricksCLIInput) (*mcpsdk.CallToolResult, any, error) { + log.Debugf(ctx, "invoke_databricks_cli called: command=%s", args.Command) + result, err := InvokeDatabricksCLI(ctx, args.Command, args.WorkingDirectory) + if err != nil { + return nil, nil, err + } + return mcpsdk.CreateNewTextContentResult(result), nil, nil + }, + ) + + log.Infof(p.ctx, "Registered CLI tools: count=%d", 2) + return nil +} diff --git a/experimental/apps-mcp/lib/providers/doc.go b/experimental/apps-mcp/lib/providers/doc.go index ea4be0acbb..3652669d96 100644 --- a/experimental/apps-mcp/lib/providers/doc.go +++ b/experimental/apps-mcp/lib/providers/doc.go @@ -5,7 +5,7 @@ Each provider implements a set of related tools: - databricks: Databricks API integration - io: Project scaffolding and validation -- workspace: File and command operations +- clitools: CLI exploration and invocation - deployment: Application deployment (optional) Provider Interface: diff --git a/experimental/apps-mcp/lib/server/health.go b/experimental/apps-mcp/lib/server/health.go index 3af828c4d2..4dfe1eeac0 100644 --- a/experimental/apps-mcp/lib/server/health.go +++ b/experimental/apps-mcp/lib/server/health.go @@ -29,19 +29,14 @@ func (s *Server) CheckHealth(ctx context.Context) *HealthStatus { status.Providers["databricks"] = "healthy" } - // I/O provider doesn't need health checks (no external dependencies) - status.Providers["io"] = "healthy" + // Check CLI tools provider + status.Providers["clitools"] = "healthy" // Check workspace provider if enabled if s.config.WithWorkspaceTools { status.Providers["workspace"] = "healthy" } - // Check deployment provider if enabled - if s.config.AllowDeployment { - status.Providers["deployment"] = "healthy" - } - return status } diff --git a/experimental/apps-mcp/lib/server/server.go b/experimental/apps-mcp/lib/server/server.go index b013da3e19..19e72b4dae 100644 --- a/experimental/apps-mcp/lib/server/server.go +++ b/experimental/apps-mcp/lib/server/server.go @@ -7,13 +7,12 @@ import ( mcp "github.com/databricks/cli/experimental/apps-mcp/lib" mcpsdk "github.com/databricks/cli/experimental/apps-mcp/lib/mcp" "github.com/databricks/cli/experimental/apps-mcp/lib/middlewares" - "github.com/databricks/cli/experimental/apps-mcp/lib/providers/databricks" - "github.com/databricks/cli/experimental/apps-mcp/lib/providers/deployment" - "github.com/databricks/cli/experimental/apps-mcp/lib/providers/io" + "github.com/databricks/cli/experimental/apps-mcp/lib/providers/clitools" "github.com/databricks/cli/experimental/apps-mcp/lib/session" "github.com/databricks/cli/experimental/apps-mcp/lib/trajectory" "github.com/databricks/cli/internal/build" "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go" ) // Server manages the MCP server lifecycle, provider registration, and session tracking. 
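 // A Server bundles the MCP SDK server with its config, session, and trajectory tracker; construct it with NewServer.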
@@ -35,6 +34,9 @@ func NewServer(ctx context.Context, cfg *mcp.Config) *Server { server := mcpsdk.NewServer(impl, nil) sess := session.NewSession() + // Set enabled capabilities for this MCP server + sess.Set(session.CapabilitiesDataKey, []string{"apps"}) + tracker, err := trajectory.NewTracker(ctx, sess, cfg) if err != nil { log.Warnf(ctx, "failed to create trajectory tracker: %v", err) @@ -42,7 +44,7 @@ func NewServer(ctx context.Context, cfg *mcp.Config) *Server { } server.AddMiddleware(middlewares.NewToolCounterMiddleware(sess)) - server.AddMiddleware(middlewares.NewDatabricksClientMiddleware([]string{"databricks_configure_auth"})) + server.AddMiddleware(middlewares.NewDatabricksClientMiddleware(nil)) server.AddMiddleware(middlewares.NewEngineGuideMiddleware()) server.AddMiddleware(middlewares.NewTrajectoryMiddleware(tracker)) @@ -57,8 +59,7 @@ func NewServer(ctx context.Context, cfg *mcp.Config) *Server { } // RegisterTools registers all configured providers and their tools with the server. -// Databricks and IO providers are always registered, while workspace and deployment -// providers are conditional based on configuration flags. +// CLItools provider is always registered. func (s *Server) RegisterTools(ctx context.Context) error { log.Info(ctx, "Registering tools") @@ -72,75 +73,22 @@ func (s *Server) RegisterTools(ctx context.Context) error { // Don't fail - authentication will be attempted on first tool call via middleware } - // Always register databricks provider - if err := s.registerDatabricksProvider(ctx); err != nil { - return err - } - - // Always register io provider - if err := s.registerIOProvider(ctx); err != nil { - return err - } - - // Register deployment provider if enabled - if s.config.AllowDeployment { - log.Info(ctx, "Deployment provider enabled") - if err := s.registerDeploymentProvider(ctx); err != nil { - return err - } - } else { - log.Info(ctx, "Deployment provider disabled (enable with allow_deployment: true)") - } - - return nil -} - -// registerDatabricksProvider registers the Databricks provider -func (s *Server) registerDatabricksProvider(ctx context.Context) error { - log.Info(ctx, "Registering Databricks provider") - - // Add session to context - ctx = session.WithSession(ctx, s.session) - - provider, err := databricks.NewProvider(ctx, s.config, s.session) - if err != nil { - return err - } - - if err := provider.RegisterTools(s.server); err != nil { + // Always register clitools provider + if err := s.registerCLIToolsProvider(ctx); err != nil { return err } return nil } -// registerIOProvider registers the I/O provider -func (s *Server) registerIOProvider(ctx context.Context) error { - log.Info(ctx, "Registering I/O provider") +// registerCLIToolsProvider registers the CLI tools provider +func (s *Server) registerCLIToolsProvider(ctx context.Context) error { + log.Info(ctx, "Registering CLI tools provider") // Add session to context ctx = session.WithSession(ctx, s.session) - provider, err := io.NewProvider(ctx, s.config.IoConfig, s.session) - if err != nil { - return err - } - - if err := provider.RegisterTools(s.server); err != nil { - return err - } - - return nil -} - -// registerDeploymentProvider registers the deployment provider -func (s *Server) registerDeploymentProvider(ctx context.Context) error { - log.Info(ctx, "Registering deployment provider") - - // Add session to context - ctx = session.WithSession(ctx, s.session) - - provider, err := deployment.NewProvider(ctx, s.config, s.session) + provider, err := clitools.NewProvider(ctx, 
s.config, s.session) if err != nil { return err } @@ -188,17 +136,23 @@ func (s *Server) GetServer() *mcpsdk.Server { // This improves the user experience by making the first tool call faster. // If authentication fails, tools will still work via lazy authentication in the middleware. func (s *Server) initializeDatabricksAuth(ctx context.Context) error { - client, err := databricks.ConfigureAuth(ctx, s.session, nil, nil) + client, err := databricks.NewWorkspaceClient() if err != nil { return err } - // Get current user info for logging - if client != nil { - me, err := client.CurrentUser.Me(ctx) - if err == nil && me.UserName != "" { - log.Infof(ctx, "Authenticated with Databricks as: %s", me.UserName) - } + // Verify authentication by getting current user + me, err := client.CurrentUser.Me(ctx) + if err != nil { + return err + } + + // Store client in session for reuse + s.session.Set(middlewares.DatabricksClientKey, client) + + // Log authenticated user + if me.UserName != "" { + log.Infof(ctx, "Authenticated with Databricks as: %s", me.UserName) } return nil diff --git a/experimental/apps-mcp/lib/session/session.go b/experimental/apps-mcp/lib/session/session.go index 3ad1960bac..d7005ee485 100644 --- a/experimental/apps-mcp/lib/session/session.go +++ b/experimental/apps-mcp/lib/session/session.go @@ -20,10 +20,11 @@ const ( // Data keys for session storage const ( - WorkDirDataKey = "workDir" - StartTimeDataKey = "startTime" - ToolCallsDataKey = "toolCalls" - TrackerDataKey = "tracker" + WorkDirDataKey = "workDir" + StartTimeDataKey = "startTime" + ToolCallsDataKey = "toolCalls" + TrackerDataKey = "tracker" + CapabilitiesDataKey = "capabilities" ) // Session represents an MCP session with state tracking diff --git a/experimental/apps-mcp/lib/trajectory/tracker.go b/experimental/apps-mcp/lib/trajectory/tracker.go index 594bf8c5a0..2d5a7df560 100644 --- a/experimental/apps-mcp/lib/trajectory/tracker.go +++ b/experimental/apps-mcp/lib/trajectory/tracker.go @@ -49,20 +49,8 @@ func NewTracker(ctx context.Context, sess *session.Session, cfg *mcp.Config) (*T func (t *Tracker) writeSessionEntry(cfg *mcp.Config) error { configMap := make(map[string]any) - configMap["allow_deployment"] = cfg.AllowDeployment configMap["with_workspace_tools"] = cfg.WithWorkspaceTools - if cfg.IoConfig != nil { - ioConfigMap := make(map[string]any) - if cfg.IoConfig.Template != nil { - ioConfigMap["template"] = fmt.Sprintf("%v", cfg.IoConfig.Template) - } - if cfg.IoConfig.Validation != nil { - ioConfigMap["validation"] = "***" - } - configMap["io_config"] = ioConfigMap - } - entry := NewSessionEntry(t.sessionID, configMap) return t.writer.WriteEntry(entry) } From 952032c00c5db735ae4a823d20c86ccff0379f5b Mon Sep 17 00:00:00 2001 From: Igor Rekun Date: Tue, 25 Nov 2025 19:55:10 +0000 Subject: [PATCH 02/18] Drop unused providers. 
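
The io and deployment providers implemented the scaffold/validate/deploy
workflow removed in the previous commit. The same outcomes now go through
the clitools provider, e.g. (illustrative commands):

    invoke_databricks_cli 'bundle validate'
    invoke_databricks_cli 'bundle deploy'
    invoke_databricks_cli 'apps deploy'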
--- .../lib/providers/deployment/provider.go | 334 ----------------- .../apps-mcp/lib/providers/io/format.go | 37 -- .../apps-mcp/lib/providers/io/provider.go | 112 ------ .../apps-mcp/lib/providers/io/scaffold.go | 125 ------- .../apps-mcp/lib/providers/io/state.go | 349 ------------------ .../apps-mcp/lib/providers/io/validate.go | 87 ----- .../apps-mcp/lib/providers/io/validation.go | 217 ----------- 7 files changed, 1261 deletions(-) delete mode 100644 experimental/apps-mcp/lib/providers/deployment/provider.go delete mode 100644 experimental/apps-mcp/lib/providers/io/format.go delete mode 100644 experimental/apps-mcp/lib/providers/io/provider.go delete mode 100644 experimental/apps-mcp/lib/providers/io/scaffold.go delete mode 100644 experimental/apps-mcp/lib/providers/io/state.go delete mode 100644 experimental/apps-mcp/lib/providers/io/validate.go delete mode 100644 experimental/apps-mcp/lib/providers/io/validation.go diff --git a/experimental/apps-mcp/lib/providers/deployment/provider.go b/experimental/apps-mcp/lib/providers/deployment/provider.go deleted file mode 100644 index c232392e03..0000000000 --- a/experimental/apps-mcp/lib/providers/deployment/provider.go +++ /dev/null @@ -1,334 +0,0 @@ -package deployment - -import ( - "context" - "fmt" - "os" - "os/exec" - "path/filepath" - "time" - - mcp "github.com/databricks/cli/experimental/apps-mcp/lib" - mcpsdk "github.com/databricks/cli/experimental/apps-mcp/lib/mcp" - "github.com/databricks/cli/experimental/apps-mcp/lib/providers" - "github.com/databricks/cli/experimental/apps-mcp/lib/providers/databricks" - "github.com/databricks/cli/experimental/apps-mcp/lib/providers/io" - "github.com/databricks/cli/experimental/apps-mcp/lib/session" - "github.com/databricks/cli/libs/log" - "github.com/databricks/databricks-sdk-go/service/apps" -) - -func init() { - // Register deployment provider with conditional enablement based on AllowDeployment - providers.Register("deployment", func(ctx context.Context, cfg *mcp.Config, sess *session.Session) (providers.Provider, error) { - return NewProvider(ctx, cfg, sess) - }, providers.ProviderConfig{ - EnabledWhen: func(cfg *mcp.Config) bool { - return cfg.AllowDeployment - }, - }) -} - -const deployRetries = 3 - -// Provider implements Databricks app deployment functionality. -type Provider struct { - config *mcp.Config - session *session.Session - ctx context.Context -} - -// DeployDatabricksAppInput contains parameters for deploying a Databricks app. -type DeployDatabricksAppInput struct { - WorkDir string `json:"work_dir" jsonschema:"required" jsonschema_description:"Absolute path to the work directory containing the app to deploy"` - Name string `json:"name" jsonschema:"required" jsonschema_description:"Name of the Databricks app (alphanumeric and dash characters only)"` - Description string `json:"description" jsonschema:"required" jsonschema_description:"Description of the Databricks app"` - Force bool `json:"force,omitempty" jsonschema_description:"Force re-deployment if the app already exists"` -} - -func NewProvider(ctx context.Context, cfg *mcp.Config, sess *session.Session) (*Provider, error) { - return &Provider{ - config: cfg, - session: sess, - ctx: ctx, - }, nil -} - -// Name returns the name of the provider. 
-func (p *Provider) Name() string { - return "deployment" -} - -func (p *Provider) RegisterTools(server *mcpsdk.Server) error { - log.Info(p.ctx, "Registering deployment tools") - - mcpsdk.AddTool(server, - &mcpsdk.Tool{ - Name: "deploy_databricks_app", - Description: "Deploy a generated app to Databricks Apps. Creates the app if it doesn't exist, syncs local files to workspace, and deploys the app. Returns deployment status and app URL. Only use after direct user request and running validation.", - }, - func(ctx context.Context, req *mcpsdk.CallToolRequest, args DeployDatabricksAppInput) (*mcpsdk.CallToolResult, any, error) { - log.Debugf(ctx, "deploy_databricks_app called: work_dir=%s, name=%s, force=%v", - args.WorkDir, args.Name, args.Force) - - if !filepath.IsAbs(args.WorkDir) { - return nil, nil, fmt.Errorf("work_dir must be an absolute path, got: '%s'. Relative paths are not supported", args.WorkDir) - } - - result, err := p.deployDatabricksApp(ctx, &args) - if err != nil { - return nil, nil, err - } - - if !result.Success { - return nil, nil, fmt.Errorf("%s", result.Message) - } - - text := formatDeployResult(result) - return mcpsdk.CreateNewTextContentResult(text), nil, nil - }, - ) - - return nil -} - -// DeployResult contains the outcome of a Databricks app deployment. -type DeployResult struct { - Success bool - Message string - AppURL string - AppName string -} - -func (p *Provider) deployDatabricksApp(ctx context.Context, args *DeployDatabricksAppInput) (*DeployResult, error) { - startTime := time.Now() - - workPath := args.WorkDir - if _, err := os.Stat(workPath); os.IsNotExist(err) { - return &DeployResult{ - Success: false, - Message: "Work directory does not exist: " + workPath, - AppName: args.Name, - }, nil - } - - fileInfo, err := os.Stat(workPath) - if err != nil { - return &DeployResult{ - Success: false, - Message: fmt.Sprintf("Failed to stat work directory: %v", err), - AppName: args.Name, - }, nil - } - - if !fileInfo.IsDir() { - return &DeployResult{ - Success: false, - Message: "Work path is not a directory: " + workPath, - AppName: args.Name, - }, nil - } - - projectState, err := io.LoadState(ctx, workPath) - if err != nil { - return &DeployResult{ - Success: false, - Message: fmt.Sprintf("Failed to load project state: %v", err), - AppName: args.Name, - }, nil - } - - if projectState == nil { - return &DeployResult{ - Success: false, - Message: "Project must be scaffolded before deployment", - AppName: args.Name, - }, nil - } - - expectedChecksum, hasChecksum := projectState.Checksum() - if !hasChecksum { - return &DeployResult{ - Success: false, - Message: "Project must be validated before deployment. Run validate_data_app first.", - AppName: args.Name, - }, nil - } - - checksumValid, err := io.VerifyChecksum(ctx, workPath, expectedChecksum) - if err != nil { - return &DeployResult{ - Success: false, - Message: fmt.Sprintf("Failed to verify project checksum: %v", err), - AppName: args.Name, - }, nil - } - - if !checksumValid { - return &DeployResult{ - Success: false, - Message: "Project files changed since validation. 
Re-run validate_data_app before deployment.", - AppName: args.Name, - }, nil - } - - log.Infof(ctx, "Installing dependencies: work_dir=%s", workPath) - if err := p.runCommand(workPath, "npm", "install"); err != nil { - return &DeployResult{ - Success: false, - Message: fmt.Sprintf("Failed to install dependencies: %v", err), - AppName: args.Name, - }, nil - } - - log.Infof(ctx, "Building frontend: work_dir=%s", workPath) - if err := p.runCommand(workPath, "npm", "run", "build"); err != nil { - return &DeployResult{ - Success: false, - Message: fmt.Sprintf("Failed to build frontend: %v", err), - AppName: args.Name, - }, nil - } - - appInfo, err := p.getOrCreateApp(ctx, args.Name, args.Description, args.Force) - if err != nil { - return &DeployResult{ - Success: false, - Message: fmt.Sprintf("Failed to get or create app: %v", err), - AppName: args.Name, - }, nil - } - - serverDir := filepath.Join(workPath, "server") - syncStart := time.Now() - log.Infof(ctx, "Syncing workspace: source=%s, target=%s", serverDir, databricks.GetSourcePath(appInfo)) - - if err := databricks.SyncWorkspace(appInfo, serverDir); err != nil { - return &DeployResult{ - Success: false, - Message: fmt.Sprintf("Failed to sync workspace: %v", err), - AppName: args.Name, - }, nil - } - - log.Infof(ctx, "Workspace sync completed: duration_seconds=%.2f", time.Since(syncStart).Seconds()) - - deployStart := time.Now() - log.Infof(ctx, "Deploying app: name=%s", args.Name) - - var deployErr error - for attempt := 1; attempt <= deployRetries; attempt++ { - deployErr = databricks.DeployApp(ctx, p.config, appInfo) - if deployErr == nil { - break - } - - if attempt < deployRetries { - log.Warnf(ctx, "Deploy attempt failed, retrying: attempt=%d, error=%s", - attempt, deployErr.Error()) - } - } - - if deployErr != nil { - return &DeployResult{ - Success: false, - Message: fmt.Sprintf("Failed to deploy app after %d attempts: %v", deployRetries, deployErr), - AppName: args.Name, - }, nil - } - - log.Infof(ctx, "App deployment completed: duration_seconds=%.2f", time.Since(deployStart).Seconds()) - - deployedState, err := projectState.Deploy() - if err != nil { - return &DeployResult{ - Success: false, - Message: fmt.Sprintf("Failed to transition state: %v", err), - AppName: args.Name, - }, nil - } - - if err := io.SaveState(ctx, workPath, deployedState); err != nil { - log.Warnf(ctx, "Failed to save deployed state: error=%v", err) - } - - totalDuration := time.Since(startTime) - log.Infof(ctx, "Full deployment completed: duration_seconds=%.2f, app_url=%s", - totalDuration.Seconds(), appInfo.Url) - - return &DeployResult{ - Success: true, - Message: "Deployment completed successfully", - AppURL: appInfo.Url, - AppName: args.Name, - }, nil -} - -func (p *Provider) getOrCreateApp(ctx context.Context, name, description string, force bool) (*apps.App, error) { - appInfo, err := databricks.GetAppInfo(ctx, p.config, name) - if err == nil { - log.Infof(ctx, "Found existing app: name=%s", name) - - if !force { - userInfo, err := databricks.GetUserInfo(ctx, p.config) - if err != nil { - return nil, fmt.Errorf("failed to get user info: %w", err) - } - - if appInfo.Creator != userInfo.UserName { - return nil, fmt.Errorf( - "app '%s' already exists and was created by another user: %s. 
Use 'force' option to override", - name, - appInfo.Creator, - ) - } - } - - return appInfo, nil - } - - log.Infof(ctx, "App not found, creating new app: name=%s", name) - - resources, err := databricks.ResourcesFromEnv(ctx, p.config) - if err != nil { - return nil, err - } - - createApp := &apps.CreateAppRequest{ - App: apps.App{ - Name: name, - Description: description, - Resources: []apps.AppResource{*resources}, - }, - } - - return databricks.CreateApp(ctx, p.config, createApp) -} - -func (p *Provider) runCommand(dir, name string, args ...string) error { - cmd := exec.Command(name, args...) - cmd.Dir = dir - - output, err := cmd.CombinedOutput() - if err != nil { - return fmt.Errorf("%s failed: %w (output: %s)", name, err, string(output)) - } - - return nil -} - -func formatDeployResult(result *DeployResult) string { - if result.Success { - return fmt.Sprintf( - "Successfully deployed app '%s'\nURL: %s\n%s", - result.AppName, - result.AppURL, - result.Message, - ) - } - return fmt.Sprintf( - "Deployment failed for app '%s': %s", - result.AppName, - result.Message, - ) -} diff --git a/experimental/apps-mcp/lib/providers/io/format.go b/experimental/apps-mcp/lib/providers/io/format.go deleted file mode 100644 index cc9a1c1c45..0000000000 --- a/experimental/apps-mcp/lib/providers/io/format.go +++ /dev/null @@ -1,37 +0,0 @@ -package io - -import "fmt" - -// formatScaffoldResult formats a ScaffoldResult for display -func formatScaffoldResult(result *ScaffoldResult) string { - return fmt.Sprintf( - "Successfully scaffolded %s template to %s\n\n"+ - "Files copied: %d\n\n"+ - "Template: %s\n\n"+ - "%s", - result.TemplateName, - result.WorkDir, - result.FilesCopied, - result.TemplateName, - result.TemplateDescription, - ) -} - -// formatValidateResult formats a ValidateResult for display -func formatValidateResult(result *ValidateResult) string { - if result.Success { - return "✓ " + result.Message - } - - if result.Details == nil { - return "✗ " + result.Message - } - - return fmt.Sprintf( - "✗ %s\n\nExit code: %d\n\nStdout:\n%s\n\nStderr:\n%s", - result.Message, - result.Details.ExitCode, - result.Details.Stdout, - result.Details.Stderr, - ) -} diff --git a/experimental/apps-mcp/lib/providers/io/provider.go b/experimental/apps-mcp/lib/providers/io/provider.go deleted file mode 100644 index b8a8eea776..0000000000 --- a/experimental/apps-mcp/lib/providers/io/provider.go +++ /dev/null @@ -1,112 +0,0 @@ -package io - -import ( - "context" - - mcp "github.com/databricks/cli/experimental/apps-mcp/lib" - mcpsdk "github.com/databricks/cli/experimental/apps-mcp/lib/mcp" - "github.com/databricks/cli/experimental/apps-mcp/lib/providers" - "github.com/databricks/cli/experimental/apps-mcp/lib/session" - "github.com/databricks/cli/experimental/apps-mcp/lib/templates" - "github.com/databricks/cli/libs/log" -) - -func init() { - providers.Register("io", func(ctx context.Context, cfg *mcp.Config, sess *session.Session) (providers.Provider, error) { - return NewProvider(ctx, cfg.IoConfig, sess) - }, providers.ProviderConfig{ - Always: true, - }) -} - -// Provider implements the I/O provider for scaffolding and validation -type Provider struct { - config *mcp.IoConfig - session *session.Session - ctx context.Context - defaultTemplate templates.Template -} - -// NewProvider creates a new I/O provider -func NewProvider(ctx context.Context, cfg *mcp.IoConfig, sess *session.Session) (*Provider, error) { - return &Provider{ - config: cfg, - session: sess, - ctx: ctx, - defaultTemplate: templates.GetTRPCTemplate(), - 
}, nil -} - -// Name returns the name of the provider. -func (p *Provider) Name() string { - return "io" -} - -// RegisterTools registers all I/O tools with the MCP server -func (p *Provider) RegisterTools(server *mcpsdk.Server) error { - log.Info(p.ctx, "Registering I/O tools") - - // Register scaffold_data_app - type ScaffoldInput struct { - WorkDir string `json:"work_dir" jsonschema:"required" jsonschema_description:"Absolute path to the work directory"` - } - - mcpsdk.AddTool(server, - &mcpsdk.Tool{ - Name: "scaffold_data_app", - Description: "Initialize a project by copying template files from the default TypeScript (tRPC + React) template to a work directory. It sets up a basic project structure, and should be ALWAYS used as the first step in creating a new data or web app.", - }, - func(ctx context.Context, req *mcpsdk.CallToolRequest, args ScaffoldInput) (*mcpsdk.CallToolResult, any, error) { - log.Debugf(ctx, "scaffold_data_app called: work_dir=%s", args.WorkDir) - - scaffoldArgs := &ScaffoldArgs{ - WorkDir: args.WorkDir, - } - - result, err := p.Scaffold(ctx, scaffoldArgs) - if err != nil { - return nil, nil, err - } - - // Set work directory in session for workspace tools - if err := session.SetWorkDir(ctx, result.WorkDir); err != nil { - log.Warnf(ctx, "Failed to set work directory in session: error=%v", err) - } else { - log.Infof(ctx, "Work directory set in session: work_dir=%s", result.WorkDir) - } - - text := formatScaffoldResult(result) - return mcpsdk.CreateNewTextContentResult(text), nil, nil - }, - ) - - // Register validate_data_app - type ValidateInput struct { - WorkDir string `json:"work_dir" jsonschema:"required" jsonschema_description:"Absolute path to the work directory"` - } - - mcpsdk.AddTool(server, - &mcpsdk.Tool{ - Name: "validate_data_app", - Description: "Validate a project by running validation checks. Project should be scaffolded first. 
Returns validation result with success status and details.", - }, - func(ctx context.Context, req *mcpsdk.CallToolRequest, args ValidateInput) (*mcpsdk.CallToolResult, any, error) { - log.Debugf(ctx, "validate_data_app called: work_dir=%s", args.WorkDir) - - validateArgs := &ValidateArgs{ - WorkDir: args.WorkDir, - } - - result, err := p.Validate(ctx, validateArgs) - if err != nil { - return nil, nil, err - } - - text := formatValidateResult(result) - return mcpsdk.CreateNewTextContentResult(text), nil, nil - }, - ) - - log.Infof(p.ctx, "Registered I/O tools: count=%d", 2) - return nil -} diff --git a/experimental/apps-mcp/lib/providers/io/scaffold.go b/experimental/apps-mcp/lib/providers/io/scaffold.go deleted file mode 100644 index a7273feaab..0000000000 --- a/experimental/apps-mcp/lib/providers/io/scaffold.go +++ /dev/null @@ -1,125 +0,0 @@ -package io - -import ( - "bytes" - "context" - "errors" - "fmt" - "io/fs" - "os" - "path/filepath" - - "github.com/databricks/cli/experimental/apps-mcp/lib/middlewares" - "github.com/databricks/cli/experimental/apps-mcp/lib/templates" - "github.com/databricks/cli/libs/filer" - "github.com/databricks/cli/libs/log" -) - -// ScaffoldArgs contains arguments for scaffolding operation -type ScaffoldArgs struct { - WorkDir string `json:"work_dir"` -} - -// ScaffoldResult contains the result of a scaffold operation -type ScaffoldResult struct { - FilesCopied int `json:"files_copied"` - WorkDir string `json:"work_dir"` - TemplateName string `json:"template_name"` - TemplateDescription string `json:"template_description"` -} - -// Scaffold copies template files to the work directory -func (p *Provider) Scaffold(ctx context.Context, args *ScaffoldArgs) (*ScaffoldResult, error) { - // Validate work directory - workDir, err := filepath.Abs(args.WorkDir) - if err != nil { - return nil, fmt.Errorf("invalid work directory: %w", err) - } - - f, err := filer.NewLocalClient(workDir) - if err != nil { - return nil, fmt.Errorf("failed to create filer: %w", err) - } - - // Check if directory exists - if stat, err := f.Stat(ctx, "."); err == nil { - if !stat.IsDir() { - return nil, errors.New("work_dir exists but is not a directory") - } - - // Check if empty - entries, err := f.ReadDir(ctx, ".") - if err != nil { - return nil, err - } - - allowedEntries := map[string]bool{ - ".git": true, - ".claude": true, - } - - for _, entry := range entries { - if !allowedEntries[entry.Name()] { - return nil, fmt.Errorf("work_dir is not empty: %s", entry.Name()) - } - } - } else if !errors.Is(err, fs.ErrNotExist) { - // Some other error - // filer.FileDoesNotExistError implements Is(fs.ErrNotExist) - return nil, fmt.Errorf("failed to check work directory: %w", err) - } - - // Create directory - if err := f.Mkdir(ctx, "."); err != nil { - return nil, fmt.Errorf("failed to create directory: %w", err) - } - - // Get template - template := p.getTemplate() - files, err := template.Files() - if err != nil { - return nil, fmt.Errorf("failed to read template: %w", err) - } - - // Copy files - filesCopied := 0 - for path, content := range files { - // filer.Write handles creating parent directories if requested - if err := f.Write(ctx, path, bytes.NewReader([]byte(content)), filer.CreateParentDirectories); err != nil { - return nil, fmt.Errorf("failed to write %s: %w", path, err) - } - - filesCopied++ - } - - // write .env file - warehouseID, err := middlewares.GetWarehouseID(ctx) - if err != nil { - return nil, fmt.Errorf("failed to get warehouse ID: %w", err) - } - host := 
middlewares.MustGetDatabricksClient(ctx).Config.Host - - envContent := fmt.Sprintf("DATABRICKS_WAREHOUSE_ID=%s\nDATABRICKS_HOST=%s", warehouseID, host) - envPath := filepath.Join(workDir, ".env") - if err := os.WriteFile(envPath, []byte(envContent), 0o644); err != nil { - return nil, fmt.Errorf("failed to write .env file: %w", err) - } - - log.Infof(ctx, "scaffolded project (template=%s, work_dir=%s, files=%d)", - template.Name(), workDir, filesCopied) - - return &ScaffoldResult{ - FilesCopied: filesCopied, - WorkDir: workDir, - TemplateName: template.Name(), - TemplateDescription: template.Description(), - }, nil -} - -func (p *Provider) getTemplate() templates.Template { - // TODO: Support custom templates by checking p.config.Template.Path - // and loading from filesystem. Not yet implemented. - - // Default to TRPC template - return p.defaultTemplate -} diff --git a/experimental/apps-mcp/lib/providers/io/state.go b/experimental/apps-mcp/lib/providers/io/state.go deleted file mode 100644 index f8a72e985c..0000000000 --- a/experimental/apps-mcp/lib/providers/io/state.go +++ /dev/null @@ -1,349 +0,0 @@ -package io - -import ( - "bytes" - "context" - "crypto/sha256" - "encoding/hex" - "encoding/json" - "errors" - "fmt" - "io" - "io/fs" - "path" - "path/filepath" - "sort" - "time" - - "github.com/databricks/cli/libs/filer" -) - -const StateFileName = ".edda_state" - -// StateType represents the lifecycle state of a scaffolded project. -type StateType string - -const ( - StateScaffolded StateType = "Scaffolded" - StateValidated StateType = "Validated" - StateDeployed StateType = "Deployed" -) - -// String returns the string representation of the state. -func (s StateType) String() string { - return string(s) -} - -// IsValid checks if the state type is a valid value. -func (s StateType) IsValid() bool { - switch s { - case StateScaffolded, StateValidated, StateDeployed: - return true - default: - return false - } -} - -// ValidatedData contains metadata for a validated project state. -type ValidatedData struct { - ValidatedAt time.Time `json:"validated_at"` - Checksum string `json:"checksum"` -} - -// DeployedData contains metadata for a deployed project state. -type DeployedData struct { - ValidatedAt time.Time `json:"validated_at"` - Checksum string `json:"checksum"` - DeployedAt time.Time `json:"deployed_at"` -} - -// ProjectState tracks the current state and metadata of a scaffolded project. 
-type ProjectState struct { - State StateType `json:"state"` - Data any `json:"data,omitempty"` -} - -func NewProjectState() *ProjectState { - return &ProjectState{ - State: StateScaffolded, - } -} - -func (ps *ProjectState) Validate(checksum string) *ProjectState { - return &ProjectState{ - State: StateValidated, - Data: ValidatedData{ - ValidatedAt: time.Now().UTC(), - Checksum: checksum, - }, - } -} - -func (ps *ProjectState) extractValidatedData() (*ValidatedData, error) { - if data, ok := ps.Data.(ValidatedData); ok { - return &data, nil - } - - dataMap, ok := ps.Data.(map[string]any) - if !ok { - return nil, errors.New("invalid validated state data") - } - - validatedAtStr, ok := dataMap["validated_at"].(string) - if !ok { - return nil, errors.New("missing validated_at in state data") - } - - validatedAt, err := time.Parse(time.RFC3339, validatedAtStr) - if err != nil { - return nil, fmt.Errorf("invalid validated_at format: %w", err) - } - - checksum, ok := dataMap["checksum"].(string) - if !ok { - return nil, errors.New("missing checksum in state data") - } - - return &ValidatedData{ - ValidatedAt: validatedAt, - Checksum: checksum, - }, nil -} - -func (ps *ProjectState) extractChecksumFromMap() (string, bool) { - dataMap, ok := ps.Data.(map[string]any) - if !ok { - return "", false - } - checksum, ok := dataMap["checksum"].(string) - return checksum, ok -} - -func (ps *ProjectState) Deploy() (*ProjectState, error) { - if !ps.CanTransitionTo(StateDeployed) { - if ps.State == StateScaffolded { - return nil, errors.New("cannot deploy: project not validated") - } - if ps.State == StateDeployed { - return nil, errors.New("cannot deploy: project already deployed (re-validate first)") - } - return nil, fmt.Errorf("invalid state transition: %s -> Deployed", ps.State) - } - - data, err := ps.extractValidatedData() - if err != nil { - return nil, err - } - - return &ProjectState{ - State: StateDeployed, - Data: DeployedData{ - ValidatedAt: data.ValidatedAt, - Checksum: data.Checksum, - DeployedAt: time.Now().UTC(), - }, - }, nil -} - -func (ps *ProjectState) Checksum() (string, bool) { - switch ps.State { - case StateValidated: - if data, ok := ps.Data.(ValidatedData); ok { - return data.Checksum, true - } - return ps.extractChecksumFromMap() - case StateDeployed: - if data, ok := ps.Data.(DeployedData); ok { - return data.Checksum, true - } - return ps.extractChecksumFromMap() - case StateScaffolded: - return "", false - } - return "", false -} - -func (ps *ProjectState) IsValidated() bool { - return ps.State == StateValidated || ps.State == StateDeployed -} - -// CanTransitionTo checks if a state transition is valid according to the state machine rules. -// Valid transitions: -// - Scaffolded -> Validated -// - Validated -> Deployed (or re-validate to Validated) -// - Deployed -> Validated (re-validation allowed before re-deployment) -func (ps *ProjectState) CanTransitionTo(next StateType) bool { - switch ps.State { - case StateScaffolded: - return next == StateValidated - case StateValidated: - return next == StateDeployed || next == StateValidated - case StateDeployed: - return next == StateValidated - default: - return false - } -} - -// TransitionTo attempts to transition to a new state, returning an error if invalid. 
-func (ps *ProjectState) TransitionTo(next StateType) error { - if !next.IsValid() { - return fmt.Errorf("invalid target state: %s", next) - } - if !ps.CanTransitionTo(next) { - return fmt.Errorf("invalid state transition: %s -> %s", ps.State, next) - } - ps.State = next - return nil -} - -func LoadState(ctx context.Context, workDir string) (*ProjectState, error) { - f, err := filer.NewLocalClient(workDir) - if err != nil { - return nil, fmt.Errorf("failed to create filer: %w", err) - } - - r, err := f.Read(ctx, StateFileName) - if err != nil { - if errors.Is(err, fs.ErrNotExist) || errors.As(err, &filer.FileDoesNotExistError{}) { - return nil, nil - } - return nil, fmt.Errorf("failed to read state file: %w", err) - } - defer r.Close() - - content, err := io.ReadAll(r) - if err != nil { - return nil, fmt.Errorf("failed to read state file content: %w", err) - } - - var state ProjectState - if err := json.Unmarshal(content, &state); err != nil { - return nil, fmt.Errorf("failed to parse state file: %w", err) - } - - return &state, nil -} - -func SaveState(ctx context.Context, workDir string, state *ProjectState) error { - f, err := filer.NewLocalClient(workDir) - if err != nil { - return fmt.Errorf("failed to create filer: %w", err) - } - - content, err := json.MarshalIndent(state, "", " ") - if err != nil { - return fmt.Errorf("failed to serialize state: %w", err) - } - - if err := f.Write(ctx, StateFileName, bytes.NewReader(content), filer.OverwriteIfExists); err != nil { - return fmt.Errorf("failed to write state file: %w", err) - } - - return nil -} - -func ComputeChecksum(ctx context.Context, workDir string) (string, error) { - f, err := filer.NewLocalClient(workDir) - if err != nil { - return "", fmt.Errorf("failed to create filer: %w", err) - } - - var filesToHash []string - - for _, dir := range []string{"client", "server"} { - // Check if directory exists - info, err := f.Stat(ctx, dir) - if err == nil && info.IsDir() { - if err := collectSourceFiles(ctx, f, dir, &filesToHash); err != nil { - return "", err - } - } - } - - packageJSON := "package.json" - if _, err := f.Stat(ctx, packageJSON); err == nil { - filesToHash = append(filesToHash, packageJSON) - } - - sort.Strings(filesToHash) - - if len(filesToHash) == 0 { - return "", errors.New("no files to hash - project structure appears invalid") - } - - hasher := sha256.New() - - for _, filePath := range filesToHash { - r, err := f.Read(ctx, filePath) - if err != nil { - return "", fmt.Errorf("failed to read %s: %w", filePath, err) - } - - if _, err := io.Copy(hasher, r); err != nil { - r.Close() - return "", fmt.Errorf("failed to hash %s: %w", filePath, err) - } - r.Close() - } - - return hex.EncodeToString(hasher.Sum(nil)), nil -} - -func VerifyChecksum(ctx context.Context, workDir, expected string) (bool, error) { - current, err := ComputeChecksum(ctx, workDir) - if err != nil { - return false, err - } - return current == expected, nil -} - -func collectSourceFiles(ctx context.Context, f filer.Filer, dir string, files *[]string) error { - entries, err := f.ReadDir(ctx, dir) - if err != nil { - return fmt.Errorf("failed to read directory %s: %w", dir, err) - } - - excludedDirs := map[string]bool{ - "node_modules": true, - "dist": true, - ".git": true, - "build": true, - "coverage": true, - } - - validExtensions := map[string]bool{ - ".ts": true, - ".tsx": true, - ".js": true, - ".jsx": true, - ".json": true, - ".css": true, - ".html": true, - ".yaml": true, - ".yml": true, - } - - for _, entry := range entries { - // Use path.Join 
for forward slashes in relative paths (filer compatible) - // filepath.Join might use backslashes on Windows, but here we are in CLI running on darwin. - // Ideally use path.Join for abstract filesystem paths. - relativePath := path.Join(dir, entry.Name()) - - if entry.IsDir() { - if excludedDirs[entry.Name()] { - continue - } - if err := collectSourceFiles(ctx, f, relativePath, files); err != nil { - return err - } - } else { - ext := filepath.Ext(entry.Name()) - if validExtensions[ext] { - *files = append(*files, relativePath) - } - } - } - - return nil -} diff --git a/experimental/apps-mcp/lib/providers/io/validate.go b/experimental/apps-mcp/lib/providers/io/validate.go deleted file mode 100644 index 1058e34121..0000000000 --- a/experimental/apps-mcp/lib/providers/io/validate.go +++ /dev/null @@ -1,87 +0,0 @@ -package io - -import ( - "context" - "errors" - "fmt" - "os" - "path/filepath" - - "github.com/databricks/cli/libs/log" -) - -type ValidateArgs struct { - WorkDir string `json:"work_dir"` -} - -func (p *Provider) Validate(ctx context.Context, args *ValidateArgs) (*ValidateResult, error) { - workDir, err := filepath.Abs(args.WorkDir) - if err != nil { - return nil, fmt.Errorf("invalid work directory: %w", err) - } - - if !filepath.IsAbs(workDir) { - return nil, errors.New("work_dir must be an absolute path") - } - - if _, err := os.Stat(workDir); os.IsNotExist(err) { - return nil, errors.New("work directory does not exist") - } - - state, err := LoadState(ctx, workDir) - if err != nil { - log.Warnf(ctx, "failed to load project state: error=%v", err) - } - if state == nil { - state = NewProjectState() - } - - log.Infof(ctx, "starting validation: work_dir=%s, state=%s", workDir, string(state.State)) - - var validation Validation - if p.config != nil && p.config.Validation != nil { - valConfig := p.config.Validation - if valConfig.Command != "" { - log.Infof(ctx, "using custom validation command: command=%s", valConfig.Command) - validation = NewValidationCmd(valConfig.Command) - } - } - - if validation == nil { - log.Info(ctx, "using default Node.js validation strategy") - validation = NewValidationNodeJs() - } - - result, err := validation.Validate(ctx, workDir) - if err != nil { - return nil, fmt.Errorf("validation execution failed: %w", err) - } - - if !result.Success { - log.Warnf(ctx, "validation failed: message=%s", result.Message) - return result, nil - } - - checksum, err := ComputeChecksum(ctx, workDir) - if err != nil { - log.Warnf(ctx, "failed to compute checksum: error=%s", err.Error()) - return &ValidateResult{ - Success: false, - Message: fmt.Sprintf("Validation passed but failed to compute checksum: %v", err), - }, nil - } - - validatedState := state.Validate(checksum) - if err := SaveState(ctx, workDir, validatedState); err != nil { - log.Warnf(ctx, "failed to save state: error=%s", err.Error()) - return &ValidateResult{ - Success: false, - Message: fmt.Sprintf("Validation passed but failed to save state: %v", err), - }, nil - } - - log.Infof(ctx, "validation successful: checksum=%s, state=%s", - checksum, string(validatedState.State)) - - return result, nil -} diff --git a/experimental/apps-mcp/lib/providers/io/validation.go b/experimental/apps-mcp/lib/providers/io/validation.go deleted file mode 100644 index b38a5ecc8e..0000000000 --- a/experimental/apps-mcp/lib/providers/io/validation.go +++ /dev/null @@ -1,217 +0,0 @@ -package io - -import ( - "bytes" - "context" - "fmt" - "os/exec" - "time" - - "github.com/databricks/cli/libs/log" -) - -// ValidationDetail 
contains detailed output from a failed validation. -type ValidationDetail struct { - ExitCode int `json:"exit_code"` - Stdout string `json:"stdout"` - Stderr string `json:"stderr"` -} - -func (vd *ValidationDetail) Error() string { - return fmt.Sprintf("validation failed (exit code %d)\nStdout:\n%s\nStderr:\n%s", - vd.ExitCode, vd.Stdout, vd.Stderr) -} - -// ValidateResult contains the outcome of a validation operation. -type ValidateResult struct { - Success bool `json:"success"` - Message string `json:"message"` - Details *ValidationDetail `json:"details,omitempty"` - ProgressLog []string `json:"progress_log,omitempty"` -} - -func (vr *ValidateResult) String() string { - var result string - - if len(vr.ProgressLog) > 0 { - result = "Validation Progress:\n" - for _, log := range vr.ProgressLog { - result += log + "\n" - } - result += "\n" - } - - if vr.Success { - result += "✓ " + vr.Message - } else { - result += "✗ " + vr.Message - if vr.Details != nil { - result += fmt.Sprintf("\n\nExit code: %d\n\nStdout:\n%s\n\nStderr:\n%s", - vr.Details.ExitCode, vr.Details.Stdout, vr.Details.Stderr) - } - } - - return result -} - -// Validation defines the interface for project validation strategies. -type Validation interface { - Validate(ctx context.Context, workDir string) (*ValidateResult, error) -} - -// ValidationNodeJs implements validation for Node.js-based projects using build, type check, and tests. -type ValidationNodeJs struct{} - -func NewValidationNodeJs() Validation { - return &ValidationNodeJs{} -} - -type validationStep struct { - name string - command string - errorPrefix string - displayName string -} - -func (v *ValidationNodeJs) Validate(ctx context.Context, workDir string) (*ValidateResult, error) { - log.Info(ctx, "Starting Node.js validation: build + typecheck + tests") - startTime := time.Now() - var progressLog []string - - progressLog = append(progressLog, "🔄 Starting Node.js validation: build + typecheck + tests") - - steps := []validationStep{ - { - name: "build", - command: "npm run build --if-present", - errorPrefix: "Failed to run npm build", - displayName: "Build", - }, - { - name: "typecheck", - command: "npm run typecheck --if-present", - errorPrefix: "Failed to run client typecheck", - displayName: "Type check", - }, - { - name: "tests", - command: "npm run test --if-present", - errorPrefix: "Failed to run tests", - displayName: "Tests", - }, - } - - for i, step := range steps { - stepNum := fmt.Sprintf("%d/%d", i+1, len(steps)) - log.Infof(ctx, "step %s: running %s...", stepNum, step.name) - progressLog = append(progressLog, fmt.Sprintf("⏳ Step %s: Running %s...", stepNum, step.displayName)) - - stepStart := time.Now() - err := runCommand(ctx, workDir, step.command) - if err != nil { - stepDuration := time.Since(stepStart) - log.Errorf(ctx, "%s failed (duration: %.1fs)", step.name, stepDuration.Seconds()) - progressLog = append(progressLog, fmt.Sprintf("❌ %s failed (%.1fs)", step.displayName, stepDuration.Seconds())) - return &ValidateResult{ - Success: false, - Message: step.errorPrefix, - Details: err, - ProgressLog: progressLog, - }, nil - } - stepDuration := time.Since(stepStart) - log.Infof(ctx, "✓ %s passed: duration=%.1fs", step.name, stepDuration.Seconds()) - progressLog = append(progressLog, fmt.Sprintf("✅ %s passed (%.1fs)", step.displayName, stepDuration.Seconds())) - } - - totalDuration := time.Since(startTime) - log.Infof(ctx, "✓ all validation checks passed: total_duration=%.1fs, steps=%s", - totalDuration.Seconds(), "build + type check + tests") - 
progressLog = append(progressLog, fmt.Sprintf("✅ All checks passed! Total: %.1fs", totalDuration.Seconds())) - - return &ValidateResult{ - Success: true, - Message: "All validation checks passed", - ProgressLog: progressLog, - }, nil -} - -// runCommand executes a shell command in the specified directory -func runCommand(ctx context.Context, workDir, command string) *ValidationDetail { - cmd := exec.CommandContext(ctx, "sh", "-c", command) - cmd.Dir = workDir - - var stdout, stderr bytes.Buffer - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - err := cmd.Run() - exitCode := 0 - if err != nil { - if exitErr, ok := err.(*exec.ExitError); ok { - exitCode = exitErr.ExitCode() - } else { - return &ValidationDetail{ - ExitCode: -1, - Stdout: stdout.String(), - Stderr: fmt.Sprintf("Failed to execute command: %v\nStderr: %s", err, stderr.String()), - } - } - } - - if exitCode != 0 { - return &ValidationDetail{ - ExitCode: exitCode, - Stdout: stdout.String(), - Stderr: stderr.String(), - } - } - - return nil -} - -// ValidationCmd implements validation using a custom command specified by the user. -type ValidationCmd struct { - Command string -} - -func NewValidationCmd(command string) Validation { - return &ValidationCmd{ - Command: command, - } -} - -func (v *ValidationCmd) Validate(ctx context.Context, workDir string) (*ValidateResult, error) { - log.Infof(ctx, "starting custom validation: command=%s", v.Command) - startTime := time.Now() - var progressLog []string - - progressLog = append(progressLog, "🔄 Starting custom validation: "+v.Command) - - fullCommand := v.Command - err := runCommand(ctx, workDir, fullCommand) - if err != nil { - duration := time.Since(startTime) - log.Errorf(ctx, "custom validation command failed (duration: %.1fs, error: %v)", duration.Seconds(), err) - progressLog = append(progressLog, fmt.Sprintf("❌ Command failed (%.1fs): %v", duration.Seconds(), err)) - return &ValidateResult{ - Success: false, - Message: "Custom validation command failed", - Details: &ValidationDetail{ - ExitCode: -1, - Stdout: "", - Stderr: fmt.Sprintf("Failed to run validation command: %v", err), - }, - ProgressLog: progressLog, - }, nil - } - - duration := time.Since(startTime) - log.Infof(ctx, "✓ custom validation passed: duration=%.1fs", duration.Seconds()) - progressLog = append(progressLog, fmt.Sprintf("✅ Custom validation passed (%.1fs)", duration.Seconds())) - return &ValidateResult{ - Success: true, - Message: "Custom validation passed", - ProgressLog: progressLog, - }, nil -} From bf5ce04b1d4a3ba7771dabf258734f6346a70aa5 Mon Sep 17 00:00:00 2001 From: Igor Rekun Date: Tue, 25 Nov 2025 19:57:52 +0000 Subject: [PATCH 03/18] Remove unused deployment provider in favor of bundle / apps deploy commands. 
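
This drops the databricks provider package (auth helpers, Unity Catalog
browsing, SQL statement execution, and the app deployment helpers); the
same operations are now reached by invoking the Databricks CLI directly.
A sketch of the replacement flow, assuming a bundle project and an app
named my-app (the name is illustrative, not taken from this change):

    databricks bundle validate
    databricks bundle deploy
    databricks apps deploy my-app

The first two commands validate and deploy a Databricks Asset Bundle; the
last starts a deployment for the named Databricks App (depending on setup,
additional flags such as a source code path may be required).
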
--- .../lib/providers/databricks/auth_test.go | 96 -- .../lib/providers/databricks/databricks.go | 1003 ----------------- .../lib/providers/databricks/deployment.go | 119 -- .../lib/providers/databricks/provider.go | 252 ----- 4 files changed, 1470 deletions(-) delete mode 100644 experimental/apps-mcp/lib/providers/databricks/auth_test.go delete mode 100644 experimental/apps-mcp/lib/providers/databricks/databricks.go delete mode 100644 experimental/apps-mcp/lib/providers/databricks/deployment.go delete mode 100644 experimental/apps-mcp/lib/providers/databricks/provider.go diff --git a/experimental/apps-mcp/lib/providers/databricks/auth_test.go b/experimental/apps-mcp/lib/providers/databricks/auth_test.go deleted file mode 100644 index 1f6e0068c0..0000000000 --- a/experimental/apps-mcp/lib/providers/databricks/auth_test.go +++ /dev/null @@ -1,96 +0,0 @@ -package databricks - -import ( - "context" - "os" - "testing" - - "github.com/databricks/cli/experimental/apps-mcp/lib/middlewares" - "github.com/databricks/cli/experimental/apps-mcp/lib/session" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestConfigureAuthWithSkipCheck(t *testing.T) { - // Set skip auth check for testing - os.Setenv("DATABRICKS_MCP_SKIP_AUTH_CHECK", "1") - defer os.Unsetenv("DATABRICKS_MCP_SKIP_AUTH_CHECK") - - ctx := context.Background() - sess := session.NewSession() - - host := "https://test.cloud.databricks.com" - profile := "test-profile" - - client, err := ConfigureAuth(ctx, sess, &host, &profile) - require.NoError(t, err) - assert.Nil(t, client) // Should be nil when skip check is enabled - - // Verify nothing was stored in session when skip check is on - _, ok := sess.Get(middlewares.DatabricksClientKey) - assert.False(t, ok) -} - -func TestConfigureAuthStoresClientInSession(t *testing.T) { - // This test requires a valid Databricks configuration - // Skip if no config is available - if os.Getenv("DATABRICKS_HOST") == "" && os.Getenv("DATABRICKS_PROFILE") == "" { - t.Skip("Skipping test: no Databricks configuration found") - } - - ctx := context.Background() - sess := session.NewSession() - - client, err := ConfigureAuth(ctx, sess, nil, nil) - require.NoError(t, err) - require.NotNil(t, client) - - // Verify client was stored in session - stored, ok := sess.Get(middlewares.DatabricksClientKey) - assert.True(t, ok) - assert.Equal(t, client, stored) -} - -func TestConfigureAuthWithCustomHost(t *testing.T) { - // This test requires valid credentials - // Skip if no config is available - if os.Getenv("DATABRICKS_HOST") == "" { - t.Skip("Skipping test: DATABRICKS_HOST not set") - } - - ctx := context.Background() - sess := session.NewSession() - - host := os.Getenv("DATABRICKS_HOST") - client, err := ConfigureAuth(ctx, sess, &host, nil) - require.NoError(t, err) - require.NotNil(t, client) - - // Verify the host was set correctly - assert.Equal(t, host, client.Config.Host) - - // Verify client was stored in session - _, ok := sess.Get(middlewares.DatabricksClientKey) - assert.True(t, ok) -} - -func TestWrapAuthError(t *testing.T) { - tests := []struct { - name string - err error - expected string - }{ - { - name: "regular error", - err: assert.AnError, - expected: assert.AnError.Error(), - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - wrapped := wrapAuthError(tt.err) - assert.Contains(t, wrapped.Error(), tt.expected) - }) - } -} diff --git a/experimental/apps-mcp/lib/providers/databricks/databricks.go 
b/experimental/apps-mcp/lib/providers/databricks/databricks.go deleted file mode 100644 index e3bca7f902..0000000000 --- a/experimental/apps-mcp/lib/providers/databricks/databricks.go +++ /dev/null @@ -1,1003 +0,0 @@ -package databricks - -import ( - "context" - "errors" - "fmt" - "net/url" - "os" - "strconv" - "strings" - "time" - - mcp "github.com/databricks/cli/experimental/apps-mcp/lib" - "github.com/databricks/cli/experimental/apps-mcp/lib/middlewares" - "github.com/databricks/cli/experimental/apps-mcp/lib/prompts" - "github.com/databricks/cli/experimental/apps-mcp/lib/session" - "github.com/databricks/cli/libs/log" - "github.com/databricks/databricks-sdk-go" - "github.com/databricks/databricks-sdk-go/config" - "github.com/databricks/databricks-sdk-go/httpclient" - "github.com/databricks/databricks-sdk-go/service/catalog" - "github.com/databricks/databricks-sdk-go/service/sql" -) - -const ( - DefaultWaitTimeout = 30 * time.Second - MaxPollAttempts = 30 - DefaultLimit = 500 - DefaultSampleSize = 5 - DefaultMaxRows = 1000 - MaxMaxRows = 10000 - PollInterval = 2 * time.Second - MaxRowDisplayLimit = 100 -) - -// ============================================================================ -// Authentication Functions -// ============================================================================ - -// ConfigureAuth creates and validates a Databricks workspace client with optional host and profile. -// The authenticated client is stored in the session data for reuse across tool calls. -func ConfigureAuth(ctx context.Context, sess *session.Session, host, profile *string) (*databricks.WorkspaceClient, error) { - // Skip auth check if testing - if os.Getenv("DATABRICKS_MCP_SKIP_AUTH_CHECK") == "1" { - return nil, nil - } - - var cfg *databricks.Config - if host != nil || profile != nil { - cfg = &databricks.Config{} - if host != nil { - cfg.Host = *host - } - if profile != nil { - cfg.Profile = *profile - } - } - - var client *databricks.WorkspaceClient - var err error - if cfg != nil { - client, err = databricks.NewWorkspaceClient(cfg) - } else { - client, err = databricks.NewWorkspaceClient() - } - if err != nil { - return nil, err - } - - _, err = client.CurrentUser.Me(ctx) - if err != nil { - if profile == nil && host != nil { - return nil, errors.New(prompts.MustExecuteTemplate("auth_u2m.tmpl", map[string]string{ - "WorkspaceURL": *host, - })) - } - return nil, wrapAuthError(err) - } - - // Store client in session data - sess.Set(middlewares.DatabricksClientKey, client) - - return client, nil -} - -// wrapAuthError wraps configuration errors with helpful messages -func wrapAuthError(err error) error { - if errors.Is(err, config.ErrCannotConfigureDefault) { - return errors.New(prompts.MustExecuteTemplate("auth_error.tmpl", nil)) - } - return err -} - -// ============================================================================ -// Helper Functions -// ============================================================================ - -// applyPagination applies limit and offset to a slice and returns paginated results with counts -func applyPagination[T any](items []T, limit, offset int) ([]T, int, int) { - total := len(items) - start := min(offset, total) - end := min(start+limit, total) - paginated := items[start:end] - shown := len(paginated) - return paginated, total, shown -} - -// validateIdentifier validates that an identifier (catalog, schema, table name) contains only safe characters. -// Allows alphanumeric, underscore, hyphen, and dot (for qualified names). 
-func validateIdentifier(id string) error { - if id == "" { - return errors.New("identifier cannot be empty") - } - - // Allow alphanumeric, underscore, hyphen, and dot for qualified names - for _, ch := range id { - isValid := (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || - (ch >= '0' && ch <= '9') || ch == '_' || ch == '-' || ch == '.' - - if !isValid { - return fmt.Errorf("invalid identifier '%s': contains unsafe characters", id) - } - } - - return nil -} - -// escapeLikePattern escapes a user-provided pattern for safe use in SQL LIKE clauses. -// - Escapes the backslash escape character itself -// - Escapes SQL wildcards (%, _) to treat them as literals -// - Converts glob-style wildcards (* and ?) to SQL wildcards -// Must be used with ESCAPE '\\' clause in SQL query. -func escapeLikePattern(input string) string { - result := strings.ReplaceAll(input, `\`, `\\`) // escape the escape char first! - result = strings.ReplaceAll(result, `%`, `\%`) // escape SQL wildcard % - result = strings.ReplaceAll(result, `_`, `\_`) // escape SQL wildcard _ - result = strings.ReplaceAll(result, `*`, `%`) // convert glob * to SQL % - result = strings.ReplaceAll(result, `?`, `_`) // convert glob ? to SQL _ - return result -} - -// ============================================================================ -// Argument Types (shared between agent and MCP) -// ============================================================================ - -type DatabricksListCatalogsArgs struct { - // no parameters needed - lists all available catalogs -} - -type DatabricksListSchemasArgs struct { - CatalogName string `json:"catalog_name"` - Filter *string `json:"filter,omitempty"` - Limit int `json:"limit,omitempty"` - Offset int `json:"offset,omitempty"` -} - -type DatabricksListTablesArgs struct { - // Optional catalog name. If omitted, searches across all catalogs. - CatalogName *string `json:"catalog_name,omitempty"` - // Optional schema name. If omitted, searches across all schemas (requires catalog_name to also be omitted). 
- SchemaName *string `json:"schema_name,omitempty"` - // Optional filter pattern for table name (supports wildcards when searching across catalogs/schemas) - Filter *string `json:"filter,omitempty"` - Limit int `json:"limit,omitempty"` - Offset int `json:"offset,omitempty"` -} - -type DatabricksDescribeTableArgs struct { - TableFullName string `json:"table_full_name"` - SampleSize int `json:"sample_size,omitempty"` -} - -type DatabricksExecuteQueryArgs struct { - Query string `json:"query"` -} - -// ============================================================================ -// Request Types (internal to client) -// ============================================================================ - -type ExecuteSqlRequest struct { - Query string `json:"query"` -} - -type ListSchemasRequest struct { - CatalogName string `json:"catalog_name"` - Filter *string `json:"filter,omitempty"` - Limit int `json:"limit,omitempty"` - Offset int `json:"offset,omitempty"` -} - -type ListTablesRequest struct { - CatalogName *string `json:"catalog_name,omitempty"` - SchemaName *string `json:"schema_name,omitempty"` - Filter *string `json:"filter,omitempty"` - Limit int `json:"limit,omitempty"` - Offset int `json:"offset,omitempty"` -} - -type DescribeTableRequest struct { - TableFullName string `json:"table_full_name"` - SampleSize int `json:"sample_size,omitempty"` -} - -// ============================================================================ -// Response Types -// ============================================================================ - -type TableDetailsResponse struct { - FullName string `json:"full_name"` - TableType string `json:"table_type"` - Owner *string `json:"owner,omitempty"` - Comment *string `json:"comment,omitempty"` - StorageLocation *string `json:"storage_location,omitempty"` - DataSourceFormat *string `json:"data_source_format,omitempty"` - Columns []ColumnMetadataResponse `json:"columns"` - SampleData []map[string]any `json:"sample_data,omitempty"` - RowCount *int64 `json:"row_count,omitempty"` -} - -type ColumnMetadataResponse struct { - Name string `json:"name"` - DataType string `json:"data_type"` - Comment *string `json:"comment,omitempty"` - Nullable bool `json:"nullable"` -} - -type TableInfoResponse struct { - Name string `json:"name"` - CatalogName string `json:"catalog_name"` - SchemaName string `json:"schema_name"` - FullName string `json:"full_name"` - TableType string `json:"table_type"` - Owner *string `json:"owner,omitempty"` - Comment *string `json:"comment,omitempty"` -} - -type ListCatalogsResultResponse struct { - Catalogs []string `json:"catalogs"` -} - -type ListSchemasResultResponse struct { - Schemas []string `json:"schemas"` - TotalCount int `json:"total_count"` - ShownCount int `json:"shown_count"` - Offset int `json:"offset"` - Limit int `json:"limit"` -} - -type ListTablesResultResponse struct { - Tables []TableInfoResponse `json:"tables"` - TotalCount int `json:"total_count"` - ShownCount int `json:"shown_count"` - Offset int `json:"offset"` - Limit int `json:"limit"` -} - -type ExecuteSqlResultResponse struct { - Rows []map[string]any `json:"rows"` -} - -// ============================================================================ -// Display Trait for Tool Results -// ============================================================================ - -func (r *ListCatalogsResultResponse) Display() string { - if len(r.Catalogs) == 0 { - return "No catalogs found." 
- } - - var lines []string - lines = append(lines, fmt.Sprintf("Found %d catalogs:", len(r.Catalogs))) - lines = append(lines, "") - - for _, catalog := range r.Catalogs { - lines = append(lines, "• "+catalog) - } - - return strings.Join(lines, "\n") -} - -func (r *ListSchemasResultResponse) Display() string { - if len(r.Schemas) == 0 { - return "No schemas found." - } - - var lines []string - lines = append(lines, - fmt.Sprintf("Showing %d of %d schemas (offset: %d, limit: %d):", - r.ShownCount, r.TotalCount, r.Offset, r.Limit)) - lines = append(lines, "") - - for _, schema := range r.Schemas { - lines = append(lines, "• "+schema) - } - - return strings.Join(lines, "\n") -} - -func (r *ListTablesResultResponse) Display() string { - if len(r.Tables) == 0 { - return "No tables found." - } - - var lines []string - lines = append(lines, - fmt.Sprintf("Showing %d of %d tables (offset: %d, limit: %d):", - r.ShownCount, r.TotalCount, r.Offset, r.Limit)) - lines = append(lines, "") - - for _, table := range r.Tables { - info := fmt.Sprintf("• %s (%s)", table.FullName, table.TableType) - if table.Owner != nil { - info += " - Owner: " + *table.Owner - } - if table.Comment != nil { - info += " - " + *table.Comment - } - lines = append(lines, info) - } - - return strings.Join(lines, "\n") -} - -func (r *TableDetailsResponse) Display() string { - var lines []string - - lines = append(lines, "Table: "+r.FullName) - lines = append(lines, "Table Type: "+r.TableType) - - if r.Owner != nil { - lines = append(lines, "Owner: "+*r.Owner) - } - if r.Comment != nil { - lines = append(lines, "Comment: "+*r.Comment) - } - if r.RowCount != nil { - lines = append(lines, fmt.Sprintf("Row Count: %d", *r.RowCount)) - } - if r.StorageLocation != nil { - lines = append(lines, "Storage Location: "+*r.StorageLocation) - } - if r.DataSourceFormat != nil { - lines = append(lines, "Data Source Format: "+*r.DataSourceFormat) - } - - if len(r.Columns) > 0 { - lines = append(lines, fmt.Sprintf("\nColumns (%d):", len(r.Columns))) - for _, col := range r.Columns { - nullableStr := "nullable" - if !col.Nullable { - nullableStr = "required" - } - colInfo := fmt.Sprintf(" - %s: %s (%s)", col.Name, col.DataType, nullableStr) - if col.Comment != nil { - colInfo += " - " + *col.Comment - } - lines = append(lines, colInfo) - } - } - - if len(r.SampleData) > 0 { - lines = append(lines, fmt.Sprintf("\nSample Data (%d rows):", len(r.SampleData))) - displayCount := min(len(r.SampleData), 5) - for i := range displayCount { - row := r.SampleData[i] - var rowParts []string - for k, v := range row { - rowParts = append(rowParts, fmt.Sprintf("%s: %v", k, formatValue(v))) - } - lines = append(lines, fmt.Sprintf(" Row %d: %s", i+1, strings.Join(rowParts, ", "))) - } - if len(r.SampleData) > 5 { - lines = append(lines, "...") - } - } - - return strings.Join(lines, "\n") -} - -func (r *ExecuteSqlResultResponse) Display() string { - if len(r.Rows) == 0 { - return "Query executed successfully but returned no results." 
- } - - var lines []string - lines = append(lines, fmt.Sprintf("Query returned %d rows:", len(r.Rows))) - lines = append(lines, "") - - if len(r.Rows) > 0 { - var columns []string - for k := range r.Rows[0] { - columns = append(columns, k) - } - lines = append(lines, "Columns: "+strings.Join(columns, ", ")) - lines = append(lines, "") - lines = append(lines, "Results:") - } - - limit := min(len(r.Rows), MaxRowDisplayLimit) - for i := range limit { - row := r.Rows[i] - var rowParts []string - for k, v := range row { - rowParts = append(rowParts, fmt.Sprintf("%s: %v", k, formatValue(v))) - } - lines = append(lines, fmt.Sprintf(" Row %d: %s", i+1, strings.Join(rowParts, ", "))) - } - - if len(r.Rows) > MaxRowDisplayLimit { - lines = append(lines, fmt.Sprintf("\n... showing first %d of %d total rows", - MaxRowDisplayLimit, len(r.Rows))) - } - - return strings.Join(lines, "\n") -} - -func formatValue(value any) string { - if value == nil { - return "null" - } - return fmt.Sprintf("%v", value) -} - -// ============================================================================ -// DatabricksRestClient -// ============================================================================ - -type DatabricksRestClient struct { - client *databricks.WorkspaceClient - warehouseID string -} - -// NewDatabricksRestClient creates a new Databricks REST client using the SDK -func NewDatabricksRestClient(ctx context.Context, cfg *mcp.Config) (*DatabricksRestClient, error) { - client := middlewares.MustGetDatabricksClient(ctx) - - warehouseID, err := middlewares.GetWarehouseID(ctx) - if err != nil { - return nil, fmt.Errorf("failed to get warehouse ID: %w", err) - } - - return &DatabricksRestClient{ - client: client, - warehouseID: warehouseID, - }, nil -} - -// ExecuteSql executes a SQL query and returns the results -func (c *DatabricksRestClient) ExecuteSql(ctx context.Context, request *ExecuteSqlRequest) (*ExecuteSqlResultResponse, error) { - rows, err := c.executeSqlImpl(ctx, request.Query) - if err != nil { - return nil, err - } - return &ExecuteSqlResultResponse{Rows: rows}, nil -} - -// executeSqlWithParams executes SQL with named parameters for safe dynamic queries -func (c *DatabricksRestClient) executeSqlWithParams(ctx context.Context, query string, parameters []sql.StatementParameterListItem) ([]map[string]any, error) { - result, err := c.client.StatementExecution.ExecuteStatement(ctx, sql.ExecuteStatementRequest{ - Statement: query, - Parameters: parameters, - WarehouseId: c.warehouseID, - WaitTimeout: fmt.Sprintf("%ds", int(DefaultWaitTimeout.Seconds())), - Format: sql.FormatJsonArray, - Disposition: sql.DispositionInline, - }) - if err != nil { - return nil, fmt.Errorf("failed to execute statement: %w", err) - } - - // Check if we need to poll for results - if result.Status != nil { - state := result.Status.State - switch state { - case sql.StatementStatePending, sql.StatementStateRunning: - return c.pollForResults(ctx, result.StatementId) - case sql.StatementStateFailed: - errMsg := "unknown error" - if result.Status.Error != nil && result.Status.Error.Message != "" { - errMsg = result.Status.Error.Message - } - return nil, fmt.Errorf("SQL execution failed: %s", errMsg) - case sql.StatementStateCanceled, sql.StatementStateClosed, sql.StatementStateSucceeded: - break - } - } - - return c.processStatementResult(ctx, result) -} - -func (c *DatabricksRestClient) executeSqlImpl(ctx context.Context, query string) ([]map[string]any, error) { - result, err := 
c.client.StatementExecution.ExecuteStatement(ctx, sql.ExecuteStatementRequest{ - Statement: query, - WarehouseId: c.warehouseID, - WaitTimeout: fmt.Sprintf("%ds", int(DefaultWaitTimeout.Seconds())), - OnWaitTimeout: sql.ExecuteStatementRequestOnWaitTimeoutContinue, - Format: sql.FormatJsonArray, - Disposition: sql.DispositionInline, - RowLimit: 100, - }) - if err != nil { - return nil, fmt.Errorf("failed to execute statement: %w", err) - } - - // Check if we need to poll for results - if result.Status != nil { - state := result.Status.State - switch state { - case sql.StatementStatePending, sql.StatementStateRunning: - return c.pollForResults(ctx, result.StatementId) - case sql.StatementStateFailed: - errMsg := "unknown error" - if result.Status.Error != nil && result.Status.Error.Message != "" { - errMsg = result.Status.Error.Message - } - return nil, fmt.Errorf("SQL execution failed: %s", errMsg) - case sql.StatementStateCanceled, sql.StatementStateClosed, sql.StatementStateSucceeded: - break - } - } - - return c.processStatementResult(ctx, result) -} - -func (c *DatabricksRestClient) pollForResults(ctx context.Context, statementID string) ([]map[string]any, error) { - for attempt := range MaxPollAttempts { - log.Debugf(ctx, "Polling attempt %d for statement %s", attempt+1, statementID) - - result, err := c.client.StatementExecution.GetStatement(ctx, sql.GetStatementRequest{ - StatementId: statementID, - }) - if err != nil { - return nil, fmt.Errorf("polling attempt %d failed: %w", attempt+1, err) - } - - if result.Status != nil { - switch result.Status.State { - case sql.StatementStateSucceeded: - return c.processStatementResult(ctx, result) - case sql.StatementStateFailed: - errMsg := "unknown error" - if result.Status.Error != nil && result.Status.Error.Message != "" { - errMsg = result.Status.Error.Message - } - return nil, fmt.Errorf("SQL execution failed: %s", errMsg) - case sql.StatementStatePending, sql.StatementStateRunning: - time.Sleep(PollInterval) - continue - default: - return nil, fmt.Errorf("unexpected statement state: %s", result.Status.State) - } - } - } - - return nil, fmt.Errorf("polling timeout exceeded for statement %s", statementID) -} - -func (c *DatabricksRestClient) processStatementResult(ctx context.Context, result *sql.StatementResponse) ([]map[string]any, error) { - log.Debugf(ctx, "Processing statement result: %+v", result) - - if result.Manifest == nil || result.Manifest.Schema == nil { - log.Debugf(ctx, "No schema in response") - return nil, errors.New("no schema in response") - } - - schema := result.Manifest.Schema - - // Check if statement returns no result set (DDL, DML writes, etc.) 
- if len(schema.Columns) == 0 { - log.Debugf(ctx, "Statement executed successfully (no result set)") - return []map[string]any{}, nil - } - - // Try to get inline data - if result.Result != nil && result.Result.DataArray != nil { - log.Debugf(ctx, "Found %d rows of inline data", len(result.Result.DataArray)) - return c.processDataArray(schema, result.Result.DataArray) - } - - // Query executed successfully but returned 0 rows (empty result set is valid) - log.Debugf(ctx, "Query executed successfully with empty result set") - return []map[string]any{}, nil -} - -func (c *DatabricksRestClient) processDataArray(schema *sql.ResultSchema, dataArray [][]string) ([]map[string]any, error) { - var results []map[string]any - - for _, row := range dataArray { - rowMap := make(map[string]any) - - for i, column := range schema.Columns { - var value any - if i < len(row) { - value = row[i] - } - rowMap[column.Name] = value - } - - results = append(results, rowMap) - } - - return results, nil -} - -// ListCatalogs lists all available Databricks Unity Catalog catalogs -func (c *DatabricksRestClient) ListCatalogs(ctx context.Context) (*ListCatalogsResultResponse, error) { - catalogs, err := c.listCatalogsImpl(ctx) - if err != nil { - return nil, err - } - return &ListCatalogsResultResponse{Catalogs: catalogs}, nil -} - -func (c *DatabricksRestClient) listCatalogsImpl(ctx context.Context) ([]string, error) { - var allCatalogs []string - - iter := c.client.Catalogs.List(ctx, catalog.ListCatalogsRequest{}) - for iter.HasNext(ctx) { - cat, err := iter.Next(ctx) - if err != nil { - return nil, fmt.Errorf("failed to iterate catalogs: %w", err) - } - allCatalogs = append(allCatalogs, cat.Name) - } - - return allCatalogs, nil -} - -// ListSchemas lists schemas in a catalog with optional filtering and pagination -func (c *DatabricksRestClient) ListSchemas(ctx context.Context, request *ListSchemasRequest) (*ListSchemasResultResponse, error) { - schemas, err := c.listSchemasImpl(ctx, request.CatalogName) - if err != nil { - return nil, err - } - - // Apply filter if provided - if request.Filter != nil { - filterLower := strings.ToLower(*request.Filter) - var filtered []string - for _, s := range schemas { - if strings.Contains(strings.ToLower(s), filterLower) { - filtered = append(filtered, s) - } - } - schemas = filtered - } - - limit := request.Limit - if limit == 0 { - limit = DefaultLimit - } - - paginated, totalCount, shownCount := applyPagination(schemas, limit, request.Offset) - - return &ListSchemasResultResponse{ - Schemas: paginated, - TotalCount: totalCount, - ShownCount: shownCount, - Offset: request.Offset, - Limit: limit, - }, nil -} - -func (c *DatabricksRestClient) listSchemasImpl(ctx context.Context, catalogName string) ([]string, error) { - var allSchemas []string - - iter := c.client.Schemas.List(ctx, catalog.ListSchemasRequest{ - CatalogName: catalogName, - }) - for iter.HasNext(ctx) { - schema, err := iter.Next(ctx) - if err != nil { - return nil, fmt.Errorf("failed to iterate schemas: %w", err) - } - allSchemas = append(allSchemas, schema.Name) - } - - return allSchemas, nil -} - -// ListTables lists tables with support for wildcard searches and pagination -func (c *DatabricksRestClient) ListTables(ctx context.Context, request *ListTablesRequest) (*ListTablesResultResponse, error) { - if request.CatalogName != nil && request.SchemaName != nil { - // Fast path - use REST API for specific catalog/schema - tables, err := c.listTablesImpl(ctx, *request.CatalogName, *request.SchemaName, true) - if 
err != nil { - return nil, err - } - - // Apply filter if provided - if request.Filter != nil { - filterLower := strings.ToLower(*request.Filter) - var filtered []TableInfoResponse - for _, t := range tables { - // Match against both table name and schema name - if strings.Contains(strings.ToLower(t.Name), filterLower) || - strings.Contains(strings.ToLower(t.SchemaName), filterLower) { - filtered = append(filtered, t) - } - } - tables = filtered - } - - limit := request.Limit - if limit == 0 { - limit = DefaultLimit - } - - paginated, totalCount, shownCount := applyPagination(tables, limit, request.Offset) - - return &ListTablesResultResponse{ - Tables: paginated, - TotalCount: totalCount, - ShownCount: shownCount, - Offset: request.Offset, - Limit: limit, - }, nil - } - - // Wildcard search - use system.information_schema.tables - return c.listTablesViaInformationSchema(ctx, request) -} - -// listTablesViaInformationSchema searches tables across catalogs/schemas using system.information_schema -func (c *DatabricksRestClient) listTablesViaInformationSchema(ctx context.Context, request *ListTablesRequest) (*ListTablesResultResponse, error) { - // Validate invalid combination - if request.CatalogName == nil && request.SchemaName != nil { - return nil, errors.New("schema_name requires catalog_name to be specified") - } - - // Validate identifiers for SQL safety - if request.CatalogName != nil { - if err := validateIdentifier(*request.CatalogName); err != nil { - return nil, err - } - } - if request.SchemaName != nil { - if err := validateIdentifier(*request.SchemaName); err != nil { - return nil, err - } - } - - // Build WHERE conditions with parameterized queries - var conditions []string - var parameters []sql.StatementParameterListItem - - if request.CatalogName != nil { - conditions = append(conditions, "table_catalog = :catalog") - parameters = append(parameters, sql.StatementParameterListItem{ - Name: "catalog", - Value: *request.CatalogName, - Type: "STRING", - }) - } - - if request.SchemaName != nil { - conditions = append(conditions, "table_schema = :schema") - parameters = append(parameters, sql.StatementParameterListItem{ - Name: "schema", - Value: *request.SchemaName, - Type: "STRING", - }) - } - - if request.Filter != nil { - // Use dedicated escape function for LIKE patterns - pattern := escapeLikePattern(*request.Filter) - - // Wrap pattern for substring match if no wildcards at boundaries - if !strings.HasPrefix(pattern, "%") && !strings.HasSuffix(pattern, "%") { - pattern = "%" + pattern + "%" - } - - // Match against both table name and schema name - conditions = append(conditions, "(table_name LIKE :pattern ESCAPE '\\\\' OR table_schema LIKE :pattern ESCAPE '\\\\')") - parameters = append(parameters, sql.StatementParameterListItem{ - Name: "pattern", - Value: pattern, - Type: "STRING", - }) - } - - whereClause := "" - if len(conditions) > 0 { - whereClause = "WHERE " + strings.Join(conditions, " AND ") - } - - limit := request.Limit - if limit == 0 { - limit = DefaultLimit - } - - // Build SQL query with parameter markers - query := fmt.Sprintf(` - SELECT table_catalog, table_schema, table_name, table_type - FROM system.information_schema.tables - %s - ORDER BY table_catalog, table_schema, table_name - LIMIT %d - `, whereClause, limit+request.Offset) - - // Execute query with parameters - rows, err := c.executeSqlWithParams(ctx, query, parameters) - if err != nil { - return nil, fmt.Errorf("failed to query information_schema: %w", err) - } - - // Parse results into 
TableInfo with explicit error handling - var tables []TableInfoResponse - for _, row := range rows { - catalogVal, ok := row["table_catalog"] - if !ok { - return nil, errors.New("missing table_catalog in row") - } - catalog := fmt.Sprintf("%v", catalogVal) - - schemaVal, ok := row["table_schema"] - if !ok { - return nil, errors.New("missing table_schema in row") - } - schema := fmt.Sprintf("%v", schemaVal) - - nameVal, ok := row["table_name"] - if !ok { - return nil, errors.New("missing table_name in row") - } - name := fmt.Sprintf("%v", nameVal) - - tableTypeVal, ok := row["table_type"] - if !ok { - return nil, errors.New("missing table_type in row") - } - tableType := fmt.Sprintf("%v", tableTypeVal) - - tables = append(tables, TableInfoResponse{ - Name: name, - CatalogName: catalog, - SchemaName: schema, - TableType: tableType, - FullName: fmt.Sprintf("%s.%s.%s", catalog, schema, name), - Owner: nil, - Comment: nil, - }) - } - - paginated, totalCount, shownCount := applyPagination(tables, limit, request.Offset) - - return &ListTablesResultResponse{ - Tables: paginated, - TotalCount: totalCount, - ShownCount: shownCount, - Offset: request.Offset, - Limit: limit, - }, nil -} - -func (c *DatabricksRestClient) listTablesImpl(ctx context.Context, catalogName, schemaName string, excludeInaccessible bool) ([]TableInfoResponse, error) { - var tables []TableInfoResponse - - w := middlewares.MustGetDatabricksClient(ctx) - clientCfg, err := config.HTTPClientConfigFromConfig(w.Config) - if err != nil { - return nil, fmt.Errorf("failed to create HTTP client config: %w", err) - } - apiClient := httpclient.NewApiClient(clientCfg) - - nextPageToken := "" - for { - apiPath := "/api/2.1/unity-catalog/tables" - params := url.Values{} - params.Add("catalog_name", catalogName) - params.Add("schema_name", schemaName) - if excludeInaccessible { - params.Add("include_browse", "false") - } - params.Add("max_results", strconv.Itoa(DefaultLimit)) - if nextPageToken != "" { - params.Add("page_token", nextPageToken) - } - fullPath := apiPath + "?" 
+ params.Encode() - - var response catalog.ListTablesResponse - err = apiClient.Do(ctx, "GET", fullPath, httpclient.WithResponseUnmarshal(&response)) - if err != nil { - return nil, fmt.Errorf("failed to list tables: %w", err) - } - - for _, table := range response.Tables { - tables = append(tables, TableInfoResponse{ - Name: table.Name, - CatalogName: table.CatalogName, - SchemaName: table.SchemaName, - TableType: string(table.TableType), - FullName: fmt.Sprintf("%s.%s.%s", table.CatalogName, table.SchemaName, table.Name), - Owner: &table.Owner, - Comment: &table.Comment, - }) - } - - if response.NextPageToken != "" { - nextPageToken = response.NextPageToken - } else { - break - } - } - - return tables, nil -} - -// DescribeTable retrieves detailed information about a table including metadata and sample data -func (c *DatabricksRestClient) DescribeTable(ctx context.Context, request *DescribeTableRequest) (*TableDetailsResponse, error) { - sampleRows := request.SampleSize - if sampleRows == 0 { - sampleRows = DefaultSampleSize - } - tableName := request.TableFullName - - // Get basic table metadata from Unity Catalog - tableResponse, err := c.client.Tables.Get(ctx, catalog.GetTableRequest{ - FullName: tableName, - }) - if err != nil { - return nil, fmt.Errorf("failed to get table metadata: %w", err) - } - - // Build column metadata - columns := make([]ColumnMetadataResponse, len(tableResponse.Columns)) - for i, col := range tableResponse.Columns { - var comment *string - if col.Comment != "" { - comment = &col.Comment - } - - columns[i] = ColumnMetadataResponse{ - Name: col.Name, - DataType: string(col.TypeName), - Comment: comment, - Nullable: col.Nullable, - } - } - - // Get sample data and row count - var sampleData []map[string]any - if sampleRows > 0 { - query := fmt.Sprintf("SELECT * FROM %s LIMIT %d", tableName, sampleRows) - if rows, err := c.executeSqlImpl(ctx, query); err == nil { - sampleData = rows - } - } - - var rowCount *int64 - countQuery := "SELECT COUNT(*) as count FROM " + tableName - if rows, err := c.executeSqlImpl(ctx, countQuery); err == nil && len(rows) > 0 { - if countVal, ok := rows[0]["count"]; ok { - switch v := countVal.(type) { - case int64: - rowCount = &v - case float64: - count := int64(v) - rowCount = &count - case string: - // Try to parse string as int64 - var count int64 - if _, parseErr := fmt.Sscanf(v, "%d", &count); parseErr == nil { - rowCount = &count - } - } - } - } - - var owner, comment, storageLocation, dataSourceFormat *string - if tableResponse.Owner != "" { - owner = &tableResponse.Owner - } - if tableResponse.Comment != "" { - comment = &tableResponse.Comment - } - if tableResponse.StorageLocation != "" { - storageLocation = &tableResponse.StorageLocation - } - if tableResponse.DataSourceFormat != "" { - dsf := string(tableResponse.DataSourceFormat) - dataSourceFormat = &dsf - } - - tableType := "UNKNOWN" - if tableResponse.TableType != "" { - tableType = string(tableResponse.TableType) - } - - return &TableDetailsResponse{ - FullName: tableName, - TableType: tableType, - Owner: owner, - Comment: comment, - StorageLocation: storageLocation, - DataSourceFormat: dataSourceFormat, - Columns: columns, - SampleData: sampleData, - RowCount: rowCount, - }, nil -} diff --git a/experimental/apps-mcp/lib/providers/databricks/deployment.go b/experimental/apps-mcp/lib/providers/databricks/deployment.go deleted file mode 100644 index ac24d6120a..0000000000 --- a/experimental/apps-mcp/lib/providers/databricks/deployment.go +++ /dev/null @@ -1,119 +0,0 @@ 
-package databricks - -import ( - "context" - "fmt" - "os/exec" - "time" - - mcp "github.com/databricks/cli/experimental/apps-mcp/lib" - "github.com/databricks/cli/experimental/apps-mcp/lib/middlewares" - "github.com/databricks/cli/libs/cmdctx" - "github.com/databricks/databricks-sdk-go/service/apps" - "github.com/databricks/databricks-sdk-go/service/iam" -) - -func GetSourcePath(app *apps.App) string { - if app.DefaultSourceCodePath == "" { - return fmt.Sprintf("/Workspace/Users/%s/%s/", app.Creator, app.Name) - } - return app.DefaultSourceCodePath -} - -func GetAppInfo(ctx context.Context, cfg *mcp.Config, name string) (*apps.App, error) { - w := cmdctx.WorkspaceClient(ctx) - app, err := w.Apps.GetByName(ctx, name) - if err != nil { - return nil, fmt.Errorf("failed to get app info: %w", err) - } - - return app, nil -} - -func CreateApp(ctx context.Context, cfg *mcp.Config, createAppRequest *apps.CreateAppRequest) (*apps.App, error) { - w := cmdctx.WorkspaceClient(ctx) - - wait, err := w.Apps.Create(ctx, *createAppRequest) - if err != nil { - return nil, fmt.Errorf("failed to create app: %w", err) - } - - createdApp, err := wait.GetWithTimeout(5 * time.Minute) - if err != nil { - return nil, fmt.Errorf("failed to wait for app creation: %w", err) - } - - return createdApp, nil -} - -func GetUserInfo(ctx context.Context, cfg *mcp.Config) (*iam.User, error) { - w := cmdctx.WorkspaceClient(ctx) - user, err := w.CurrentUser.Me(ctx) - if err != nil { - return nil, fmt.Errorf("failed to get user info: %w", err) - } - - return user, nil -} - -func SyncWorkspace(appInfo *apps.App, sourceDir string) error { - targetPath := GetSourcePath(appInfo) - - cmd := exec.Command( - "databricks", - "sync", - "--include", "public", - "--exclude", "node_modules", - ".", - targetPath, - ) - cmd.Dir = sourceDir - - output, err := cmd.CombinedOutput() - if err != nil { - return fmt.Errorf("failed to sync workspace: %w (output: %s)", err, string(output)) - } - - return nil -} - -func DeployApp(ctx context.Context, cfg *mcp.Config, appInfo *apps.App) error { - w := cmdctx.WorkspaceClient(ctx) - sourcePath := GetSourcePath(appInfo) - - req := apps.CreateAppDeploymentRequest{ - AppName: appInfo.Name, - AppDeployment: apps.AppDeployment{ - SourceCodePath: sourcePath, - Mode: apps.AppDeploymentModeSnapshot, - }, - } - - wait, err := w.Apps.Deploy(ctx, req) - if err != nil { - return fmt.Errorf("failed to deploy app: %w", err) - } - - _, err = wait.GetWithTimeout(10 * time.Minute) - if err != nil { - return fmt.Errorf("failed to wait for app deployment: %w", err) - } - - return nil -} - -func ResourcesFromEnv(ctx context.Context, cfg *mcp.Config) (*apps.AppResource, error) { - warehouseID, err := middlewares.GetWarehouseID(ctx) - if err != nil { - return nil, fmt.Errorf("failed to get warehouse ID: %w", err) - } - - return &apps.AppResource{ - Name: "base", - Description: "template resources", - SqlWarehouse: &apps.AppResourceSqlWarehouse{ - Id: warehouseID, - Permission: apps.AppResourceSqlWarehouseSqlWarehousePermissionCanUse, - }, - }, nil -} diff --git a/experimental/apps-mcp/lib/providers/databricks/provider.go b/experimental/apps-mcp/lib/providers/databricks/provider.go deleted file mode 100644 index efac26602e..0000000000 --- a/experimental/apps-mcp/lib/providers/databricks/provider.go +++ /dev/null @@ -1,252 +0,0 @@ -package databricks - -import ( - "context" - - mcp "github.com/databricks/cli/experimental/apps-mcp/lib" - mcpsdk "github.com/databricks/cli/experimental/apps-mcp/lib/mcp" - 
"github.com/databricks/cli/experimental/apps-mcp/lib/providers" - "github.com/databricks/cli/experimental/apps-mcp/lib/session" - "github.com/databricks/cli/libs/log" -) - -func init() { - providers.Register("databricks", func(ctx context.Context, cfg *mcp.Config, sess *session.Session) (providers.Provider, error) { - return NewProvider(ctx, cfg, sess) - }, providers.ProviderConfig{ - Always: true, - }) -} - -// Provider represents the Databricks provider that registers MCP tools -type Provider struct { - config *mcp.Config - session *session.Session - ctx context.Context -} - -// NewProvider creates a new Databricks provider -func NewProvider(ctx context.Context, cfg *mcp.Config, sess *session.Session) (*Provider, error) { - return &Provider{ - config: cfg, - session: sess, - ctx: ctx, - }, nil -} - -// Name returns the name of the provider. -func (p *Provider) Name() string { - return "databricks" -} - -type FindTablesInput struct { - CatalogName *string `json:"catalog_name,omitempty" jsonschema_description:"Name of the catalog (optional - searches all catalogs if not provided)"` - SchemaName *string `json:"schema_name,omitempty" jsonschema_description:"Name of the schema (optional - searches all schemas if not provided)"` - Filter *string `json:"filter,omitempty" jsonschema_description:"Filter pattern for table names (supports * and ? wildcards)"` - Limit int `json:"limit,omitempty" jsonschema_description:"Maximum number of tables to return (default: 1000)"` - Offset int `json:"offset,omitempty" jsonschema_description:"Offset for pagination (default: 0)"` -} - -// RegisterTools registers all Databricks tools with the MCP server -func (p *Provider) RegisterTools(server *mcpsdk.Server) error { - log.Info(p.ctx, "Registering Databricks tools") - - // Register databricks_configure_auth - type ConfigureAuthInput struct { - Host *string `json:"host,omitempty" jsonschema_description:"Databricks workspace URL (e.g., https://example.cloud.databricks.com). If not provided, uses default from environment or config file"` - Profile *string `json:"profile,omitempty" jsonschema_description:"Profile name from ~/.databrickscfg. If not provided, uses default profile"` - } - - mcpsdk.AddTool(server, - &mcpsdk.Tool{ - Name: "databricks_configure_auth", - Description: "Configure authentication for Databricks. Only call when Databricks authentication has has failed to authenticate automatically or when the user explicitly asks for using a specific host or profile. 
Validates credentials and stores the authenticated client in the session.", - }, - func(ctx context.Context, req *mcpsdk.CallToolRequest, args ConfigureAuthInput) (*mcpsdk.CallToolResult, any, error) { - log.Debug(ctx, "databricks_configure_auth called") - - sess, err := session.GetSession(ctx) - if err != nil { - return nil, nil, err - } - - client, err := ConfigureAuth(ctx, sess, args.Host, args.Profile) - if err != nil { - return nil, nil, err - } - - message := "Authentication configured successfully" - if args.Host != nil { - message += " for host: " + *args.Host - } - if args.Profile != nil { - message += " using profile: " + *args.Profile - } - - // Get user info to confirm auth - me, err := client.CurrentUser.Me(ctx) - if err == nil && me.UserName != "" { - message += "\nAuthenticated as: " + me.UserName - } - - return mcpsdk.CreateNewTextContentResult(message), nil, nil - }, - ) - - // Register databricks_list_catalogs - mcpsdk.AddTool(server, - &mcpsdk.Tool{ - Name: "databricks_list_catalogs", - Description: "List all available Databricks catalogs", - }, - func(ctx context.Context, req *mcpsdk.CallToolRequest, args struct{}) (*mcpsdk.CallToolResult, any, error) { - log.Debug(ctx, "databricks_list_catalogs called") - client, err := NewDatabricksRestClient(ctx, p.config) - if err != nil { - return nil, nil, err - } - - result, err := client.ListCatalogs(ctx) - if err != nil { - return nil, nil, err - } - - return mcpsdk.CreateNewTextContentResult(result.Display()), nil, nil - }, - ) - - // Register databricks_list_schemas - type ListSchemasInput struct { - CatalogName string `json:"catalog_name" jsonschema:"required" jsonschema_description:"Name of the catalog"` - Filter string `json:"filter,omitempty" jsonschema_description:"Optional filter string to search schema names"` - Limit int `json:"limit,omitempty" jsonschema_description:"Maximum number of schemas to return (default: 1000)"` - Offset int `json:"offset,omitempty" jsonschema_description:"Offset for pagination (default: 0)"` - } - - mcpsdk.AddTool(server, - &mcpsdk.Tool{ - Name: "databricks_list_schemas", - Description: "List all schemas in a Databricks catalog with pagination support", - }, - func(ctx context.Context, req *mcpsdk.CallToolRequest, args ListSchemasInput) (*mcpsdk.CallToolResult, any, error) { - log.Debugf(ctx, "databricks_list_schemas called: catalog=%s", args.CatalogName) - - client, err := NewDatabricksRestClient(ctx, p.config) - if err != nil { - return nil, nil, err - } - - result, err := client.ListSchemas(ctx, &ListSchemasRequest{ - CatalogName: args.CatalogName, - Filter: &args.Filter, - Limit: args.Limit, - Offset: args.Offset, - }) - if err != nil { - return nil, nil, err - } - - return mcpsdk.CreateNewTextContentResult(result.Display()), nil, nil - }, - ) - - // Register databricks_find_tables - mcpsdk.AddTool(server, - &mcpsdk.Tool{ - Name: "databricks_find_tables", - Description: "Find tables in Databricks Unity Catalog. Supports searching within a specific catalog and schema, across all schemas in a catalog, or across all catalogs. Supports wildcard patterns (* for multiple characters, ? 
for single character) in table name and schema name filtering.", - }, - func(ctx context.Context, req *mcpsdk.CallToolRequest, args FindTablesInput) (*mcpsdk.CallToolResult, any, error) { - catalogName := "" - if args.CatalogName != nil { - catalogName = *args.CatalogName - } - schemaName := "" - if args.SchemaName != nil { - schemaName = *args.SchemaName - } - log.Debugf(ctx, "databricks_find_tables called: catalog=%s, schema=%s", catalogName, schemaName) - - client, err := NewDatabricksRestClient(ctx, p.config) - if err != nil { - return nil, nil, err - } - - result, err := client.ListTables(ctx, &ListTablesRequest{ - CatalogName: args.CatalogName, - SchemaName: args.SchemaName, - Filter: args.Filter, - Limit: args.Limit, - Offset: args.Offset, - }) - if err != nil { - return nil, nil, err - } - return mcpsdk.CreateNewTextContentResult(result.Display()), nil, nil - }, - ) - - // Register databricks_describe_table - type DescribeTableInput struct { - TableFullName string `json:"table_full_name" jsonschema:"required" jsonschema_description:"Full name of the table (catalog.schema.table)"` - SampleSize int `json:"sample_size,omitempty" jsonschema_description:"Number of sample rows to return (default: 5)"` - } - - mcpsdk.AddTool(server, - &mcpsdk.Tool{ - Name: "databricks_describe_table", - Description: "Get detailed information about a Databricks table including schema and optional sample data", - }, - func(ctx context.Context, req *mcpsdk.CallToolRequest, args DescribeTableInput) (*mcpsdk.CallToolResult, any, error) { - log.Debugf(ctx, "databricks_describe_table called: table=%s", args.TableFullName) - client, err := NewDatabricksRestClient(ctx, p.config) - if err != nil { - return nil, nil, err - } - - result, err := client.DescribeTable(ctx, &DescribeTableRequest{ - TableFullName: args.TableFullName, - SampleSize: args.SampleSize, - }) - if err != nil { - return nil, nil, err - } - - return mcpsdk.CreateNewTextContentResult(result.Display()), nil, nil - }, - ) - - // Register databricks_execute_query - type ExecuteQueryInput struct { - Query string `json:"query" jsonschema:"required" jsonschema_description:"SQL query to execute"` - WarehouseID *string `json:"warehouse_id,omitempty" jsonschema_description:"SQL warehouse ID (uses default from config if not provided)"` - MaxRows *int `json:"max_rows,omitempty" jsonschema_description:"Maximum rows to return (default: 1000, max: 10000)"` - Timeout *int `json:"timeout,omitempty" jsonschema_description:"Query timeout in seconds (default: 60)"` - } - - mcpsdk.AddTool(server, - &mcpsdk.Tool{ - Name: "databricks_execute_query", - Description: "Execute SQL query in Databricks. Only single SQL statements are supported - do not send multiple statements separated by semicolons. For multiple statements, call this tool separately for each one. DO NOT create catalogs, schemas or tables - requires metastore admin privileges. Query existing data instead. 
Returns execution time and supports configurable timeouts and row limits.",
-		},
-		func(ctx context.Context, req *mcpsdk.CallToolRequest, args ExecuteQueryInput) (*mcpsdk.CallToolResult, any, error) {
-			log.Debugf(ctx, "databricks_execute_query called: query=%s", args.Query)
-			client, err := NewDatabricksRestClient(ctx, p.config)
-			if err != nil {
-				return nil, nil, err
-			}
-
-			result, err := client.ExecuteSql(ctx, &ExecuteSqlRequest{
-				Query: args.Query,
-			})
-			if err != nil {
-				return nil, nil, err
-			}
-
-			return mcpsdk.CreateNewTextContentResult(result.Display()), nil, nil
-		},
-	)
-
-	log.Info(p.ctx, "Registered Databricks tools")
-	return nil
-}

From badd0195584a094dfcd3eba0900bff29d77ea0c1 Mon Sep 17 00:00:00 2001
From: Igor Rekun
Date: Tue, 25 Nov 2025 20:18:09 +0000
Subject: [PATCH 04/18] Port back configure_auth into cli tools.

---
 .../apps-mcp/lib/prompts/auth_error.tmpl      |  7 +-
 .../lib/providers/clitools/configure_auth.go  | 67 +++++++++++++
 .../providers/clitools/configure_auth_test.go | 96 +++++++++++++++++++
 .../lib/providers/clitools/provider.go        | 44 ++++++++-
 experimental/apps-mcp/lib/server/server.go    |  2 +-
 5 files changed, 210 insertions(+), 6 deletions(-)
 create mode 100644 experimental/apps-mcp/lib/providers/clitools/configure_auth.go
 create mode 100644 experimental/apps-mcp/lib/providers/clitools/configure_auth_test.go

diff --git a/experimental/apps-mcp/lib/prompts/auth_error.tmpl b/experimental/apps-mcp/lib/prompts/auth_error.tmpl
index e7972fec7e..96f2be6162 100644
--- a/experimental/apps-mcp/lib/prompts/auth_error.tmpl
+++ b/experimental/apps-mcp/lib/prompts/auth_error.tmpl
@@ -8,9 +8,8 @@ Not authenticated to Databricks
 I need to know either the Databricks workspace URL or the Databricks profile name.
 You can list the available profiles by running `databricks auth profiles`.
 
-Please configure your authentication using one of these methods:
-1. Set environment variables: DATABRICKS_HOST and DATABRICKS_TOKEN
-2. Use Databricks CLI profiles: Set DATABRICKS_PROFILE or use the default profile
-3. Provide authentication details in your MCP client configuration
+ASK the user which configured profile or Databricks workspace URL they want to use.
+Only then call the `databricks_configure_auth` tool to configure authentication.
+Do not run anything else before authenticating successfully.
 
 Once authenticated, you can use this tool again

diff --git a/experimental/apps-mcp/lib/providers/clitools/configure_auth.go b/experimental/apps-mcp/lib/providers/clitools/configure_auth.go
new file mode 100644
index 0000000000..00a40713cf
--- /dev/null
+++ b/experimental/apps-mcp/lib/providers/clitools/configure_auth.go
@@ -0,0 +1,67 @@
+package clitools
+
+import (
+	"context"
+	"errors"
+	"os"
+
+	"github.com/databricks/cli/experimental/apps-mcp/lib/middlewares"
+	"github.com/databricks/cli/experimental/apps-mcp/lib/prompts"
+	"github.com/databricks/cli/experimental/apps-mcp/lib/session"
+	"github.com/databricks/databricks-sdk-go"
+	"github.com/databricks/databricks-sdk-go/config"
+)
+
+// ConfigureAuth creates and validates a Databricks workspace client with optional host and profile.
+// The authenticated client is stored in the session data for reuse across tool calls.
+func ConfigureAuth(ctx context.Context, sess *session.Session, host, profile *string) (*databricks.WorkspaceClient, error) { + // Skip auth check if testing + if os.Getenv("DATABRICKS_MCP_SKIP_AUTH_CHECK") == "1" { + return nil, nil + } + + var cfg *databricks.Config + if host != nil || profile != nil { + cfg = &databricks.Config{} + if host != nil { + cfg.Host = *host + } + if profile != nil { + cfg.Profile = *profile + } + } + + var client *databricks.WorkspaceClient + var err error + if cfg != nil { + client, err = databricks.NewWorkspaceClient(cfg) + } else { + client, err = databricks.NewWorkspaceClient() + } + if err != nil { + return nil, err + } + + _, err = client.CurrentUser.Me(ctx) + if err != nil { + if profile == nil && host != nil { + return nil, errors.New(prompts.MustExecuteTemplate("auth_u2m.tmpl", map[string]string{ + "WorkspaceURL": *host, + })) + } + return nil, wrapAuthError(err) + } + + // Store client in session data + sess.Set(middlewares.DatabricksClientKey, client) + + return client, nil +} + +// wrapAuthError wraps configuration errors with helpful messages +func wrapAuthError(err error) error { + if errors.Is(err, config.ErrCannotConfigureDefault) { + return errors.New(prompts.MustExecuteTemplate("auth_error.tmpl", nil)) + } + return err +} diff --git a/experimental/apps-mcp/lib/providers/clitools/configure_auth_test.go b/experimental/apps-mcp/lib/providers/clitools/configure_auth_test.go new file mode 100644 index 0000000000..da2df50c78 --- /dev/null +++ b/experimental/apps-mcp/lib/providers/clitools/configure_auth_test.go @@ -0,0 +1,96 @@ +package clitools + +import ( + "context" + "os" + "testing" + + "github.com/databricks/cli/experimental/apps-mcp/lib/middlewares" + "github.com/databricks/cli/experimental/apps-mcp/lib/session" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestConfigureAuthWithSkipCheck(t *testing.T) { + // Set skip auth check for testing + os.Setenv("DATABRICKS_MCP_SKIP_AUTH_CHECK", "1") + defer os.Unsetenv("DATABRICKS_MCP_SKIP_AUTH_CHECK") + + ctx := context.Background() + sess := session.NewSession() + + host := "https://test.cloud.databricks.com" + profile := "test-profile" + + client, err := ConfigureAuth(ctx, sess, &host, &profile) + require.NoError(t, err) + assert.Nil(t, client) // Should be nil when skip check is enabled + + // Verify nothing was stored in session when skip check is on + _, ok := sess.Get(middlewares.DatabricksClientKey) + assert.False(t, ok) +} + +func TestConfigureAuthStoresClientInSession(t *testing.T) { + // This test requires a valid Databricks configuration + // Skip if no config is available + if os.Getenv("DATABRICKS_HOST") == "" && os.Getenv("DATABRICKS_PROFILE") == "" { + t.Skip("Skipping test: no Databricks configuration found") + } + + ctx := context.Background() + sess := session.NewSession() + + client, err := ConfigureAuth(ctx, sess, nil, nil) + require.NoError(t, err) + require.NotNil(t, client) + + // Verify client was stored in session + stored, ok := sess.Get(middlewares.DatabricksClientKey) + assert.True(t, ok) + assert.Equal(t, client, stored) +} + +func TestConfigureAuthWithCustomHost(t *testing.T) { + // This test requires valid credentials + // Skip if no config is available + if os.Getenv("DATABRICKS_HOST") == "" { + t.Skip("Skipping test: DATABRICKS_HOST not set") + } + + ctx := context.Background() + sess := session.NewSession() + + host := os.Getenv("DATABRICKS_HOST") + client, err := ConfigureAuth(ctx, sess, &host, nil) + require.NoError(t, 
err)
+	require.NotNil(t, client)
+
+	// Verify the host was set correctly
+	assert.Equal(t, host, client.Config.Host)
+
+	// Verify client was stored in session
+	_, ok := sess.Get(middlewares.DatabricksClientKey)
+	assert.True(t, ok)
+}
+
+func TestWrapAuthError(t *testing.T) {
+	tests := []struct {
+		name     string
+		err      error
+		expected string
+	}{
+		{
+			name:     "regular error",
+			err:      assert.AnError,
+			expected: assert.AnError.Error(),
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			wrapped := wrapAuthError(tt.err)
+			assert.Contains(t, wrapped.Error(), tt.expected)
+		})
+	}
+}
diff --git a/experimental/apps-mcp/lib/providers/clitools/provider.go b/experimental/apps-mcp/lib/providers/clitools/provider.go
index b119a97b3e..2dc76a330f 100644
--- a/experimental/apps-mcp/lib/providers/clitools/provider.go
+++ b/experimental/apps-mcp/lib/providers/clitools/provider.go
@@ -43,6 +43,48 @@ func (p *Provider) Name() string {
 func (p *Provider) RegisterTools(server *mcpsdk.Server) error {
 	log.Info(p.ctx, "Registering CLI tools")
 
+	// Register databricks_configure_auth
+	type ConfigureAuthInput struct {
+		Host    *string `json:"host,omitempty" jsonschema_description:"Databricks workspace URL (e.g., https://example.cloud.databricks.com). If not provided, uses default from environment or config file"`
+		Profile *string `json:"profile,omitempty" jsonschema_description:"Profile name from ~/.databrickscfg. If not provided, uses default profile"`
+	}
+
+	mcpsdk.AddTool(server,
+		&mcpsdk.Tool{
+			Name:        "databricks_configure_auth",
+			Description: "Configure authentication for Databricks. Only call this tool when automatic Databricks authentication has failed or when the user explicitly asks to use a specific host or profile.
Validates credentials and stores the authenticated client in the session.", + }, + func(ctx context.Context, req *mcpsdk.CallToolRequest, args ConfigureAuthInput) (*mcpsdk.CallToolResult, any, error) { + log.Debug(ctx, "databricks_configure_auth called") + + sess, err := session.GetSession(ctx) + if err != nil { + return nil, nil, err + } + + client, err := ConfigureAuth(ctx, sess, args.Host, args.Profile) + if err != nil { + return nil, nil, err + } + + message := "Authentication configured successfully" + if args.Host != nil { + message += " for host: " + *args.Host + } + if args.Profile != nil { + message += " using profile: " + *args.Profile + } + + // Get user info to confirm auth + me, err := client.CurrentUser.Me(ctx) + if err == nil && me.UserName != "" { + message += "\nAuthenticated as: " + me.UserName + } + + return mcpsdk.CreateNewTextContentResult(message), nil, nil + }, + ) + // Register explore tool mcpsdk.AddTool(server, &mcpsdk.Tool{ @@ -80,6 +122,6 @@ func (p *Provider) RegisterTools(server *mcpsdk.Server) error { }, ) - log.Infof(p.ctx, "Registered CLI tools: count=%d", 2) + log.Infof(p.ctx, "Registered CLI tools: count=%d", 3) return nil } diff --git a/experimental/apps-mcp/lib/server/server.go b/experimental/apps-mcp/lib/server/server.go index 19e72b4dae..8bf7bfeb42 100644 --- a/experimental/apps-mcp/lib/server/server.go +++ b/experimental/apps-mcp/lib/server/server.go @@ -44,7 +44,7 @@ func NewServer(ctx context.Context, cfg *mcp.Config) *Server { } server.AddMiddleware(middlewares.NewToolCounterMiddleware(sess)) - server.AddMiddleware(middlewares.NewDatabricksClientMiddleware(nil)) + server.AddMiddleware(middlewares.NewDatabricksClientMiddleware([]string{"databricks_configure_auth"})) server.AddMiddleware(middlewares.NewEngineGuideMiddleware()) server.AddMiddleware(middlewares.NewTrajectoryMiddleware(tracker)) From 6f9d8cbf09b99d9915b07ff9b3e0e4da0128b275 Mon Sep 17 00:00:00 2001 From: Igor Rekun Date: Wed, 26 Nov 2025 09:21:38 +0000 Subject: [PATCH 05/18] Store warehouse endpoint in session. 
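Caching the full endpoint rather than just its ID means later callers can read the warehouse name and state without another round-trip. A minimal sketch of a caller using the new accessor (describeWarehouse is illustrative and not part of this change; only GetWarehouseEndpoint and GetWarehouseID come from this patch):

```go
package example

import (
	"context"
	"fmt"

	"github.com/databricks/cli/experimental/apps-mcp/lib/middlewares"
)

// describeWarehouse is a hypothetical consumer: the session now caches the
// whole *sql.EndpointInfo, so Name and State are available next to Id.
func describeWarehouse(ctx context.Context) (string, error) {
	wh, err := middlewares.GetWarehouseEndpoint(ctx)
	if err != nil {
		return "", err
	}
	return fmt.Sprintf("%s (id=%s, state=%s)", wh.Name, wh.Id, wh.State), nil
}
```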
--- .../apps-mcp/lib/middlewares/warehouse.go | 31 ++++++++++++------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/experimental/apps-mcp/lib/middlewares/warehouse.go b/experimental/apps-mcp/lib/middlewares/warehouse.go index 04563d34f1..680b26d3ef 100644 --- a/experimental/apps-mcp/lib/middlewares/warehouse.go +++ b/experimental/apps-mcp/lib/middlewares/warehouse.go @@ -39,13 +39,13 @@ func loadWarehouseInBackground(ctx context.Context) { return } - sess.Set("warehouse_id", warehouse.Id) + sess.Set("warehouse_endpoint", warehouse) } -func GetWarehouseID(ctx context.Context) (string, error) { +func GetWarehouseEndpoint(ctx context.Context) (*sql.EndpointInfo, error) { sess, err := session.GetSession(ctx) if err != nil { - return "", err + return nil, err } // Wait for background loading if in progress @@ -57,22 +57,29 @@ func GetWarehouseID(ctx context.Context) (string, error) { // Check if there was an error during background loading if errRaw, ok := sess.Get(warehouseErrorKey); ok { sess.Delete(warehouseErrorKey) - return "", errRaw.(error) + return nil, errRaw.(error) } } - warehouseID, ok := sess.Get("warehouse_id") + warehouse, ok := sess.Get("warehouse_endpoint") if !ok { // Fallback: synchronously load if background loading didn't happen - warehouse, err := getDefaultWarehouse(ctx) + warehouse, err = getDefaultWarehouse(ctx) if err != nil { - return "", err + return nil, err } - warehouseID = warehouse.Id - sess.Set("warehouse_id", warehouseID.(string)) + sess.Set("warehouse_endpoint", warehouse) } - return warehouseID.(string), nil + return warehouse.(*sql.EndpointInfo), nil +} + +func GetWarehouseID(ctx context.Context) (string, error) { + warehouse, err := GetWarehouseEndpoint(ctx) + if err != nil { + return "", err + } + return warehouse.Id, nil } func getDefaultWarehouse(ctx context.Context) (*sql.EndpointInfo, error) { @@ -87,7 +94,9 @@ func getDefaultWarehouse(ctx context.Context) (*sql.EndpointInfo, error) { return nil, fmt.Errorf("get warehouse: %w", err) } return &sql.EndpointInfo{ - Id: warehouse.Id, + Id: warehouse.Id, + Name: warehouse.Name, + State: warehouse.State, }, nil } From 1835aa7ef0b6aa0493149b0d40b65aa25ad1730a Mon Sep 17 00:00:00 2001 From: Igor Rekun Date: Wed, 26 Nov 2025 09:25:46 +0000 Subject: [PATCH 06/18] Reuse warehouse discovery logic. --- .../lib/providers/clitools/explore.go | 64 ++----------------- 1 file changed, 5 insertions(+), 59 deletions(-) diff --git a/experimental/apps-mcp/lib/providers/clitools/explore.go b/experimental/apps-mcp/lib/providers/clitools/explore.go index 89ec5cd36b..bc9550e012 100644 --- a/experimental/apps-mcp/lib/providers/clitools/explore.go +++ b/experimental/apps-mcp/lib/providers/clitools/explore.go @@ -2,29 +2,20 @@ package clitools import ( "context" - "encoding/json" - "errors" "fmt" - "strings" - "github.com/databricks/cli/experimental/apps-mcp/lib/common" + "github.com/databricks/cli/experimental/apps-mcp/lib/middlewares" "github.com/databricks/cli/experimental/apps-mcp/lib/prompts" "github.com/databricks/cli/experimental/apps-mcp/lib/session" "github.com/databricks/cli/libs/databrickscfg/profile" "github.com/databricks/cli/libs/env" - "github.com/databricks/cli/libs/exec" "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go/service/sql" ) -type warehouse struct { - ID string `json:"id"` - Name string `json:"name"` - State string `json:"state"` -} - // Explore provides guidance on exploring Databricks workspaces and resources. 
func Explore(ctx context.Context) (string, error) { - warehouse, err := GetDefaultWarehouse(ctx) + warehouse, err := middlewares.GetWarehouseEndpoint(ctx) if err != nil { log.Debugf(ctx, "Failed to get default warehouse (non-fatal): %v", err) warehouse = nil @@ -36,51 +27,6 @@ func Explore(ctx context.Context) (string, error) { return generateExploreGuidance(ctx, warehouse, currentProfile, profiles), nil } -// GetDefaultWarehouse finds a suitable SQL warehouse for queries. -// It filters out warehouses the user cannot access and prefers RUNNING warehouses, -// then falls back to STOPPED ones (which auto-start). -func GetDefaultWarehouse(ctx context.Context) (*warehouse, error) { - executor, err := exec.NewCommandExecutor("") - if err != nil { - return nil, fmt.Errorf("failed to create command executor: %w", err) - } - - cliPath := common.GetCLIPath() - output, err := executor.Exec(ctx, fmt.Sprintf(`"%s" api get "/api/2.0/sql/warehouses?skip_cannot_use=true" --output json`, cliPath)) - if err != nil { - return nil, fmt.Errorf("failed to list warehouses: %w\nOutput: %s", err, output) - } - - var response struct { - Warehouses []warehouse `json:"warehouses"` - } - if err := json.Unmarshal(output, &response); err != nil { - return nil, fmt.Errorf("failed to parse warehouses: %w", err) - } - warehouses := response.Warehouses - - if len(warehouses) == 0 { - return nil, errors.New("no SQL warehouses found in workspace") - } - - // Prefer RUNNING warehouses - for i := range warehouses { - if strings.ToUpper(warehouses[i].State) == "RUNNING" { - return &warehouses[i], nil - } - } - - // Fall back to STOPPED warehouses (they auto-start when queried) - for i := range warehouses { - if strings.ToUpper(warehouses[i].State) == "STOPPED" { - return &warehouses[i], nil - } - } - - // Return first available warehouse regardless of state - return &warehouses[0], nil -} - // getCurrentProfile returns the currently active profile name. func getCurrentProfile(ctx context.Context) string { // Check DATABRICKS_CONFIG_PROFILE env var @@ -102,7 +48,7 @@ func getAvailableProfiles(ctx context.Context) profile.Profiles { } // generateExploreGuidance creates comprehensive guidance for data exploration. -func generateExploreGuidance(ctx context.Context, warehouse *warehouse, currentProfile string, profiles profile.Profiles) string { +func generateExploreGuidance(ctx context.Context, warehouse *sql.EndpointInfo, currentProfile string, profiles profile.Profiles) string { // Build workspace/profile information workspaceInfo := "Current Workspace Profile: " + currentProfile if len(profiles) > 0 { @@ -147,7 +93,7 @@ func generateExploreGuidance(ctx context.Context, warehouse *warehouse, currentP warehouseID := "" if warehouse != nil { warehouseName = warehouse.Name - warehouseID = warehouse.ID + warehouseID = warehouse.Id } // Prepare template data From f8d53450708cc639cbfa45f93b68b186d8d490bf Mon Sep 17 00:00:00 2001 From: Igor Rekun Date: Wed, 26 Nov 2025 11:31:57 +0000 Subject: [PATCH 07/18] Use tRPC bundle for apps. 
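The template reads its parameters as a JSON document from stdin. A sketch of a full invocation, assuming a shell heredoc and parameter names (project_name, sql_warehouse_id) taken from the init-template command added later in this series; "my-app" and "your-warehouse-id" are placeholders:

    invoke_databricks_cli 'bundle init https://github.com/neondatabase/appdotbuild-agent --template-dir edda/edda_templates/trpc_bundle --config-file /dev/stdin <<EOF
    {"project_name": "my-app", "sql_warehouse_id": "your-warehouse-id"}
    EOF'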
--- experimental/apps-mcp/lib/prompts/apps.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/apps-mcp/lib/prompts/apps.tmpl b/experimental/apps-mcp/lib/prompts/apps.tmpl index 12d6c3ad6d..7243fd88f9 100644 --- a/experimental/apps-mcp/lib/prompts/apps.tmpl +++ b/experimental/apps-mcp/lib/prompts/apps.tmpl @@ -11,7 +11,7 @@ DATABRICKS APPS DEVELOPMENT Initialize a New App Bundle: To create a new Databricks app using the Streamlit template: - invoke_databricks_cli 'bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/streamlit-app --config-file /dev/stdin < Date: Wed, 26 Nov 2025 13:10:31 +0000 Subject: [PATCH 08/18] Adjust instructions. --- experimental/apps-mcp/lib/prompts/apps.tmpl | 26 +++++-------------- .../apps-mcp/lib/prompts/explore.tmpl | 2 +- .../clitools/invoke_databricks_cli.go | 9 ++----- .../lib/providers/clitools/provider.go | 5 ++-- 4 files changed, 12 insertions(+), 30 deletions(-) diff --git a/experimental/apps-mcp/lib/prompts/apps.tmpl b/experimental/apps-mcp/lib/prompts/apps.tmpl index 7243fd88f9..5d48f1d817 100644 --- a/experimental/apps-mcp/lib/prompts/apps.tmpl +++ b/experimental/apps-mcp/lib/prompts/apps.tmpl @@ -9,25 +9,13 @@ DATABRICKS APPS DEVELOPMENT ============================ -Initialize a New App Bundle: - To create a new Databricks app using the Streamlit template: - invoke_databricks_cli 'bundle init https://github.com/neondatabase/appdotbuild-agent --template-dir edda/edda_templates/trpc_bundle --config-file /dev/stdin <' +invoke_databricks_cli 'bundle init https://github.com/neondatabase/appdotbuild-agent --template-dir edda/edda_templates/trpc_bundle --config-file /dev/stdin <' diff --git a/experimental/apps-mcp/lib/prompts/explore.tmpl b/experimental/apps-mcp/lib/prompts/explore.tmpl index 6bed77c18a..e6a91bc8c9 100644 --- a/experimental/apps-mcp/lib/prompts/explore.tmpl +++ b/experimental/apps-mcp/lib/prompts/explore.tmpl @@ -19,7 +19,7 @@ IMPORTANT: Use the invoke_databricks_cli tool to run all commands below! 1. EXECUTING SQL QUERIES Run queries with auto-wait (max 50s): - invoke_databricks_cli 'api post /api/2.0/sql/statements --json {"warehouse_id":"{{if .WarehouseID}}{{.WarehouseID}}{{else}}{{end}}","statement":"SELECT * FROM ..
LIMIT 10","wait_timeout":"50s"}' + invoke_databricks_cli 'api post /api/2.0/sql/statements --json '{"warehouse_id":"{{if .WarehouseID}}{{.WarehouseID}}{{else}}{{end}}","statement":"SELECT * FROM ..
LIMIT 10","wait_timeout":"50s"}' Response has status.state: - "SUCCEEDED" → Results in result.data_array (you're done!) diff --git a/experimental/apps-mcp/lib/providers/clitools/invoke_databricks_cli.go b/experimental/apps-mcp/lib/providers/clitools/invoke_databricks_cli.go index 150f295afa..4f584993ef 100644 --- a/experimental/apps-mcp/lib/providers/clitools/invoke_databricks_cli.go +++ b/experimental/apps-mcp/lib/providers/clitools/invoke_databricks_cli.go @@ -10,17 +10,12 @@ import ( ) // InvokeDatabricksCLI runs a Databricks CLI command and returns the output. -func InvokeDatabricksCLI(ctx context.Context, command string, workingDirectory *string) (string, error) { +func InvokeDatabricksCLI(ctx context.Context, command string) (string, error) { if command == "" { return "", errors.New("command is required") } - workDir := "." - if workingDirectory != nil && *workingDirectory != "" { - workDir = *workingDirectory - } - - executor, err := exec.NewCommandExecutor(workDir) + executor, err := exec.NewCommandExecutor(".") if err != nil { return "", fmt.Errorf("failed to create command executor: %w", err) } diff --git a/experimental/apps-mcp/lib/providers/clitools/provider.go b/experimental/apps-mcp/lib/providers/clitools/provider.go index 2dc76a330f..967787d098 100644 --- a/experimental/apps-mcp/lib/providers/clitools/provider.go +++ b/experimental/apps-mcp/lib/providers/clitools/provider.go @@ -103,8 +103,7 @@ func (p *Provider) RegisterTools(server *mcpsdk.Server) error { // Register invoke_databricks_cli tool type InvokeDatabricksCLIInput struct { - Command string `json:"command" jsonschema:"required" jsonschema_description:"The full Databricks CLI command to run, e.g. 'bundle deploy' or 'bundle validate'. Do not include the 'databricks' prefix."` - WorkingDirectory *string `json:"working_directory,omitempty" jsonschema_description:"Optional. The directory to run the command in. Defaults to the current directory."` + Command string `json:"command" jsonschema:"required" jsonschema_description:"The full Databricks CLI command to run, e.g. 'bundle deploy' or 'bundle validate'. 
Do not include the 'databricks' prefix."` } mcpsdk.AddTool(server, @@ -114,7 +113,7 @@ func (p *Provider) RegisterTools(server *mcpsdk.Server) error { }, func(ctx context.Context, req *mcpsdk.CallToolRequest, args InvokeDatabricksCLIInput) (*mcpsdk.CallToolResult, any, error) { log.Debugf(ctx, "invoke_databricks_cli called: command=%s", args.Command) - result, err := InvokeDatabricksCLI(ctx, args.Command, args.WorkingDirectory) + result, err := InvokeDatabricksCLI(ctx, args.Command) if err != nil { return nil, nil, err } From b412143ed729dbdf6965d48427b52fde3cd52c59 Mon Sep 17 00:00:00 2001 From: Arseny Kravchenko Date: Wed, 26 Nov 2025 15:55:52 +0100 Subject: [PATCH 09/18] Add MCP tools subcommands: query, discover-schema, init-template --- experimental/apps-mcp/cmd/apps_mcp.go | 1 + experimental/apps-mcp/cmd/discover_schema.go | 229 ++++++++++++++++++ experimental/apps-mcp/cmd/init_template.go | 105 ++++++++ experimental/apps-mcp/cmd/query.go | 140 +++++++++++ experimental/apps-mcp/cmd/tools.go | 19 ++ .../lib/providers/clitools/provider.go | 29 ++- 6 files changed, 521 insertions(+), 2 deletions(-) create mode 100644 experimental/apps-mcp/cmd/discover_schema.go create mode 100644 experimental/apps-mcp/cmd/init_template.go create mode 100644 experimental/apps-mcp/cmd/query.go create mode 100644 experimental/apps-mcp/cmd/tools.go diff --git a/experimental/apps-mcp/cmd/apps_mcp.go b/experimental/apps-mcp/cmd/apps_mcp.go index 8e408aba01..31709675e2 100644 --- a/experimental/apps-mcp/cmd/apps_mcp.go +++ b/experimental/apps-mcp/cmd/apps_mcp.go @@ -57,6 +57,7 @@ The server communicates via stdio using the Model Context Protocol.`, cmd.Flags().BoolVar(&withWorkspaceTools, "with-workspace-tools", false, "Enable workspace tools (file operations, bash, grep, glob)") cmd.AddCommand(newInstallCmd()) + cmd.AddCommand(newToolsCmd()) return cmd } diff --git a/experimental/apps-mcp/cmd/discover_schema.go b/experimental/apps-mcp/cmd/discover_schema.go new file mode 100644 index 0000000000..2f34c8781a --- /dev/null +++ b/experimental/apps-mcp/cmd/discover_schema.go @@ -0,0 +1,229 @@ +package mcp + +import ( + "context" + "errors" + "fmt" + "strings" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/experimental/apps-mcp/lib/middlewares" + "github.com/databricks/cli/experimental/apps-mcp/lib/session" + "github.com/databricks/cli/libs/cmdctx" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/databricks-sdk-go" + dbsql "github.com/databricks/databricks-sdk-go/service/sql" + "github.com/spf13/cobra" +) + +func newDiscoverSchemaCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "discover-schema TABLE...", + Short: "Discover schema for one or more tables", + Long: `Batch discover table metadata including columns, types, sample data, and null counts. + +Tables must be specified in CATALOG.SCHEMA.TABLE format. 
+
+For each table, returns:
+- Column names and types
+- Sample data (5 rows)
+- Null counts per column
+- Total row count`,
+		Example: `  databricks experimental apps-mcp tools discover-schema samples.nyctaxi.trips
+  databricks experimental apps-mcp tools discover-schema catalog.schema.table1 catalog.schema.table2`,
+		Args:    cobra.MinimumNArgs(1),
+		PreRunE: root.MustWorkspaceClient,
+		RunE: func(cmd *cobra.Command, args []string) error {
+			ctx := cmd.Context()
+			w := cmdctx.WorkspaceClient(ctx)
+
+			// validate table names
+			for _, table := range args {
+				parts := strings.Split(table, ".")
+				if len(parts) != 3 {
+					return fmt.Errorf("invalid table format %q: expected CATALOG.SCHEMA.TABLE", table)
+				}
+			}
+
+			// set up session with client for middleware compatibility
+			sess := session.NewSession()
+			sess.Set(middlewares.DatabricksClientKey, w)
+			ctx = session.WithSession(ctx, sess)
+
+			warehouseID, err := middlewares.GetWarehouseID(ctx)
+			if err != nil {
+				return err
+			}
+
+			var results []string
+			for _, table := range args {
+				result, err := discoverTable(ctx, w, warehouseID, table)
+				if err != nil {
+					result = fmt.Sprintf("Error discovering %s: %v", table, err)
+				}
+				results = append(results, result)
+			}
+
+			// format output with dividers for multiple tables
+			var output string
+			if len(results) == 1 {
+				output = results[0]
+			} else {
+				divider := strings.Repeat("-", 70)
+				for i, result := range results {
+					if i > 0 {
+						output += "\n" + divider + "\n"
+					}
+					output += fmt.Sprintf("TABLE: %s\n%s\n", args[i], divider)
+					output += result
+				}
+			}
+
+			cmdio.LogString(ctx, output)
+			return nil
+		},
+	}
+
+	return cmd
+}
+
+func discoverTable(ctx context.Context, w *databricks.WorkspaceClient, warehouseID, table string) (string, error) {
+	var sb strings.Builder
+
+	// 1. describe table - get columns and types
+	describeSQL := "DESCRIBE TABLE " + table
+	descResp, err := executeSQL(ctx, w, warehouseID, describeSQL)
+	if err != nil {
+		return "", fmt.Errorf("describe table: %w", err)
+	}
+
+	columns, types := parseDescribeResult(descResp)
+	if len(columns) == 0 {
+		return "", errors.New("no columns found")
+	}
+
+	sb.WriteString("COLUMNS:\n")
+	for i, col := range columns {
+		sb.WriteString(fmt.Sprintf("  %s: %s\n", col, types[i]))
+	}
+
+	// 2. sample data (5 rows)
+	sampleSQL := fmt.Sprintf("SELECT * FROM %s LIMIT 5", table)
+	sampleResp, err := executeSQL(ctx, w, warehouseID, sampleSQL)
+	if err != nil {
+		sb.WriteString(fmt.Sprintf("\nSAMPLE DATA: Error - %v\n", err))
+	} else {
+		sb.WriteString("\nSAMPLE DATA:\n")
+		sb.WriteString(formatTableData(sampleResp))
+	}
+
+	// 3.
null counts per column + nullCountExprs := make([]string, len(columns)) + for i, col := range columns { + nullCountExprs[i] = fmt.Sprintf("SUM(CASE WHEN `%s` IS NULL THEN 1 ELSE 0 END) AS `%s_nulls`", col, col) + } + nullSQL := fmt.Sprintf("SELECT COUNT(*) AS total_rows, %s FROM %s", + strings.Join(nullCountExprs, ", "), table) + + nullResp, err := executeSQL(ctx, w, warehouseID, nullSQL) + if err != nil { + sb.WriteString(fmt.Sprintf("\nNULL COUNTS: Error - %v\n", err)) + } else { + sb.WriteString("\nNULL COUNTS:\n") + sb.WriteString(formatNullCounts(nullResp, columns)) + } + + return sb.String(), nil +} + +func executeSQL(ctx context.Context, w *databricks.WorkspaceClient, warehouseID, statement string) (*dbsql.StatementResponse, error) { + resp, err := w.StatementExecution.ExecuteAndWait(ctx, dbsql.ExecuteStatementRequest{ + WarehouseId: warehouseID, + Statement: statement, + WaitTimeout: "50s", + }) + if err != nil { + return nil, err + } + + if resp.Status != nil && resp.Status.State == dbsql.StatementStateFailed { + errMsg := "query failed" + if resp.Status.Error != nil { + errMsg = resp.Status.Error.Message + } + return nil, errors.New(errMsg) + } + + return resp, nil +} + +func parseDescribeResult(resp *dbsql.StatementResponse) (columns, types []string) { + if resp.Result == nil || resp.Result.DataArray == nil { + return nil, nil + } + + for _, row := range resp.Result.DataArray { + if len(row) < 2 { + continue + } + colName := row[0] + colType := row[1] + // skip partition/metadata rows (they start with #) + if strings.HasPrefix(colName, "#") || colName == "" { + continue + } + columns = append(columns, colName) + types = append(types, colType) + } + return columns, types +} + +func formatTableData(resp *dbsql.StatementResponse) string { + if resp.Result == nil || resp.Result.DataArray == nil || len(resp.Result.DataArray) == 0 { + return " (no data)\n" + } + + var sb strings.Builder + var columns []string + if resp.Manifest != nil && resp.Manifest.Schema != nil { + for _, col := range resp.Manifest.Schema.Columns { + columns = append(columns, col.Name) + } + } + + for i, row := range resp.Result.DataArray { + sb.WriteString(fmt.Sprintf(" Row %d:\n", i+1)) + for j, val := range row { + colName := fmt.Sprintf("col%d", j) + if j < len(columns) { + colName = columns[j] + } + sb.WriteString(fmt.Sprintf(" %s: %v\n", colName, val)) + } + } + return sb.String() +} + +func formatNullCounts(resp *dbsql.StatementResponse, columns []string) string { + if resp.Result == nil || resp.Result.DataArray == nil || len(resp.Result.DataArray) == 0 { + return " (no data)\n" + } + + row := resp.Result.DataArray[0] + var sb strings.Builder + + // first value is total_rows + if len(row) > 0 { + sb.WriteString(fmt.Sprintf(" total_rows: %v\n", row[0])) + } + + // remaining values are null counts per column + for i, col := range columns { + idx := i + 1 + if idx < len(row) { + sb.WriteString(fmt.Sprintf(" %s_nulls: %v\n", col, row[idx])) + } + } + + return sb.String() +} diff --git a/experimental/apps-mcp/cmd/init_template.go b/experimental/apps-mcp/cmd/init_template.go new file mode 100644 index 0000000000..263e5295fa --- /dev/null +++ b/experimental/apps-mcp/cmd/init_template.go @@ -0,0 +1,105 @@ +package mcp + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/experimental/apps-mcp/lib/middlewares" + "github.com/databricks/cli/experimental/apps-mcp/lib/session" + "github.com/databricks/cli/libs/cmdctx" + 
"github.com/databricks/cli/libs/template" + "github.com/spf13/cobra" +) + +const ( + defaultTemplateRepo = "https://github.com/databricks/cli" + defaultTemplateDir = "experimental/apps-mcp/lib/templates/trpc" +) + +func newInitTemplateCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "init-template PROJECT_NAME", + Short: "Initialize a new app from template", + Long: `Initialize a new Databricks app from the tRPC template. + +This is a shortcut for 'bundle init' with the default MCP app template. +Auto-detects the SQL warehouse ID unless DATABRICKS_WAREHOUSE_ID is set. + +PROJECT_NAME is used as both the project name and the output directory.`, + Example: ` databricks experimental apps-mcp tools init-template my-app`, + Args: cobra.ExactArgs(1), + PreRunE: root.MustWorkspaceClient, + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + w := cmdctx.WorkspaceClient(ctx) + + outputDir := args[0] + projectName := filepath.Base(outputDir) + if !filepath.IsAbs(outputDir) { + cwd, err := os.Getwd() + if err != nil { + return fmt.Errorf("get working directory: %w", err) + } + outputDir = filepath.Join(cwd, outputDir) + } + + // set up session with client for middleware compatibility + sess := session.NewSession() + sess.Set(middlewares.DatabricksClientKey, w) + ctx = session.WithSession(ctx, sess) + + warehouseID, err := middlewares.GetWarehouseID(ctx) + if err != nil { + return err + } + + // create temp config file with parameters + configMap := map[string]string{ + "project_name": projectName, + "sql_warehouse_id": warehouseID, + } + configBytes, err := json.Marshal(configMap) + if err != nil { + return fmt.Errorf("marshal config: %w", err) + } + + tmpFile, err := os.CreateTemp("", "mcp-template-config-*.json") + if err != nil { + return fmt.Errorf("create temp config file: %w", err) + } + defer os.Remove(tmpFile.Name()) + + if _, err := tmpFile.Write(configBytes); err != nil { + return fmt.Errorf("write config file: %w", err) + } + if err := tmpFile.Close(); err != nil { + return fmt.Errorf("close config file: %w", err) + } + + r := template.Resolver{ + TemplatePathOrUrl: defaultTemplateRepo, + ConfigFile: tmpFile.Name(), + OutputDir: outputDir, + TemplateDir: defaultTemplateDir, + } + + tmpl, err := r.Resolve(ctx) + if err != nil { + return err + } + defer tmpl.Reader.Cleanup(ctx) + + err = tmpl.Writer.Materialize(ctx, tmpl.Reader) + if err != nil { + return err + } + tmpl.Writer.LogTelemetry(ctx) + return nil + }, + } + + return cmd +} diff --git a/experimental/apps-mcp/cmd/query.go b/experimental/apps-mcp/cmd/query.go new file mode 100644 index 0000000000..85053676b2 --- /dev/null +++ b/experimental/apps-mcp/cmd/query.go @@ -0,0 +1,140 @@ +package mcp + +import ( + "encoding/json" + "errors" + "fmt" + "strings" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/experimental/apps-mcp/lib/middlewares" + "github.com/databricks/cli/experimental/apps-mcp/lib/session" + "github.com/databricks/cli/libs/cmdctx" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/databricks-sdk-go/service/sql" + "github.com/spf13/cobra" +) + +func newQueryCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "query SQL", + Short: "Execute SQL against Databricks warehouse", + Long: `Execute a SQL statement against a Databricks SQL warehouse and return results. + +The command auto-detects an available warehouse unless DATABRICKS_WAREHOUSE_ID is set. 
+
+Output includes the query results as JSON and row count.`,
+		Example: `  databricks experimental apps-mcp tools query "SELECT * FROM samples.nyctaxi.trips LIMIT 5"`,
+		Args:    cobra.ExactArgs(1),
+		PreRunE: root.MustWorkspaceClient,
+		RunE: func(cmd *cobra.Command, args []string) error {
+			ctx := cmd.Context()
+			w := cmdctx.WorkspaceClient(ctx)
+
+			sqlStatement := cleanSQL(args[0])
+			if sqlStatement == "" {
+				return errors.New("SQL statement is required")
+			}
+
+			// set up session with client for middleware compatibility
+			sess := session.NewSession()
+			sess.Set(middlewares.DatabricksClientKey, w)
+			ctx = session.WithSession(ctx, sess)
+
+			warehouseID, err := middlewares.GetWarehouseID(ctx)
+			if err != nil {
+				return err
+			}
+
+			resp, err := w.StatementExecution.ExecuteAndWait(ctx, sql.ExecuteStatementRequest{
+				WarehouseId: warehouseID,
+				Statement:   sqlStatement,
+				WaitTimeout: "50s",
+			})
+			if err != nil {
+				return fmt.Errorf("execute statement: %w", err)
+			}
+
+			if resp.Status != nil && resp.Status.State == sql.StatementStateFailed {
+				errMsg := "query failed"
+				if resp.Status.Error != nil {
+					errMsg = resp.Status.Error.Message
+				}
+				return errors.New(errMsg)
+			}
+
+			output, err := formatQueryResult(resp)
+			if err != nil {
+				return err
+			}
+
+			cmdio.LogString(ctx, output)
+			return nil
+		},
+	}
+
+	return cmd
+}
+
+// cleanSQL removes surrounding quotes, empty lines, and SQL comments.
+func cleanSQL(s string) string {
+	s = strings.TrimSpace(s)
+	// remove surrounding quotes if present
+	if (strings.HasPrefix(s, `"`) && strings.HasSuffix(s, `"`)) ||
+		(strings.HasPrefix(s, `'`) && strings.HasSuffix(s, `'`)) {
+		s = s[1 : len(s)-1]
+	}
+
+	var lines []string
+	for _, line := range strings.Split(s, "\n") {
+		line = strings.TrimSpace(line)
+		// skip empty lines and single-line comments
+		if line == "" || strings.HasPrefix(line, "--") {
+			continue
+		}
+		lines = append(lines, line)
+	}
+	return strings.Join(lines, "\n")
+}
+
+func formatQueryResult(resp *sql.StatementResponse) (string, error) {
+	var sb strings.Builder
+
+	if resp.Manifest == nil || resp.Result == nil {
+		sb.WriteString("Query executed successfully (no results)\n")
+		return sb.String(), nil
+	}
+
+	// get column names
+	var columns []string
+	if resp.Manifest.Schema != nil {
+		for _, col := range resp.Manifest.Schema.Columns {
+			columns = append(columns, col.Name)
+		}
+	}
+
+	// format as JSON array for consistency with Neon API
+	var rows []map[string]any
+	if resp.Result.DataArray != nil {
+		for _, row := range resp.Result.DataArray {
+			rowMap := make(map[string]any)
+			for i, val := range row {
+				if i < len(columns) {
+					rowMap[columns[i]] = val
+				}
+			}
+			rows = append(rows, rowMap)
+		}
+	}
+
+	output, err := json.MarshalIndent(rows, "", "  ")
+	if err != nil {
+		return "", fmt.Errorf("marshal results: %w", err)
+	}
+
+	sb.Write(output)
+	sb.WriteString("\n\n")
+	sb.WriteString(fmt.Sprintf("Row count: %d\n", len(rows)))
+
+	return sb.String(), nil
+}
diff --git a/experimental/apps-mcp/cmd/tools.go b/experimental/apps-mcp/cmd/tools.go
new file mode 100644
index 0000000000..4459babe6c
--- /dev/null
+++ b/experimental/apps-mcp/cmd/tools.go
@@ -0,0 +1,19 @@
+package mcp
+
+import (
+	"github.com/spf13/cobra"
+)
+
+func newToolsCmd() *cobra.Command {
+	cmd := &cobra.Command{
+		Use:    "tools",
+		Short:  "MCP tools for AI agents",
+		Hidden: true,
+	}
+
+	cmd.AddCommand(newQueryCmd())
+	cmd.AddCommand(newDiscoverSchemaCmd())
+	cmd.AddCommand(newInitTemplateCmd())
+
+	return cmd
+}
diff --git
a/experimental/apps-mcp/lib/providers/clitools/provider.go b/experimental/apps-mcp/lib/providers/clitools/provider.go index 967787d098..2e6dea2512 100644 --- a/experimental/apps-mcp/lib/providers/clitools/provider.go +++ b/experimental/apps-mcp/lib/providers/clitools/provider.go @@ -108,8 +108,33 @@ func (p *Provider) RegisterTools(server *mcpsdk.Server) error { mcpsdk.AddTool(server, &mcpsdk.Tool{ - Name: "invoke_databricks_cli", - Description: "Run any Databricks CLI command. Use this tool whenever you need to run databricks CLI commands like 'bundle deploy', 'bundle validate', 'bundle run', 'auth login', etc. The reason this tool exists (instead of invoking the databricks CLI directly) is to make it easier for users to allow-list commands.", + Name: "invoke_databricks_cli", + Description: `Execute Databricks CLI command. Pass all arguments as a single string. + +## ⚡ EFFICIENT DATA DISCOVERY (Recommended): +1. 'catalogs list' → find available catalogs +2. 'schemas list CATALOG' → find schemas in catalog +3. 'tables list CATALOG SCHEMA' → find tables in schema +4. 'experimental apps-mcp tools discover-schema TABLE1 TABLE2 TABLE3' → **BATCH discover multiple tables in ONE call** ⚡ + +## Data Commands: +- Execute SQL: 'experimental apps-mcp tools query "SELECT * FROM table LIMIT 5"' (returns JSON + row count) +- Discover schema: 'experimental apps-mcp tools discover-schema TABLE1 TABLE2 ...' (columns, types, samples, nulls) + ↳ ALWAYS use batch mode: 'experimental apps-mcp tools discover-schema tbl1 tbl2 tbl3' instead of 3 separate calls + ↳ Table format: CATALOG.SCHEMA.TABLE (e.g., samples.nyctaxi.trips) + +## Project Commands: +- Init template: 'experimental apps-mcp tools init-template PROJECT_NAME' → create new app from tRPC template +- Bundle commands: 'bundle deploy', 'bundle validate', 'bundle run JOB_NAME' + +## Common Errors: +❌ 'tables list samples.tpcds_sf1' → Wrong format! +✅ 'tables list samples tpcds_sf1' → Correct (CATALOG SCHEMA as separate args) + +## Best Practices: +✅ Use batch discover-schema for multiple tables (faster) +✅ Test SQL with 'experimental apps-mcp tools query' before implementing in code +✅ Use 'experimental apps-mcp tools init-template' to scaffold new projects`, }, func(ctx context.Context, req *mcpsdk.CallToolRequest, args InvokeDatabricksCLIInput) (*mcpsdk.CallToolResult, any, error) { log.Debugf(ctx, "invoke_databricks_cli called: command=%s", args.Command) From e7042cfa974a7e0331c179991e649f8670836f2b Mon Sep 17 00:00:00 2001 From: Igor Rekun Date: Wed, 26 Nov 2025 16:22:23 +0000 Subject: [PATCH 10/18] Add validation tool. 
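The package is organized around a small strategy interface consumed by the validate command. types.go holds the actual definitions; the sketch below infers their likely shape from the call sites in validate.go and command.go, so everything beyond Validate, Success, and String is an assumption:

```go
package validation

import (
	"context"
	"fmt"
)

// Validation is the strategy interface behind `tools validate`: getValidator
// picks an implementation and the command calls Validate once per run.
type Validation interface {
	Validate(ctx context.Context, workDir string) (*ValidationResult, error)
}

// ValidationResult is read by validate.go via result.Success and
// result.String(). Carrying per-step ValidationDetail values (defined in
// command.go) in a Details field is an assumed layout.
type ValidationResult struct {
	Success bool
	Details []ValidationDetail
}

func (r *ValidationResult) String() string {
	if r.Success {
		return "validation passed"
	}
	return fmt.Sprintf("validation failed: %d failing step(s)", len(r.Details))
}
```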
---
 experimental/apps-mcp/cmd/tools.go            |  1 +
 experimental/apps-mcp/cmd/validate.go         | 92 +++++++++++++++++++
 .../apps-mcp/lib/validation/command.go        | 43 +++++++++
 .../apps-mcp/lib/validation/custom.go         | 45 +++++++++
 .../apps-mcp/lib/validation/nodejs.go         | 82 +++++++++++++++++
 experimental/apps-mcp/lib/validation/types.go | 55 +++++++++++
 6 files changed, 318 insertions(+)
 create mode 100644 experimental/apps-mcp/cmd/validate.go
 create mode 100644 experimental/apps-mcp/lib/validation/command.go
 create mode 100644 experimental/apps-mcp/lib/validation/custom.go
 create mode 100644 experimental/apps-mcp/lib/validation/nodejs.go
 create mode 100644 experimental/apps-mcp/lib/validation/types.go

diff --git a/experimental/apps-mcp/cmd/tools.go b/experimental/apps-mcp/cmd/tools.go
index 4459babe6c..a040f2f9f0 100644
--- a/experimental/apps-mcp/cmd/tools.go
+++ b/experimental/apps-mcp/cmd/tools.go
@@ -14,6 +14,7 @@ func newToolsCmd() *cobra.Command {
 	cmd.AddCommand(newQueryCmd())
 	cmd.AddCommand(newDiscoverSchemaCmd())
 	cmd.AddCommand(newInitTemplateCmd())
+	cmd.AddCommand(newValidateCmd())
 
 	return cmd
 }
diff --git a/experimental/apps-mcp/cmd/validate.go b/experimental/apps-mcp/cmd/validate.go
new file mode 100644
index 0000000000..d9fe70337b
--- /dev/null
+++ b/experimental/apps-mcp/cmd/validate.go
@@ -0,0 +1,92 @@
+package mcp
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"github.com/databricks/cli/experimental/apps-mcp/lib/validation"
+	"github.com/databricks/cli/libs/cmdio"
+	"github.com/spf13/cobra"
+)
+
+// getValidator returns the appropriate validator based on the validator type.
+func getValidator(validatorType, customCommand string) (validation.Validation, error) {
+	switch validatorType {
+	case "nodejs":
+		return &validation.ValidationNodeJs{}, nil
+	case "custom":
+		if customCommand == "" {
+			return nil, errors.New("--custom-command is required when --validator=custom")
+		}
+		return &validation.ValidationCmd{Command: customCommand}, nil
+	default:
+		return nil, fmt.Errorf("unknown validator: %s (available: nodejs, custom)", validatorType)
+	}
+}
+
+func newValidateCmd() *cobra.Command {
+	var (
+		validatorType string
+		customCommand string
+	)
+
+	cmd := &cobra.Command{
+		Use:   "validate <work-dir>",
+		Short: "Validate a Databricks app project",
+		Long: `Validate a Databricks app project by running build, type checks, tests, etc.
+
+Supports multiple validation strategies for different project types.
+ +Exit codes: + 0 - Validation succeeded + 1 - Validation failed + 2 - Invalid flags or configuration`, + Example: ` databricks experimental apps-mcp tools validate ./my-project + databricks experimental apps-mcp tools validate ./my-project --validator=custom --custom-command="./validate.sh"`, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + workDir := args[0] + + // Validate directory exists + absPath, err := filepath.Abs(workDir) + if err != nil { + return fmt.Errorf("invalid work directory path: %w", err) + } + + if _, err := os.Stat(absPath); os.IsNotExist(err) { + return fmt.Errorf("work directory does not exist: %s", absPath) + } + + // Get validator + validator, err := getValidator(validatorType, customCommand) + if err != nil { + return err + } + + // Run validation + result, err := validator.Validate(ctx, absPath) + if err != nil { + return fmt.Errorf("validation error: %w", err) + } + + // Output result + cmdio.LogString(ctx, result.String()) + + // Return appropriate exit code + if !result.Success { + return errors.New("validation failed") + } + return nil + }, + } + + cmd.Flags().StringVar(&validatorType, "validator", "nodejs", + "Validator to use: nodejs or custom") + cmd.Flags().StringVar(&customCommand, "custom-command", "", + "Custom command to run (for validator=custom)") + + return cmd +} diff --git a/experimental/apps-mcp/lib/validation/command.go b/experimental/apps-mcp/lib/validation/command.go new file mode 100644 index 0000000000..2fd619baef --- /dev/null +++ b/experimental/apps-mcp/lib/validation/command.go @@ -0,0 +1,43 @@ +package validation + +import ( + "bytes" + "context" + "fmt" + "os/exec" +) + +// runCommand executes a shell command in the specified directory. +// Returns ValidationDetail on failure, nil on success. +func runCommand(ctx context.Context, workDir, command string) *ValidationDetail { + cmd := exec.CommandContext(ctx, "sh", "-c", command) + cmd.Dir = workDir + + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + err := cmd.Run() + exitCode := 0 + if err != nil { + if exitErr, ok := err.(*exec.ExitError); ok { + exitCode = exitErr.ExitCode() + } else { + return &ValidationDetail{ + ExitCode: -1, + Stdout: stdout.String(), + Stderr: fmt.Sprintf("Failed to execute command: %v\nStderr: %s", err, stderr.String()), + } + } + } + + if exitCode != 0 { + return &ValidationDetail{ + ExitCode: exitCode, + Stdout: stdout.String(), + Stderr: stderr.String(), + } + } + + return nil +} diff --git a/experimental/apps-mcp/lib/validation/custom.go b/experimental/apps-mcp/lib/validation/custom.go new file mode 100644 index 0000000000..35ab4ec476 --- /dev/null +++ b/experimental/apps-mcp/lib/validation/custom.go @@ -0,0 +1,45 @@ +package validation + +import ( + "context" + "fmt" + "time" + + "github.com/databricks/cli/libs/log" +) + +// ValidationCmd implements validation using a custom command specified by the user. 
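+//
+// A hypothetical usage sketch:
+//
+//	v := &ValidationCmd{Command: "./validate.sh"}
+//	result, err := v.Validate(ctx, workDir)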
+type ValidationCmd struct { + Command string +} + +func (v *ValidationCmd) Validate(ctx context.Context, workDir string) (*ValidateResult, error) { + log.Infof(ctx, "starting custom validation: command=%s", v.Command) + startTime := time.Now() + var progressLog []string + + progressLog = append(progressLog, "🔄 Starting custom validation: "+v.Command) + + fullCommand := v.Command + err := runCommand(ctx, workDir, fullCommand) + if err != nil { + duration := time.Since(startTime) + log.Errorf(ctx, "custom validation command failed (duration: %.1fs, error: %v)", duration.Seconds(), err) + progressLog = append(progressLog, fmt.Sprintf("❌ Command failed (%.1fs): %v", duration.Seconds(), err)) + return &ValidateResult{ + Success: false, + Message: "Custom validation command failed", + Details: err, + ProgressLog: progressLog, + }, nil + } + + duration := time.Since(startTime) + log.Infof(ctx, "✓ custom validation passed: duration=%.1fs", duration.Seconds()) + progressLog = append(progressLog, fmt.Sprintf("✅ Custom validation passed (%.1fs)", duration.Seconds())) + return &ValidateResult{ + Success: true, + Message: "Custom validation passed", + ProgressLog: progressLog, + }, nil +} diff --git a/experimental/apps-mcp/lib/validation/nodejs.go b/experimental/apps-mcp/lib/validation/nodejs.go new file mode 100644 index 0000000000..43b7451eaa --- /dev/null +++ b/experimental/apps-mcp/lib/validation/nodejs.go @@ -0,0 +1,82 @@ +package validation + +import ( + "context" + "fmt" + "time" + + "github.com/databricks/cli/libs/log" +) + +// ValidationNodeJs implements validation for Node.js-based projects using build, type check, and tests. +type ValidationNodeJs struct{} + +type validationStep struct { + name string + command string + errorPrefix string + displayName string +} + +func (v *ValidationNodeJs) Validate(ctx context.Context, workDir string) (*ValidateResult, error) { + log.Info(ctx, "Starting Node.js validation: build + typecheck + tests") + startTime := time.Now() + var progressLog []string + + progressLog = append(progressLog, "🔄 Starting Node.js validation: build + typecheck + tests") + + steps := []validationStep{ + { + name: "build", + command: "npm run build --if-present", + errorPrefix: "Failed to run npm build", + displayName: "Build", + }, + { + name: "typecheck", + command: "npm run typecheck --if-present", + errorPrefix: "Failed to run client typecheck", + displayName: "Type check", + }, + { + name: "tests", + command: "npm run test --if-present", + errorPrefix: "Failed to run tests", + displayName: "Tests", + }, + } + + for i, step := range steps { + stepNum := fmt.Sprintf("%d/%d", i+1, len(steps)) + log.Infof(ctx, "step %s: running %s...", stepNum, step.name) + progressLog = append(progressLog, fmt.Sprintf("⏳ Step %s: Running %s...", stepNum, step.displayName)) + + stepStart := time.Now() + err := runCommand(ctx, workDir, step.command) + if err != nil { + stepDuration := time.Since(stepStart) + log.Errorf(ctx, "%s failed (duration: %.1fs)", step.name, stepDuration.Seconds()) + progressLog = append(progressLog, fmt.Sprintf("❌ %s failed (%.1fs)", step.displayName, stepDuration.Seconds())) + return &ValidateResult{ + Success: false, + Message: step.errorPrefix, + Details: err, + ProgressLog: progressLog, + }, nil + } + stepDuration := time.Since(stepStart) + log.Infof(ctx, "✓ %s passed: duration=%.1fs", step.name, stepDuration.Seconds()) + progressLog = append(progressLog, fmt.Sprintf("✅ %s passed (%.1fs)", step.displayName, stepDuration.Seconds())) + } + + totalDuration := 
time.Since(startTime) + log.Infof(ctx, "✓ all validation checks passed: total_duration=%.1fs, steps=%s", + totalDuration.Seconds(), "build + type check + tests") + progressLog = append(progressLog, fmt.Sprintf("✅ All checks passed! Total: %.1fs", totalDuration.Seconds())) + + return &ValidateResult{ + Success: true, + Message: "All validation checks passed", + ProgressLog: progressLog, + }, nil +} diff --git a/experimental/apps-mcp/lib/validation/types.go b/experimental/apps-mcp/lib/validation/types.go new file mode 100644 index 0000000000..049d0893dc --- /dev/null +++ b/experimental/apps-mcp/lib/validation/types.go @@ -0,0 +1,55 @@ +package validation + +import ( + "context" + "fmt" +) + +// ValidationDetail contains detailed output from a failed validation. +type ValidationDetail struct { + ExitCode int `json:"exit_code"` + Stdout string `json:"stdout"` + Stderr string `json:"stderr"` +} + +func (vd *ValidationDetail) Error() string { + return fmt.Sprintf("validation failed (exit code %d)\nStdout:\n%s\nStderr:\n%s", + vd.ExitCode, vd.Stdout, vd.Stderr) +} + +// ValidateResult contains the outcome of a validation operation. +type ValidateResult struct { + Success bool `json:"success"` + Message string `json:"message"` + Details *ValidationDetail `json:"details,omitempty"` + ProgressLog []string `json:"progress_log,omitempty"` +} + +func (vr *ValidateResult) String() string { + var result string + + if len(vr.ProgressLog) > 0 { + result = "Validation Progress:\n" + for _, log := range vr.ProgressLog { + result += log + "\n" + } + result += "\n" + } + + if vr.Success { + result += "✓ " + vr.Message + } else { + result += "✗ " + vr.Message + if vr.Details != nil { + result += fmt.Sprintf("\n\nExit code: %d\n\nStdout:\n%s\n\nStderr:\n%s", + vr.Details.ExitCode, vr.Details.Stdout, vr.Details.Stderr) + } + } + + return result +} + +// Validation defines the interface for project validation strategies. +type Validation interface { + Validate(ctx context.Context, workDir string) (*ValidateResult, error) +} From 897957aa9bea256d25fc8777058ccb0e26ee2473 Mon Sep 17 00:00:00 2001 From: Igor Rekun Date: Thu, 27 Nov 2025 11:49:21 +0000 Subject: [PATCH 11/18] Adjust prompts to use nested CLI commands. --- experimental/apps-mcp/cmd/init_template.go | 23 +++----- experimental/apps-mcp/lib/prompts/apps.tmpl | 8 ++- .../apps-mcp/lib/prompts/explore.tmpl | 58 ++++++++++++++++--- .../lib/providers/clitools/provider.go | 29 +--------- .../apps-mcp/lib/validation/nodejs.go | 6 ++ 5 files changed, 72 insertions(+), 52 deletions(-) diff --git a/experimental/apps-mcp/cmd/init_template.go b/experimental/apps-mcp/cmd/init_template.go index 263e5295fa..981967f880 100644 --- a/experimental/apps-mcp/cmd/init_template.go +++ b/experimental/apps-mcp/cmd/init_template.go @@ -4,7 +4,6 @@ import ( "encoding/json" "fmt" "os" - "path/filepath" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/experimental/apps-mcp/lib/middlewares" @@ -15,20 +14,18 @@ import ( ) const ( - defaultTemplateRepo = "https://github.com/databricks/cli" - defaultTemplateDir = "experimental/apps-mcp/lib/templates/trpc" + defaultTemplateRepo = "https://github.com/neondatabase/appdotbuild-agent" + defaultTemplateDir = "edda/edda_templates/trpc_bundle" ) func newInitTemplateCmd() *cobra.Command { cmd := &cobra.Command{ Use: "init-template PROJECT_NAME", Short: "Initialize a new app from template", - Long: `Initialize a new Databricks app from the tRPC template. + Long: `Initialize a new Databricks app from a template. 
This is a shortcut for 'bundle init' with the default MCP app template.
-Auto-detects the SQL warehouse ID unless DATABRICKS_WAREHOUSE_ID is set.
-
-PROJECT_NAME is used as both the project name and the output directory.`,
+Auto-detects the SQL warehouse ID unless DATABRICKS_WAREHOUSE_ID is set.`,
 		Example: `  databricks experimental apps-mcp tools init-template my-app`,
 		Args:    cobra.ExactArgs(1),
 		PreRunE: root.MustWorkspaceClient,
@@ -36,14 +33,10 @@ PROJECT_NAME is used as both the project name and the output directory.`,
 			ctx := cmd.Context()
 			w := cmdctx.WorkspaceClient(ctx)
 
-			outputDir := args[0]
-			projectName := filepath.Base(outputDir)
-			if !filepath.IsAbs(outputDir) {
-				cwd, err := os.Getwd()
-				if err != nil {
-					return fmt.Errorf("get working directory: %w", err)
-				}
-				outputDir = filepath.Join(cwd, outputDir)
+			projectName := args[0]
+			outputDir, err := os.Getwd()
+			if err != nil {
+				return fmt.Errorf("get working directory: %w", err)
 			}
 
 			// set up session with client for middleware compatibility
diff --git a/experimental/apps-mcp/lib/prompts/apps.tmpl b/experimental/apps-mcp/lib/prompts/apps.tmpl
index 5d48f1d817..b1cfec3e6b 100644
--- a/experimental/apps-mcp/lib/prompts/apps.tmpl
+++ b/experimental/apps-mcp/lib/prompts/apps.tmpl
@@ -13,7 +13,13 @@ DATABRICKS APPS DEVELOPMENT
 
 ALWAYS start by scaffolding a new app using command:
 
-invoke_databricks_cli 'bundle init https://github.com/neondatabase/appdotbuild-agent --template-dir edda/edda_templates/trpc_bundle --config-file /dev/stdin <<EOF
-{"project_name":"my-app-name","sql_warehouse_id":"{{if .WarehouseID}}{{.WarehouseID}}{{else}}12345{{end}}"}
-EOF'
+invoke_databricks_cli 'experimental apps-mcp tools init-template your-app-name'
diff --git a/experimental/apps-mcp/lib/prompts/explore.tmpl b/experimental/apps-mcp/lib/prompts/explore.tmpl
--- a/experimental/apps-mcp/lib/prompts/explore.tmpl
+++ b/experimental/apps-mcp/lib/prompts/explore.tmpl
-   invoke_databricks_cli 'api post /api/2.0/sql/statements --json {"warehouse_id":"{{if .WarehouseID}}{{.WarehouseID}}{{else}}<warehouse-id>{{end}}","statement":"SELECT * FROM <catalog>.<schema>.<table> LIMIT 10","wait_timeout":"50s"}'
+⚡ EFFICIENT 4-STEP WORKFLOW:
+
+1. Find available catalogs:
+   invoke_databricks_cli 'catalogs list'
+
+2. Find schemas in a catalog:
+   invoke_databricks_cli 'schemas list <catalog>'
+
+3. Find tables in a schema:
+   invoke_databricks_cli 'tables list <catalog> <schema>'
+
+4. Batch discover multiple tables (ONE call for efficiency):
+   invoke_databricks_cli 'experimental apps-mcp tools discover-schema TABLE1 TABLE2 TABLE3'
+
+   ⚡ Always use batch mode: Discover multiple tables in ONE call instead of separate calls
+   Table format: CATALOG.SCHEMA.TABLE (e.g., samples.nyctaxi.trips)
+
+QUICK SQL EXECUTION:
+   Execute SQL and get JSON results:
+   invoke_databricks_cli 'experimental apps-mcp tools query "SELECT * FROM catalog.schema.table LIMIT 10"'
 
-   Response has status.state:
-   - "SUCCEEDED" → Results in result.data_array (you're done!)
-   - "PENDING" → Warehouse starting or query slow. Poll with:
-     invoke_databricks_cli 'api get /api/2.0/sql/statements/<statement-id>'
-     Repeat every 5-10s until "SUCCEEDED"
 
+⚠️ COMMON ERRORS:
+❌ Wrong: invoke_databricks_cli 'tables list samples.tpcds_sf1'
+✅ Correct: invoke_databricks_cli 'tables list samples tpcds_sf1'
+   (Use separate arguments, not dot notation for catalog and schema)
 
-   Note: First query on stopped warehouse takes 60-120s startup time
 
+📚 For detailed information on each command, see sections below.
+
+
+1. EXECUTING SQL QUERIES
+   Execute SQL queries using the query tool (recommended):
+   invoke_databricks_cli 'experimental apps-mcp tools query "SELECT * FROM catalog.schema.table LIMIT 10"'
+
 2. EXPLORING JOBS AND WORKFLOWS
    List all jobs:
    invoke_databricks_cli 'jobs list'
@@ -64,6 +86,12 @@ IMPORTANT: Use the invoke_databricks_cli tool to run all commands below!
    Get table details (schema, columns, properties):
    invoke_databricks_cli 'tables get <catalog>.<schema>.<table>'
+
+   Discover table schema with samples (recommended):
+   Batch discover multiple tables in ONE call:
+   invoke_databricks_cli 'experimental apps-mcp tools discover-schema catalog.schema.table1 catalog.schema.table2'
+
+   Single table:
+   invoke_databricks_cli 'experimental apps-mcp tools discover-schema catalog.schema.table'
 
 5. EXPLORING WORKSPACE FILES
    List workspace files and notebooks:
@@ -141,6 +169,18 @@ BEST PRACTICES
 ✅ DO use invoke_databricks_cli for all Databricks CLI commands
    (Better for user allowlisting and tracking)
 
+✅ DO use 'experimental apps-mcp tools query' for SQL execution
+   (Auto-wait, clean JSON output, no manual polling)
+
+✅ DO use batch discover-schema for multiple tables
+   (One call instead of multiple: more efficient)
+
+✅ DO test SQL with query tool before implementing in code
+   (Verify syntax and results interactively)
+
+✅ DO use 'experimental apps-mcp tools init-template your-app-name' for new projects
+   (Faster scaffolding with auto-configured warehouse)
+
 ✅ DO validate bundles before deploying:
    invoke_databricks_cli 'bundle validate'
 
diff --git a/experimental/apps-mcp/lib/providers/clitools/provider.go b/experimental/apps-mcp/lib/providers/clitools/provider.go
index 2e6dea2512..b106ae0252 100644
--- a/experimental/apps-mcp/lib/providers/clitools/provider.go
+++ b/experimental/apps-mcp/lib/providers/clitools/provider.go
@@ -108,33 +108,8 @@ func (p *Provider) RegisterTools(server *mcpsdk.Server) error {
 
 	mcpsdk.AddTool(server,
 		&mcpsdk.Tool{
-			Name: "invoke_databricks_cli",
-			Description: `Execute Databricks CLI command. Pass all arguments as a single string.
-
-## ⚡ EFFICIENT DATA DISCOVERY (Recommended):
-1. 'catalogs list' → find available catalogs
-2. 'schemas list CATALOG' → find schemas in catalog
-3. 'tables list CATALOG SCHEMA' → find tables in schema
-4. 'experimental apps-mcp tools discover-schema TABLE1 TABLE2 TABLE3' → **BATCH discover multiple tables in ONE call** ⚡
-
-## Data Commands:
-- Execute SQL: 'experimental apps-mcp tools query "SELECT * FROM table LIMIT 5"' (returns JSON + row count)
-- Discover schema: 'experimental apps-mcp tools discover-schema TABLE1 TABLE2 ...' (columns, types, samples, nulls)
-  ↳ ALWAYS use batch mode: 'experimental apps-mcp tools discover-schema tbl1 tbl2 tbl3' instead of 3 separate calls
-  ↳ Table format: CATALOG.SCHEMA.TABLE (e.g., samples.nyctaxi.trips)
-
-## Project Commands:
-- Init template: 'experimental apps-mcp tools init-template PROJECT_NAME' → create new app from tRPC template
-- Bundle commands: 'bundle deploy', 'bundle validate', 'bundle run JOB_NAME'
-
-## Common Errors:
-❌ 'tables list samples.tpcds_sf1' → Wrong format!
-✅ 'tables list samples tpcds_sf1' → Correct (CATALOG SCHEMA as separate args)
-
-## Best Practices:
-✅ Use batch discover-schema for multiple tables (faster)
-✅ Test SQL with 'experimental apps-mcp tools query' before implementing in code
-✅ Use 'experimental apps-mcp tools init-template' to scaffold new projects`,
+			Name:        "invoke_databricks_cli",
+			Description: "Execute Databricks CLI command. 
Pass all arguments as a single string.", }, func(ctx context.Context, req *mcpsdk.CallToolRequest, args InvokeDatabricksCLIInput) (*mcpsdk.CallToolResult, any, error) { log.Debugf(ctx, "invoke_databricks_cli called: command=%s", args.Command) diff --git a/experimental/apps-mcp/lib/validation/nodejs.go b/experimental/apps-mcp/lib/validation/nodejs.go index 43b7451eaa..1e1eb797b0 100644 --- a/experimental/apps-mcp/lib/validation/nodejs.go +++ b/experimental/apps-mcp/lib/validation/nodejs.go @@ -26,6 +26,12 @@ func (v *ValidationNodeJs) Validate(ctx context.Context, workDir string) (*Valid progressLog = append(progressLog, "🔄 Starting Node.js validation: build + typecheck + tests") steps := []validationStep{ + { + name: "install", + command: "npm install", + errorPrefix: "Failed to install dependencies", + displayName: "Install", + }, { name: "build", command: "npm run build --if-present", From 9efbb73ec038d400ff5b38fd49f252808c12d66f Mon Sep 17 00:00:00 2001 From: Igor Rekun Date: Thu, 27 Nov 2025 13:00:57 +0000 Subject: [PATCH 12/18] Remove workspace tools. --- experimental/apps-mcp/cmd/apps_mcp.go | 11 ++--------- experimental/apps-mcp/cmd/query.go | 2 +- experimental/apps-mcp/lib/config.go | 8 ++------ experimental/apps-mcp/lib/server/health.go | 5 ----- experimental/apps-mcp/lib/trajectory/tracker.go | 1 - 5 files changed, 5 insertions(+), 22 deletions(-) diff --git a/experimental/apps-mcp/cmd/apps_mcp.go b/experimental/apps-mcp/cmd/apps_mcp.go index 31709675e2..83da91447c 100644 --- a/experimental/apps-mcp/cmd/apps_mcp.go +++ b/experimental/apps-mcp/cmd/apps_mcp.go @@ -9,7 +9,6 @@ import ( func NewMcpCmd() *cobra.Command { var warehouseID string - var withWorkspaceTools bool cmd := &cobra.Command{ Use: "apps-mcp", @@ -24,17 +23,12 @@ The MCP server exposes the following capabilities: The server communicates via stdio using the Model Context Protocol.`, Example: ` # Start MCP server with required warehouse - databricks experimental apps-mcp --warehouse-id abc123 - - # Start with workspace tools enabled - databricks experimental apps-mcp --warehouse-id abc123 --with-workspace-tools`, + databricks experimental apps-mcp --warehouse-id abc123`, RunE: func(cmd *cobra.Command, args []string) error { ctx := cmd.Context() // Build MCP config from flags - cfg := &mcplib.Config{ - WithWorkspaceTools: withWorkspaceTools, - } + cfg := &mcplib.Config{} log.Infof(ctx, "Starting MCP server") @@ -54,7 +48,6 @@ The server communicates via stdio using the Model Context Protocol.`, // Define flags cmd.Flags().StringVar(&warehouseID, "warehouse-id", "", "Databricks SQL Warehouse ID") - cmd.Flags().BoolVar(&withWorkspaceTools, "with-workspace-tools", false, "Enable workspace tools (file operations, bash, grep, glob)") cmd.AddCommand(newInstallCmd()) cmd.AddCommand(newToolsCmd()) diff --git a/experimental/apps-mcp/cmd/query.go b/experimental/apps-mcp/cmd/query.go index 85053676b2..e8530a916b 100644 --- a/experimental/apps-mcp/cmd/query.go +++ b/experimental/apps-mcp/cmd/query.go @@ -18,7 +18,7 @@ import ( func newQueryCmd() *cobra.Command { cmd := &cobra.Command{ Use: "query SQL", - Short: "Execute SQL against Databricks warehouse", + Short: "Execute SQL against a Databricks warehouse", Long: `Execute a SQL statement against a Databricks SQL warehouse and return results. The command auto-detects an available warehouse unless DATABRICKS_WAREHOUSE_ID is set. 
diff --git a/experimental/apps-mcp/lib/config.go b/experimental/apps-mcp/lib/config.go index f816e1b48f..f8a4d8f6cc 100644 --- a/experimental/apps-mcp/lib/config.go +++ b/experimental/apps-mcp/lib/config.go @@ -4,13 +4,9 @@ package mcp // Config holds MCP server configuration. // Configuration is populated from CLI flags and Databricks client context. -type Config struct { - WithWorkspaceTools bool -} +type Config struct{} // DefaultConfig returns a Config with sensible default values. func DefaultConfig() *Config { - return &Config{ - WithWorkspaceTools: false, - } + return &Config{} } diff --git a/experimental/apps-mcp/lib/server/health.go b/experimental/apps-mcp/lib/server/health.go index 4dfe1eeac0..9e7fe76b78 100644 --- a/experimental/apps-mcp/lib/server/health.go +++ b/experimental/apps-mcp/lib/server/health.go @@ -32,11 +32,6 @@ func (s *Server) CheckHealth(ctx context.Context) *HealthStatus { // Check CLI tools provider status.Providers["clitools"] = "healthy" - // Check workspace provider if enabled - if s.config.WithWorkspaceTools { - status.Providers["workspace"] = "healthy" - } - return status } diff --git a/experimental/apps-mcp/lib/trajectory/tracker.go b/experimental/apps-mcp/lib/trajectory/tracker.go index 2d5a7df560..6941361532 100644 --- a/experimental/apps-mcp/lib/trajectory/tracker.go +++ b/experimental/apps-mcp/lib/trajectory/tracker.go @@ -49,7 +49,6 @@ func NewTracker(ctx context.Context, sess *session.Session, cfg *mcp.Config) (*T func (t *Tracker) writeSessionEntry(cfg *mcp.Config) error { configMap := make(map[string]any) - configMap["with_workspace_tools"] = cfg.WithWorkspaceTools entry := NewSessionEntry(t.sessionID, configMap) return t.writer.WriteEntry(entry) From 845bc3600b99fd0a58dcee6c37cb20fd45b66546 Mon Sep 17 00:00:00 2001 From: Igor Rekun Date: Thu, 27 Nov 2025 15:57:31 +0000 Subject: [PATCH 13/18] Draft init_template resolver. 
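
The draft maps a short template type to its source repo and directory via
a small registry. A condensed sketch of the resolution flow (illustration
mirroring the code added below; outputDir stands in for the working
directory):

    // Resolve a registered template type to its repo and template dir.
    cfg, ok := templateRegistry["apps"] // the only registered type so far
    if !ok {
    	return fmt.Errorf("unknown template type")
    }
    r := template.Resolver{
    	TemplatePathOrUrl: cfg.repo, // github.com/neondatabase/appdotbuild-agent
    	TemplateDir:       cfg.dir,  // edda/edda_templates/trpc_bundle
    	OutputDir:         outputDir,
    }
    tmpl, err := r.Resolve(ctx)
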
--- experimental/apps-mcp/cmd/init_template.go | 155 ++++++++++++++++----- 1 file changed, 119 insertions(+), 36 deletions(-) diff --git a/experimental/apps-mcp/cmd/init_template.go b/experimental/apps-mcp/cmd/init_template.go index 981967f880..d0d5dab671 100644 --- a/experimental/apps-mcp/cmd/init_template.go +++ b/experimental/apps-mcp/cmd/init_template.go @@ -4,59 +4,123 @@ import ( "encoding/json" "fmt" "os" + "sort" + "strings" - "github.com/databricks/cli/cmd/root" - "github.com/databricks/cli/experimental/apps-mcp/lib/middlewares" - "github.com/databricks/cli/experimental/apps-mcp/lib/session" - "github.com/databricks/cli/libs/cmdctx" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/jsonschema" "github.com/databricks/cli/libs/template" "github.com/spf13/cobra" ) -const ( - defaultTemplateRepo = "https://github.com/neondatabase/appdotbuild-agent" - defaultTemplateDir = "edda/edda_templates/trpc_bundle" -) +type templateConfig struct { + repo string + dir string +} + +var templateRegistry = map[string]templateConfig{ + "apps": { + repo: "https://github.com/neondatabase/appdotbuild-agent", + dir: "edda/edda_templates/trpc_bundle", + }, +} + +func getTemplateTypes() []string { + types := make([]string, 0, len(templateRegistry)) + for t := range templateRegistry { + types = append(types, t) + } + sort.Strings(types) + return types +} + +func formatSchemaForDisplay(ctx *cobra.Command, schema *jsonschema.Schema, templateType string) { + if len(schema.Properties) == 0 { + return // Skip display for empty schemas + } + + cmdio.LogString(ctx.Context(), "\nTemplate Configuration Variables:") + cmdio.LogString(ctx.Context(), "==================================\n") + + for _, prop := range schema.OrderedProperties() { + if prop.Schema.SkipPromptIf != nil && prop.Schema.Default == nil { + continue + } + + cmdio.LogString(ctx.Context(), fmt.Sprintf("\n%s (%s)", prop.Name, prop.Schema.Type)) + + if prop.Schema.Description != "" { + desc := strings.TrimSpace(prop.Schema.Description) + desc = strings.ReplaceAll(desc, "\\n", "\n") + cmdio.LogString(ctx.Context(), " Description: "+desc) + } + + if prop.Schema.Default != nil { + cmdio.LogString(ctx.Context(), fmt.Sprintf(" Default: %v", prop.Schema.Default)) + } + if len(prop.Schema.Enum) > 0 { + cmdio.LogString(ctx.Context(), " Options:") + for _, opt := range prop.Schema.Enum { + cmdio.LogString(ctx.Context(), fmt.Sprintf(" - %v", opt)) + } + } + + for _, req := range schema.Required { + if req == prop.Name { + cmdio.LogString(ctx.Context(), " Required: yes") + break + } + } + } + + cmdio.LogString(ctx.Context(), "\n\nTo initialize the template with these values, use:") + cmdio.LogString(ctx.Context(), fmt.Sprintf(" experimental apps-mcp tools init-template %s --config_json '{\"key\":\"value\",...}'", templateType)) +} func newInitTemplateCmd() *cobra.Command { + var configJSON string + cmd := &cobra.Command{ - Use: "init-template PROJECT_NAME", + Use: "init-template TEMPLATE_TYPE", Short: "Initialize a new app from template", Long: `Initialize a new Databricks app from a template. -This is a shortcut for 'bundle init' with the default MCP app template. -Auto-detects the SQL warehouse ID unless DATABRICKS_WAREHOUSE_ID is set.`, - Example: ` databricks experimental apps-mcp tools init-template my-app`, - Args: cobra.ExactArgs(1), - PreRunE: root.MustWorkspaceClient, +Supported template types: apps + +When run without --config_json, displays the template schema and exits. 
+When run with --config_json, initializes the template with the provided configuration.`, + Example: ` # Display template schema + experimental apps-mcp tools init-template apps + + # Initialize with configuration + experimental apps-mcp tools init-template apps --config_json '{"project_name":"my-app"}'`, + Args: func(cmd *cobra.Command, args []string) error { + if len(args) != 1 { + return fmt.Errorf("accepts 1 arg, received %d", len(args)) + } + templateType := args[0] + if _, ok := templateRegistry[templateType]; !ok { + return fmt.Errorf("unknown template type %q. Supported types: %s", + templateType, strings.Join(getTemplateTypes(), ", ")) + } + return nil + }, RunE: func(cmd *cobra.Command, args []string) error { ctx := cmd.Context() - w := cmdctx.WorkspaceClient(ctx) - projectName := args[0] + templateType := args[0] + tmplCfg := templateRegistry[templateType] + outputDir, err := os.Getwd() if err != nil { return fmt.Errorf("get working directory: %w", err) } - // set up session with client for middleware compatibility - sess := session.NewSession() - sess.Set(middlewares.DatabricksClientKey, w) - ctx = session.WithSession(ctx, sess) - - warehouseID, err := middlewares.GetWarehouseID(ctx) - if err != nil { - return err - } - - // create temp config file with parameters - configMap := map[string]string{ - "project_name": projectName, - "sql_warehouse_id": warehouseID, - } - configBytes, err := json.Marshal(configMap) - if err != nil { - return fmt.Errorf("marshal config: %w", err) + var userConfigMap map[string]any + if configJSON != "" { + if err := json.Unmarshal([]byte(configJSON), &userConfigMap); err != nil { + return fmt.Errorf("invalid JSON in --config_json: %w", err) + } } tmpFile, err := os.CreateTemp("", "mcp-template-config-*.json") @@ -65,6 +129,10 @@ Auto-detects the SQL warehouse ID unless DATABRICKS_WAREHOUSE_ID is set.`, } defer os.Remove(tmpFile.Name()) + configBytes, err := json.Marshal(userConfigMap) + if err != nil { + return fmt.Errorf("marshal config: %w", err) + } if _, err := tmpFile.Write(configBytes); err != nil { return fmt.Errorf("write config file: %w", err) } @@ -73,10 +141,10 @@ Auto-detects the SQL warehouse ID unless DATABRICKS_WAREHOUSE_ID is set.`, } r := template.Resolver{ - TemplatePathOrUrl: defaultTemplateRepo, + TemplatePathOrUrl: tmplCfg.repo, ConfigFile: tmpFile.Name(), OutputDir: outputDir, - TemplateDir: defaultTemplateDir, + TemplateDir: tmplCfg.dir, } tmpl, err := r.Resolve(ctx) @@ -85,14 +153,29 @@ Auto-detects the SQL warehouse ID unless DATABRICKS_WAREHOUSE_ID is set.`, } defer tmpl.Reader.Cleanup(ctx) + schema, _, err := tmpl.Reader.LoadSchemaAndTemplateFS(ctx) + if err != nil { + return fmt.Errorf("failed to load template schema: %w", err) + } + + if configJSON == "" { + if len(schema.Properties) > 0 { + formatSchemaForDisplay(cmd, schema, templateType) + return nil // Exit without materializing + } + } + err = tmpl.Writer.Materialize(ctx, tmpl.Reader) if err != nil { return err } + tmpl.Writer.LogTelemetry(ctx) return nil }, } + cmd.Flags().StringVar(&configJSON, "config_json", "", "JSON string with configuration values") + return cmd } From 9c555308ebef646db9b05e6f9a9307a699d309d1 Mon Sep 17 00:00:00 2001 From: Igor Rekun Date: Thu, 27 Nov 2025 16:56:34 +0000 Subject: [PATCH 14/18] Update deployment instructions and parametrize init-template command. 
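
The inline --config-json value is bridged to the existing file-based
resolver by writing it to a temporary file. A condensed sketch of that
flow (illustration only; full error handling is in the patch itself):

    // Validate the inline JSON, then hand it to template.Resolver as a file.
    var userConfigMap map[string]any
    if err := json.Unmarshal([]byte(configJSON), &userConfigMap); err != nil {
    	return fmt.Errorf("invalid JSON in --config-json: %w", err)
    }
    tmpFile, err := os.CreateTemp("", "mcp-template-config-*.json")
    if err != nil {
    	return err
    }
    defer os.Remove(tmpFile.Name())
    configFile = tmpFile.Name() // passed along as Resolver.ConfigFile
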
---
 experimental/apps-mcp/cmd/init_template.go  | 239 +++++++++-----
 experimental/apps-mcp/lib/prompts/apps.tmpl |  19 +-
 2 files changed, 129 insertions(+), 129 deletions(-)

diff --git a/experimental/apps-mcp/cmd/init_template.go b/experimental/apps-mcp/cmd/init_template.go
index d0d5dab671..90d92946b5 100644
--- a/experimental/apps-mcp/cmd/init_template.go
+++ b/experimental/apps-mcp/cmd/init_template.go
@@ -2,125 +2,118 @@ package mcp
 
 import (
 	"encoding/json"
+	"errors"
 	"fmt"
 	"os"
-	"sort"
-	"strings"
 
+	"github.com/databricks/cli/cmd/root"
 	"github.com/databricks/cli/libs/cmdio"
-	"github.com/databricks/cli/libs/jsonschema"
 	"github.com/databricks/cli/libs/template"
 	"github.com/spf13/cobra"
 )
 
-type templateConfig struct {
-	repo string
-	dir  string
-}
-
-var templateRegistry = map[string]templateConfig{
-	"apps": {
-		repo: "https://github.com/neondatabase/appdotbuild-agent",
-		dir:  "edda/edda_templates/trpc_bundle",
-	},
-}
-
-func getTemplateTypes() []string {
-	types := make([]string, 0, len(templateRegistry))
-	for t := range templateRegistry {
-		types = append(types, t)
-	}
-	sort.Strings(types)
-	return types
-}
-
-func formatSchemaForDisplay(ctx *cobra.Command, schema *jsonschema.Schema, templateType string) {
-	if len(schema.Properties) == 0 {
-		return // Skip display for empty schemas
+func newInitTemplateCmd() *cobra.Command {
+	cmd := &cobra.Command{
+		Use:   "init-template [TEMPLATE_PATH]",
+		Short: "Initialize using a bundle template",
+		Args:  root.MaximumNArgs(1),
+		Long: fmt.Sprintf(`Initialize using a bundle template to get started quickly.
+
+TEMPLATE_PATH optionally specifies which template to use. It can be one of the following:
+%s
+- a local file system path with a template directory
+- a Git repository URL, e.g. https://github.com/my/repository
+
+Supports the same options as 'databricks bundle init' plus:
+  --describe: Display template schema without materializing
+  --config-json: Provide config as JSON string instead of file
+
+Examples:
+  experimental apps-mcp tools init-template                  # Choose from built-in templates
+  experimental apps-mcp tools init-template default-python   # Python jobs and notebooks
+  experimental apps-mcp tools init-template --output-dir ./my-project
+  experimental apps-mcp tools init-template default-python --describe
+  experimental apps-mcp tools init-template default-python --config-json '{"project_name":"my-app"}'
+
+After initialization:
+  databricks bundle deploy --target dev
+
+See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more information on templates.`, template.HelpDescriptions()),
 	}
 
-	cmdio.LogString(ctx.Context(), "\nTemplate Configuration Variables:")
-	cmdio.LogString(ctx.Context(), "==================================\n")
-
-	for _, prop := range schema.OrderedProperties() {
-		if prop.Schema.SkipPromptIf != nil && prop.Schema.Default == nil {
-			continue
-		}
-
-		cmdio.LogString(ctx.Context(), fmt.Sprintf("\n%s (%s)", prop.Name, prop.Schema.Type))
-
-		if prop.Schema.Description != "" {
-			desc := strings.TrimSpace(prop.Schema.Description)
-			desc = strings.ReplaceAll(desc, "\\n", "\n")
-			cmdio.LogString(ctx.Context(), "  Description: "+desc)
+	var configFile string
+	var outputDir string
+	var templateDir string
+	var tag string
+	var branch string
+	var configJSON string
+	var describe bool
+
+	cmd.Flags().StringVar(&configFile, "config-file", "", "JSON file containing key value pairs of input parameters required for template initialization.")
+	cmd.Flags().StringVar(&templateDir, "template-dir", "", "Directory path within a Git repository containing the template.")
+	cmd.Flags().StringVar(&outputDir, "output-dir", "", "Directory to write the initialized template to.")
+	cmd.Flags().StringVar(&tag, "tag", "", "Git tag to use for template initialization")
+	cmd.Flags().StringVar(&branch, "branch", "", "Git branch to use for template initialization")
+	cmd.Flags().StringVar(&configJSON, "config-json", "", "JSON string containing key value pairs (alternative to --config-file).")
+	cmd.Flags().BoolVar(&describe, "describe", false, "Display template schema without initializing")
+
+	cmd.PreRunE = root.MustWorkspaceClient
+	cmd.RunE = func(cmd *cobra.Command, args []string) error {
+		if tag != "" && branch != "" {
+			return errors.New("only one of --tag or --branch can be specified")
 		}
 
-		if prop.Schema.Default != nil {
-			cmdio.LogString(ctx.Context(), fmt.Sprintf("  Default: %v", prop.Schema.Default))
-		}
+		if configFile != "" && configJSON != "" {
+			return errors.New("only one of --config-file or --config-json can be specified")
+		}
 
-		if len(prop.Schema.Enum) > 0 {
-			cmdio.LogString(ctx.Context(), "  Options:")
-			for _, opt := range prop.Schema.Enum {
-				cmdio.LogString(ctx.Context(), fmt.Sprintf("    - %v", opt))
-			}
+		var templatePathOrUrl string
+		if len(args) > 0 {
+			templatePathOrUrl = args[0]
 		}
 
-		for _, req := range schema.Required {
-			if req == prop.Name {
-				cmdio.LogString(ctx.Context(), "  Required: yes")
-				break
-			}
+		ctx := cmd.Context()
 
-	}
 
-	cmdio.LogString(ctx.Context(), "\n\nTo initialize the template with these values, use:")
-	cmdio.LogString(ctx.Context(), fmt.Sprintf("  experimental apps-mcp tools init-template %s --config_json '{\"key\":\"value\",...}'", templateType))
-}
+		// NEW: Describe mode - show schema only
+		if describe {
+			r := template.Resolver{
+				TemplatePathOrUrl: templatePathOrUrl,
+				ConfigFile:        "",
+				OutputDir:         outputDir,
+				TemplateDir:       templateDir,
+				Tag:               tag,
+				Branch:            branch,
+			}
 
-func newInitTemplateCmd() *cobra.Command {
-	var configJSON string
+			tmpl, err := r.Resolve(ctx)
+			if errors.Is(err, template.ErrCustomSelected) {
+				cmdio.LogString(ctx, "Please specify a path or Git repository to use a custom template.")
+				cmdio.LogString(ctx, "See https://docs.databricks.com/en/dev-tools/bundles/templates.html to learn more about custom templates.")
+				return nil
 			}
-	cmd := &cobra.Command{
-		Use:   "init-template TEMPLATE_TYPE",
-		Short: "Initialize a new app from template",
-		Long: `Initialize a new Databricks app from a template.
-
-Supported template types: apps
-
-When run without --config_json, displays the template schema and exits.
-When run with --config_json, initializes the template with the provided configuration.`,
-		Example: `  # Display template schema
-  experimental apps-mcp tools init-template apps
-
-  # Initialize with configuration
-  experimental apps-mcp tools init-template apps --config_json '{"project_name":"my-app"}'`,
-		Args: func(cmd *cobra.Command, args []string) error {
-			if len(args) != 1 {
-				return fmt.Errorf("accepts 1 arg, received %d", len(args))
+			if err != nil {
+				return err
 			}
-			templateType := args[0]
-			if _, ok := templateRegistry[templateType]; !ok {
-				return fmt.Errorf("unknown template type %q. 
Supported types: %s", - templateType, strings.Join(getTemplateTypes(), ", ")) + if err != nil { + return err } - return nil - }, - RunE: func(cmd *cobra.Command, args []string) error { - ctx := cmd.Context() + defer tmpl.Reader.Cleanup(ctx) - templateType := args[0] - tmplCfg := templateRegistry[templateType] + schema, _, err := tmpl.Reader.LoadSchemaAndTemplateFS(ctx) + if err != nil { + return fmt.Errorf("failed to load template schema: %w", err) + } - outputDir, err := os.Getwd() + schemaJSON, err := json.MarshalIndent(schema, "", " ") if err != nil { - return fmt.Errorf("get working directory: %w", err) + return err } + cmdio.LogString(ctx, string(schemaJSON)) + return nil + } + // NEW: Handle config_json by creating temp config file + if configJSON != "" { var userConfigMap map[string]any - if configJSON != "" { - if err := json.Unmarshal([]byte(configJSON), &userConfigMap); err != nil { - return fmt.Errorf("invalid JSON in --config_json: %w", err) - } + if err := json.Unmarshal([]byte(configJSON), &userConfigMap); err != nil { + return fmt.Errorf("invalid JSON in --config-json: %w", err) } tmpFile, err := os.CreateTemp("", "mcp-template-config-*.json") @@ -140,42 +133,36 @@ When run with --config_json, initializes the template with the provided configur return fmt.Errorf("close config file: %w", err) } - r := template.Resolver{ - TemplatePathOrUrl: tmplCfg.repo, - ConfigFile: tmpFile.Name(), - OutputDir: outputDir, - TemplateDir: tmplCfg.dir, - } - - tmpl, err := r.Resolve(ctx) - if err != nil { - return err - } - defer tmpl.Reader.Cleanup(ctx) - - schema, _, err := tmpl.Reader.LoadSchemaAndTemplateFS(ctx) - if err != nil { - return fmt.Errorf("failed to load template schema: %w", err) - } - - if configJSON == "" { - if len(schema.Properties) > 0 { - formatSchemaForDisplay(cmd, schema, templateType) - return nil // Exit without materializing - } - } + configFile = tmpFile.Name() + } - err = tmpl.Writer.Materialize(ctx, tmpl.Reader) - if err != nil { - return err - } + // Standard materialize flow (identical to bundle/init.go) + r := template.Resolver{ + TemplatePathOrUrl: templatePathOrUrl, + ConfigFile: configFile, + OutputDir: outputDir, + TemplateDir: templateDir, + Tag: tag, + Branch: branch, + } - tmpl.Writer.LogTelemetry(ctx) + tmpl, err := r.Resolve(ctx) + if errors.Is(err, template.ErrCustomSelected) { + cmdio.LogString(ctx, "Please specify a path or Git repository to use a custom template.") + cmdio.LogString(ctx, "See https://docs.databricks.com/en/dev-tools/bundles/templates.html to learn more about custom templates.") return nil - }, - } - - cmd.Flags().StringVar(&configJSON, "config_json", "", "JSON string with configuration values") + } + if err != nil { + return err + } + defer tmpl.Reader.Cleanup(ctx) + err = tmpl.Writer.Materialize(ctx, tmpl.Reader) + if err != nil { + return err + } + tmpl.Writer.LogTelemetry(ctx) + return nil + } return cmd } diff --git a/experimental/apps-mcp/lib/prompts/apps.tmpl b/experimental/apps-mcp/lib/prompts/apps.tmpl index b1cfec3e6b..766e723018 100644 --- a/experimental/apps-mcp/lib/prompts/apps.tmpl +++ b/experimental/apps-mcp/lib/prompts/apps.tmpl @@ -13,7 +13,7 @@ DATABRICKS APPS DEVELOPMENT ALWAYS start by scaffolding a new app using command: -invoke_databricks_cli 'experimental apps-mcp tools init-template your-app-name' +invoke_databricks_cli 'experimental apps-mcp tools init-template https://github.com/neondatabase/appdotbuild-agent --template-dir edda/edda_templates/trpc_bundle --config-json 
'{"project_name":"my-app-name","sql_warehouse_id":"{{if .WarehouseID}}{{.WarehouseID}}{{else}}12345{{end}}"}'' # Validation @@ -21,7 +21,20 @@ invoke_databricks_cli 'experimental apps-mcp tools init-template your-app-name' invoke_databricks_cli 'experimental apps-mcp tools validate ./your-app-location' +# Deployment + +Always use the sequence of commands: + +invoke_databricks_cli 'bundle deploy' +invoke_databricks_cli 'bundle run my-app-name_app' + +⚠️ Important: the name of the resource is found in ./resources/trpc.app.yml +` +resources: + apps: + my-trpc-app_app: <- this is the argument to 'bundle run' +` + # View and manage your app: -invoke_databricks_cli 'apps list' -invoke_databricks_cli 'apps get ' +invoke_databricks_cli 'bundle summary' From 4c9825810446f3e27e853479a5b587a81ae16c86 Mon Sep 17 00:00:00 2001 From: Igor Rekun Date: Thu, 27 Nov 2025 17:26:22 +0000 Subject: [PATCH 15/18] Use AppKit template. --- experimental/apps-mcp/lib/prompts/apps.tmpl | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/experimental/apps-mcp/lib/prompts/apps.tmpl b/experimental/apps-mcp/lib/prompts/apps.tmpl index 766e723018..a4665bcef8 100644 --- a/experimental/apps-mcp/lib/prompts/apps.tmpl +++ b/experimental/apps-mcp/lib/prompts/apps.tmpl @@ -11,9 +11,9 @@ DATABRICKS APPS DEVELOPMENT # Initialization -ALWAYS start by scaffolding a new app using command: +⚠️ ALWAYS start by scaffolding a new app using command: -invoke_databricks_cli 'experimental apps-mcp tools init-template https://github.com/neondatabase/appdotbuild-agent --template-dir edda/edda_templates/trpc_bundle --config-json '{"project_name":"my-app-name","sql_warehouse_id":"{{if .WarehouseID}}{{.WarehouseID}}{{else}}12345{{end}}"}'' +invoke_databricks_cli 'experimental apps-mcp tools init-template https://github.com/databricks/cli --template-dir experimental/apps-mcp/templates/appkit --branch mcp/appkit-bundle --config-json '{"project_name":"my-app-name","sql_warehouse_id":"{{if .WarehouseID}}{{.WarehouseID}}{{end}}"}'' # Validation @@ -23,17 +23,10 @@ invoke_databricks_cli 'experimental apps-mcp tools validate ./your-app-location' # Deployment -Always use the sequence of commands: +⚠️ Always use the sequence of commands: invoke_databricks_cli 'bundle deploy' -invoke_databricks_cli 'bundle run my-app-name_app' - -⚠️ Important: the name of the resource is found in ./resources/trpc.app.yml -` -resources: - apps: - my-trpc-app_app: <- this is the argument to 'bundle run' -` +invoke_databricks_cli 'bundle run app' # View and manage your app: From 311ddfe2e7ec02b93b987e2de3faf76ffea9e20b Mon Sep 17 00:00:00 2001 From: Igor Rekun Date: Thu, 27 Nov 2025 17:33:26 +0000 Subject: [PATCH 16/18] Slim instructions, remove duplication. --- .../apps-mcp/lib/prompts/explore.tmpl | 104 ++++-------------- .../lib/prompts/initialization_message.tmpl | 2 - 2 files changed, 20 insertions(+), 86 deletions(-) diff --git a/experimental/apps-mcp/lib/prompts/explore.tmpl b/experimental/apps-mcp/lib/prompts/explore.tmpl index 72c962bdf1..0ef786af13 100644 --- a/experimental/apps-mcp/lib/prompts/explore.tmpl +++ b/experimental/apps-mcp/lib/prompts/explore.tmpl @@ -7,47 +7,12 @@ * */ -}} -Databricks Data Exploration Guide -===================================== - {{.WorkspaceInfo}}{{if .WarehouseName}} Default SQL Warehouse: {{.WarehouseName}} ({{.WarehouseID}}){{else}} Note: No SQL warehouse detected. 
SQL queries will require warehouse_id to be specified manually.{{end}}{{.ProfilesInfo}}
 
 IMPORTANT: Use the invoke_databricks_cli tool to run all commands below!
 
-QUICK START - DATA DISCOVERY
-=============================
-
-⚡ EFFICIENT 4-STEP WORKFLOW:
-
-1. Find available catalogs:
-   invoke_databricks_cli 'catalogs list'
-
-2. Find schemas in a catalog:
-   invoke_databricks_cli 'schemas list <catalog>'
-
-3. Find tables in a schema:
-   invoke_databricks_cli 'tables list <catalog> <schema>'
-
-4. Batch discover multiple tables (ONE call for efficiency):
-   invoke_databricks_cli 'experimental apps-mcp tools discover-schema TABLE1 TABLE2 TABLE3'
-
-   ⚡ Always use batch mode: Discover multiple tables in ONE call instead of separate calls
-   Table format: CATALOG.SCHEMA.TABLE (e.g., samples.nyctaxi.trips)
-
-QUICK SQL EXECUTION:
-   Execute SQL and get JSON results:
-   invoke_databricks_cli 'experimental apps-mcp tools query "SELECT * FROM catalog.schema.table LIMIT 10"'
-
-⚠️ COMMON ERRORS:
-❌ Wrong: invoke_databricks_cli 'tables list samples.tpcds_sf1'
-✅ Correct: invoke_databricks_cli 'tables list samples tpcds_sf1'
-   (Use separate arguments, not dot notation for catalog and schema)
-
-📚 For detailed information on each command, see sections below.
-
-
 1. EXECUTING SQL QUERIES
    Execute SQL queries using the query tool (recommended):
    invoke_databricks_cli 'experimental apps-mcp tools query "SELECT * FROM catalog.schema.table LIMIT 10"'
@@ -72,26 +37,31 @@ QUICK SQL EXECUTION:
 
 4. EXPLORING UNITY CATALOG DATA
 
-   Unity Catalog uses a three-level namespace: catalog.schema.table
+   ⚡ EFFICIENT 4-STEP WORKFLOW:
+
+   1. Find available catalogs:
+      invoke_databricks_cli 'catalogs list'
 
-   List all catalogs:
-   invoke_databricks_cli 'catalogs list'
+   2. Find schemas in a catalog:
+      invoke_databricks_cli 'schemas list <catalog>'
 
-   List schemas in a catalog:
-   invoke_databricks_cli 'schemas list <catalog>'
+   3. Find tables in a schema:
+      invoke_databricks_cli 'tables list <catalog> <schema>'
 
-   List tables in a schema:
-   invoke_databricks_cli 'tables list <catalog> <schema>'
+   4. Batch discover multiple tables (ONE call for efficiency):
+      invoke_databricks_cli 'experimental apps-mcp tools discover-schema TABLE1 TABLE2 TABLE3'
 
-   Get table details (schema, columns, properties):
-   invoke_databricks_cli 'tables get <catalog>.<schema>.<table>'
 
+      ⚡ Always use batch mode: Discover multiple tables in ONE call instead of separate calls
+      Table format: CATALOG.SCHEMA.TABLE (e.g., samples.nyctaxi.trips)
+
+   QUICK SQL EXECUTION:
+      Execute SQL and get JSON results:
+      invoke_databricks_cli 'experimental apps-mcp tools query "SELECT * FROM catalog.schema.table LIMIT 10"'
 
-   Discover table schema with samples (recommended):
-   Batch discover multiple tables in ONE call:
-   invoke_databricks_cli 'experimental apps-mcp tools discover-schema catalog.schema.table1 catalog.schema.table2'
 
-   Single table:
-   invoke_databricks_cli 'experimental apps-mcp tools discover-schema catalog.schema.table'
+   ⚠️ COMMON ERRORS:
+   ❌ Wrong: invoke_databricks_cli 'tables list samples.tpcds_sf1'
+   ✅ Correct: invoke_databricks_cli 'tables list samples tpcds_sf1'
+      (Use separate arguments, not dot notation for catalog and schema)
 
 5. EXPLORING WORKSPACE FILES
    List workspace files and notebooks:
@@ -101,12 +71,6 @@ QUICK SQL EXECUTION:
    invoke_databricks_cli 'workspace export <path>'
 
-Getting Started:
-- Use the commands above to explore what resources exist in the workspace
-- All commands support --output json for programmatic access
-- Remember to add --profile when working with non-default workspaces
-
-
 DATABRICKS ASSET BUNDLES (DABs) WORKFLOW
 =========================================
 
@@ -131,37 +95,12 @@ Working with Existing Bundle Project:
   2. Deploy to environment:
      invoke_databricks_cli 'bundle deploy --target <target>'
 
-  3. Run a resource (job/pipeline):
+  3. Run a resource (job/pipeline/app):
      invoke_databricks_cli 'bundle run <resource-key>'
 
   4. Destroy deployed resources:
      invoke_databricks_cli 'bundle destroy --target <target>'
 
-Bundle Commands Reference:
-  - bundle init      # Initialize new bundle from template
-  - bundle validate  # Validate bundle configuration
-  - bundle deploy    # Deploy bundle to workspace
-  - bundle run       # Run a job or pipeline
-  - bundle destroy   # Remove deployed resources
-  - bundle schema    # Show bundle configuration schema
-
-💡 Tip: Use 'invoke_databricks_cli bundle init' to see available templates
-
-
-COMMON PATTERNS
-===============
-
-Multi-environment deployment:
-  Deploy to different environments using targets in databricks.yml:
-  invoke_databricks_cli 'bundle deploy --target dev'
-  invoke_databricks_cli 'bundle deploy --target prod'
-
-Working with pipelines/jobs in bundles:
-  Add resources to databricks.yml, then:
-  invoke_databricks_cli 'bundle validate'
-  invoke_databricks_cli 'bundle deploy'
-  invoke_databricks_cli 'bundle run <resource-key>'
-
 BEST PRACTICES
 ==============
 
 ✅ DO use invoke_databricks_cli for all Databricks CLI commands
    (Better for user allowlisting and tracking)
 
 ✅ DO use 'experimental apps-mcp tools query' for SQL execution
    (Auto-wait, clean JSON output, no manual polling)
 
 ✅ DO use batch discover-schema for multiple tables
    (One call instead of multiple: more efficient)
 
 ✅ DO test SQL with query tool before implementing in code
    (Verify syntax and results interactively)
 
-✅ DO use 'experimental apps-mcp tools init-template your-app-name' for new projects
-   (Faster scaffolding with auto-configured warehouse)
-
 ✅ DO validate bundles before deploying:
    invoke_databricks_cli 'bundle validate'
 
diff --git a/experimental/apps-mcp/lib/prompts/initialization_message.tmpl b/experimental/apps-mcp/lib/prompts/initialization_message.tmpl
index 7529ad6a33..4621d24ad8 100644
--- a/experimental/apps-mcp/lib/prompts/initialization_message.tmpl
+++ b/experimental/apps-mcp/lib/prompts/initialization_message.tmpl
@@ -3,13 +3,11 @@ Your session in Databricks MCP has been successfully initialized.
 ## Available Tools:
 - **explore**: Discover workspace resources, get CLI command examples, and workflow recommendations
 - **invoke_databricks_cli**: Execute any Databricks CLI command (bundle, apps, workspace, etc.) 
-- **Databricks API tools**: Query catalogs, schemas, tables, and execute SQL queries ## Workflow Best Practices: - Use `explore` at the start of tasks to understand workspace context and get relevant commands - Use `invoke_databricks_cli` for all Databricks CLI operations (better for tracking and allowlisting) - For Databricks Asset Bundles: Use `invoke_databricks_cli 'bundle validate'` before deploying -- For Databricks Apps: Use `invoke_databricks_cli 'apps deploy'` for deployment - Always validate before deploying to ensure configuration is correct - When not sure about the user's intent, ask clarifying questions before proceeding - Do NOT create summary files, reports, or README unless explicitly requested From 723492a7847c4763737abd0254fdee04919416e5 Mon Sep 17 00:00:00 2001 From: Igor Rekun Date: Thu, 27 Nov 2025 19:07:44 +0000 Subject: [PATCH 17/18] Inject claude.md instructions after bundle init if present. --- experimental/apps-mcp/cmd/init_template.go | 47 ++++++++++++++++++++- experimental/apps-mcp/lib/prompts/apps.tmpl | 2 +- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/experimental/apps-mcp/cmd/init_template.go b/experimental/apps-mcp/cmd/init_template.go index 90d92946b5..c2d7dae1ac 100644 --- a/experimental/apps-mcp/cmd/init_template.go +++ b/experimental/apps-mcp/cmd/init_template.go @@ -1,10 +1,12 @@ package mcp import ( + "context" "encoding/json" "errors" "fmt" "os" + "path/filepath" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/cmdio" @@ -12,6 +14,46 @@ import ( "github.com/spf13/cobra" ) +func readClaudeMd(ctx context.Context, configFile string) { + showFallback := func() { + cmdio.LogString(ctx, "\nConsult with CLAUDE.md provided in the bundle if present.") + } + + if configFile == "" { + showFallback() + return + } + + configBytes, err := os.ReadFile(configFile) + if err != nil { + showFallback() + return + } + + var config map[string]any + if err := json.Unmarshal(configBytes, &config); err != nil { + showFallback() + return + } + + projectName, ok := config["project_name"].(string) + if !ok || projectName == "" { + showFallback() + return + } + + claudePath := filepath.Join(".", projectName, "CLAUDE.md") + content, err := os.ReadFile(claudePath) + if err != nil { + showFallback() + return + } + + cmdio.LogString(ctx, "\n=== CLAUDE.md ===") + cmdio.LogString(ctx, string(content)) + cmdio.LogString(ctx, "=================\n") +} + func newInitTemplateCmd() *cobra.Command { cmd := &cobra.Command{ Use: "init-template [TEMPLATE_PATH]", @@ -109,7 +151,6 @@ See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more inf return nil } - // NEW: Handle config_json by creating temp config file if configJSON != "" { var userConfigMap map[string]any if err := json.Unmarshal([]byte(configJSON), &userConfigMap); err != nil { @@ -162,6 +203,10 @@ See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more inf return err } tmpl.Writer.LogTelemetry(ctx) + + // Try to read and display CLAUDE.md if present + readClaudeMd(ctx, configFile) + return nil } return cmd diff --git a/experimental/apps-mcp/lib/prompts/apps.tmpl b/experimental/apps-mcp/lib/prompts/apps.tmpl index a4665bcef8..fe684344d2 100644 --- a/experimental/apps-mcp/lib/prompts/apps.tmpl +++ b/experimental/apps-mcp/lib/prompts/apps.tmpl @@ -13,7 +13,7 @@ DATABRICKS APPS DEVELOPMENT ⚠️ ALWAYS start by scaffolding a new app using command: -invoke_databricks_cli 'experimental apps-mcp tools init-template https://github.com/databricks/cli --template-dir 
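
After a successful materialize, the command resolves
./<project_name>/CLAUDE.md (project_name comes from the config file) and
prints its contents; every failure path degrades to a one-line hint. A
condensed sketch of the logic added below (illustration only):

    // ./<project_name>/CLAUDE.md, derived from the config JSON.
    claudePath := filepath.Join(".", projectName, "CLAUDE.md")
    content, err := os.ReadFile(claudePath)
    if err != nil {
    	// fall back to a hint when the file is absent or unreadable
    	cmdio.LogString(ctx, "\nConsult with CLAUDE.md provided in the bundle if present.")
    	return
    }
    cmdio.LogString(ctx, "\n=== CLAUDE.md ===")
    cmdio.LogString(ctx, string(content))
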
experimental/apps-mcp/templates/appkit --branch mcp/appkit-bundle --config-json '{"project_name":"my-app-name","sql_warehouse_id":"{{if .WarehouseID}}{{.WarehouseID}}{{end}}"}'' +invoke_databricks_cli 'experimental apps-mcp tools init-template https://github.com/databricks/cli --template-dir experimental/apps-mcp/templates/appkit --branch mcp/appkit-bundle --config-json '{"project_name":"my-app-name","app_description":"my-app-description","sql_warehouse_id":"{{if .WarehouseID}}{{.WarehouseID}}{{end}}"}'' # Validation From e5a4634d2485541d19061060728df10c613b0ac1 Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Fri, 28 Nov 2025 10:45:34 +0100 Subject: [PATCH 18/18] MCP: Fix working dir (#4027) ## Changes ## Why ## Tests --- experimental/apps-mcp/cmd/install.go | 2 +- experimental/apps-mcp/lib/prompts/apps.tmpl | 2 +- .../clitools/invoke_databricks_cli.go | 23 +++++++++++-------- .../lib/providers/clitools/provider.go | 7 +++--- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/experimental/apps-mcp/cmd/install.go b/experimental/apps-mcp/cmd/install.go index 59f798849c..b75e69224b 100644 --- a/experimental/apps-mcp/cmd/install.go +++ b/experimental/apps-mcp/cmd/install.go @@ -90,7 +90,7 @@ func runInstall(ctx context.Context) error { cmdio.LogString(ctx, "") cmdio.LogString(ctx, "You can now use your coding agent to interact with Databricks.") cmdio.LogString(ctx, "") - cmdio.LogString(ctx, "Try asking: "+color.YellowString("Create an app that calculates taxi trip metrics: average fare by distance bracket and time of day.")) + cmdio.LogString(ctx, "Try asking: "+color.YellowString("Create a Databricks app that calculates taxi trip metrics: average fare by distance bracket and time of day.")) } return nil diff --git a/experimental/apps-mcp/lib/prompts/apps.tmpl b/experimental/apps-mcp/lib/prompts/apps.tmpl index fe684344d2..98c29d0c2a 100644 --- a/experimental/apps-mcp/lib/prompts/apps.tmpl +++ b/experimental/apps-mcp/lib/prompts/apps.tmpl @@ -11,7 +11,7 @@ DATABRICKS APPS DEVELOPMENT # Initialization -⚠️ ALWAYS start by scaffolding a new app using command: +⚠️ ALWAYS start by scaffolding a new Databricks app using command: invoke_databricks_cli 'experimental apps-mcp tools init-template https://github.com/databricks/cli --template-dir experimental/apps-mcp/templates/appkit --branch mcp/appkit-bundle --config-json '{"project_name":"my-app-name","app_description":"my-app-description","sql_warehouse_id":"{{if .WarehouseID}}{{.WarehouseID}}{{end}}"}'' diff --git a/experimental/apps-mcp/lib/providers/clitools/invoke_databricks_cli.go b/experimental/apps-mcp/lib/providers/clitools/invoke_databricks_cli.go index 4f584993ef..7906c43339 100644 --- a/experimental/apps-mcp/lib/providers/clitools/invoke_databricks_cli.go +++ b/experimental/apps-mcp/lib/providers/clitools/invoke_databricks_cli.go @@ -4,26 +4,31 @@ import ( "context" "errors" "fmt" + "os" + "os/exec" "github.com/databricks/cli/experimental/apps-mcp/lib/common" - "github.com/databricks/cli/libs/exec" + "github.com/databricks/cli/experimental/apps-mcp/lib/middlewares" ) // InvokeDatabricksCLI runs a Databricks CLI command and returns the output. 
-func InvokeDatabricksCLI(ctx context.Context, command string) (string, error) { - if command == "" { +func InvokeDatabricksCLI(ctx context.Context, command []string, workingDirectory string) (string, error) { + if len(command) == 0 { return "", errors.New("command is required") } - executor, err := exec.NewCommandExecutor(".") - if err != nil { - return "", fmt.Errorf("failed to create command executor: %w", err) - } + workspaceClient := middlewares.MustGetDatabricksClient(ctx) + host := workspaceClient.Config.Host // GetCLIPath returns the path to the current CLI executable cliPath := common.GetCLIPath() - fullCommand := fmt.Sprintf(`"%s" %s`, cliPath, command) - output, err := executor.Exec(ctx, fullCommand) + cmd := exec.CommandContext(ctx, cliPath, command...) + cmd.Dir = workingDirectory + env := os.Environ() + env = append(env, "DATABRICKS_HOST="+host) + cmd.Env = env + + output, err := cmd.CombinedOutput() result := string(output) if err != nil { diff --git a/experimental/apps-mcp/lib/providers/clitools/provider.go b/experimental/apps-mcp/lib/providers/clitools/provider.go index b106ae0252..7330f3ac2a 100644 --- a/experimental/apps-mcp/lib/providers/clitools/provider.go +++ b/experimental/apps-mcp/lib/providers/clitools/provider.go @@ -103,7 +103,8 @@ func (p *Provider) RegisterTools(server *mcpsdk.Server) error { // Register invoke_databricks_cli tool type InvokeDatabricksCLIInput struct { - Command string `json:"command" jsonschema:"required" jsonschema_description:"The full Databricks CLI command to run, e.g. 'bundle deploy' or 'bundle validate'. Do not include the 'databricks' prefix."` + WorkingDirectory string `json:"working_directory" jsonschema:"required" jsonschema_description:"The directory to run the command in."` + Args []string `json:"args" jsonschema:"required" jsonschema_description:"The arguments to pass to the Databricks CLI command e.g. ['bundle', 'deploy'] or ['bundle', 'validate']. Do not include the 'databricks' prefix."` } mcpsdk.AddTool(server, @@ -112,8 +113,8 @@ func (p *Provider) RegisterTools(server *mcpsdk.Server) error { Description: "Execute Databricks CLI command. Pass all arguments as a single string.", }, func(ctx context.Context, req *mcpsdk.CallToolRequest, args InvokeDatabricksCLIInput) (*mcpsdk.CallToolResult, any, error) { - log.Debugf(ctx, "invoke_databricks_cli called: command=%s", args.Command) - result, err := InvokeDatabricksCLI(ctx, args.Command) + log.Debugf(ctx, "invoke_databricks_cli called: args=%s, working_directory=%s", args.Args, args.WorkingDirectory) + result, err := InvokeDatabricksCLI(ctx, args.Args, args.WorkingDirectory) if err != nil { return nil, nil, err }