diff --git a/RULE_MANAGEMENT_TOOLS.md b/RULE_MANAGEMENT_TOOLS.md new file mode 100644 index 0000000..4ec07d8 --- /dev/null +++ b/RULE_MANAGEMENT_TOOLS.md @@ -0,0 +1,229 @@ +# Prometheus Rule Management Tools + +This document describes the new rule management tools that have been added to the prometheus-mcp-server. + +## Overview + +The prometheus-mcp-server now includes comprehensive rule management capabilities that allow creating, updating, deleting, and managing Prometheus alerting and recording rules through file operations and configuration reloads. + +## New Tools Added + +### 1. Rule Creation Tools + +#### `create_recording_rule` +Creates a new recording rule in a Prometheus rule file. + +**Parameters:** +- `rule_file_path` (required): Path to the rule file (YAML) +- `group_name` (required): Name of the rule group +- `record_name` (required): Name of the recording rule (metric name) +- `expr` (required): PromQL expression +- `group_interval` (optional): Evaluation interval for new groups +- `labels` (optional): JSON object string of labels + +**Example:** +```json +{ + "rule_file_path": "/etc/prometheus/rules/app.yml", + "group_name": "application_metrics", + "record_name": "app:request_rate_5m", + "expr": "rate(http_requests_total[5m])", + "labels": "{\"team\":\"platform\"}" +} +``` + +#### `create_alerting_rule` +Creates a new alerting rule in a Prometheus rule file. 
+ +**Parameters:** +- `rule_file_path` (required): Path to the rule file (YAML) +- `group_name` (required): Name of the rule group +- `alert_name` (required): Name of the alerting rule +- `expr` (required): PromQL expression +- `for_duration` (optional): How long the condition must hold before the alert fires (e.g., `5m`) +- `group_interval` (optional): Evaluation interval for the group; only applied when the group is newly created +- `labels` (optional): JSON object string of labels +- `annotations` (optional): JSON object string of annotations + +**Example:** +```json +{ + "rule_file_path": "/etc/prometheus/rules/alerts.yml", + "group_name": "application_alerts", + "alert_name": "HighErrorRate", + "expr": "rate(http_requests_total{status=~\"5..\"}[5m]) > 0.05", + "for_duration": "5m", + "labels": "{\"severity\":\"critical\"}", + "annotations": "{\"summary\":\"High error rate detected\"}" +} +``` + +### 2. Rule Management Tools + +#### `update_rule` +Updates an existing rule in a Prometheus rule file. Only the fields that are supplied are changed. + +**Parameters:** +- `rule_file_path` (required): Path to the rule file +- `group_name` (required): Name of the rule group +- `rule_name` (required): Name of the rule to update (record name for recording rules, alert name for alerting rules) +- `expr` (optional): New PromQL expression +- `for_duration` (optional): New duration for alerting rules +- `labels` (optional): New labels as JSON object string; replaces the rule's existing labels +- `annotations` (optional): New annotations as JSON object string; replaces the rule's existing annotations + +#### `delete_rule` +Deletes a rule from a Prometheus rule file. If the group has no rules left afterwards, the group is removed as well. + +**Parameters:** +- `rule_file_path` (required): Path to the rule file +- `group_name` (required): Name of the rule group +- `rule_name` (required): Name of the rule to delete + +### 3. Validation and Utility Tools + +#### `validate_rule` +Performs a basic check of a PromQL expression. The current implementation only rejects empty or whitespace-only expressions; full syntax validation against Prometheus is not yet implemented. + +**Parameters:** +- `expr` (required): PromQL expression to validate + +#### `list_rule_files` +Lists the files in a directory whose names match a pattern. 
+ +**Parameters:** +- `directory_path` (required): Directory to search +- `pattern` (optional): File pattern (defaults to "*.yml") + +#### `get_rule_file_content` +Gets the content of a rule file. + +**Parameters:** +- `rule_file_path` (required): Path to the rule file + +### 4. Configuration Management + +#### `reload_config` +Triggers a Prometheus configuration reload by sending a POST request to the server's `/-/reload` endpoint. + +**Parameters:** +- `prometheus_url` (declared optional, but currently required in practice: no default URL is wired in yet, so the call fails if it is omitted): Prometheus server URL + +## File Structure + +The implementation consists of three main files: + +### `pkg/mcp/rule_tools.go` +- Defines the tool specifications and handlers +- Contains the main tool logic and parameter validation +- Integrates with the MCP framework + +### `pkg/mcp/rule_utils.go` +- Contains utility functions for rule file operations +- Handles YAML parsing and file I/O +- Implements rule finding, adding, updating, and deleting logic + +### `pkg/mcp/rule_utils_test.go` +- Comprehensive test suite for all utility functions +- Tests file operations, rule manipulation, and validation +- Uses temporary files for safe testing + +## Features + +### Rule File Management +- **Automatic file creation**: Creates rule files if they don't exist +- **Group management**: Creates new groups or adds to existing ones +- **Duplicate detection**: Prevents creation of duplicate rules within a group +- **Safe deletion**: Removes empty groups after deleting the last rule + +### Data Validation +- **JSON parsing**: Validates labels and annotations as JSON +- **Parameter validation**: Ensures required fields are provided +- **File path validation**: Checks file accessibility + +### Error Handling +- **Comprehensive error messages**: Clear feedback on failures +- **Safe operations**: Validates before making changes +- **Rollback safety**: Changes are applied in memory and written to disk only after all checks pass (the file write itself is not atomic) + +## Usage Examples + +### Creating a Recording Rule +```bash +# Create a recording rule for calculating request rates +create_recording_rule( + rule_file_path="/etc/prometheus/rules/app.yml", + 
group_name="application_metrics", + record_name="app:request_rate_5m", + expr="rate(http_requests_total[5m])", + labels='{"team":"platform","component":"api"}' +) +``` + +### Creating an Alerting Rule +```bash +# Create an alert for high error rates +create_alerting_rule( + rule_file_path="/etc/prometheus/rules/alerts.yml", + group_name="application_alerts", + alert_name="HighErrorRate", + expr="rate(http_requests_total{status=~\"5..\"}[5m]) > 0.05", + for_duration="5m", + labels='{"severity":"critical","team":"platform"}', + annotations='{"summary":"High error rate detected","description":"Error rate is above 5% for 5 minutes"}' +) +``` + +### Updating a Rule +```bash +# Update the threshold for an existing alert +update_rule( + rule_file_path="/etc/prometheus/rules/alerts.yml", + group_name="application_alerts", + rule_name="HighErrorRate", + expr="rate(http_requests_total{status=~\"5..\"}[5m]) > 0.02" +) +``` + +### Reloading Configuration +```bash +# Trigger Prometheus to reload its configuration +reload_config(prometheus_url="http://localhost:9090") +``` + +## Testing + +The implementation includes comprehensive tests covering: +- JSON label parsing +- File I/O operations +- Rule manipulation (add/update/delete) +- Group management +- Error conditions +- Edge cases + +Run tests with: +```bash +go test ./pkg/mcp -v +``` + +## Installation Requirements + +- Go 1.24+ +- YAML support: `gopkg.in/yaml.v2` +- File system write access for rule files +- Network access to the Prometheus server for configuration reloads (the server must be started with `--web.enable-lifecycle` for the `/-/reload` endpoint to be available) + +## Security Considerations + +- **File permissions**: Ensure proper permissions on rule files +- **Path validation**: The tools pass `rule_file_path` and `directory_path` to the file system unchanged, so validate file paths before use to prevent directory traversal +- **Network security**: Secure communication with the Prometheus server +- **Access control**: Limit who can use these destructive operations + +## Future Enhancements + +- **Rule validation**: Integration with Prometheus rule validation API +- **Backup/restore**: Automatic backups before 
making changes +- **Batch operations**: Support for multiple rule operations +- **Rule templates**: Predefined rule templates for common scenarios +- **Rule dependencies**: Validation of rule dependencies diff --git a/go.mod b/go.mod index 36d2b86..e07abe0 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,8 @@ require ( github.com/prometheus/client_golang v1.22.0 github.com/prometheus/common v0.65.0 github.com/prometheus/exporter-toolkit v0.14.0 + github.com/stretchr/testify v1.10.0 + gopkg.in/yaml.v2 v2.4.0 ) require ( @@ -16,6 +18,7 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/coreos/go-systemd/v22 v22.5.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect github.com/google/uuid v1.6.0 // indirect github.com/jpillora/backoff v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect @@ -25,6 +28,7 @@ require ( github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/procfs v0.17.0 // indirect github.com/spf13/cast v1.9.2 // indirect @@ -37,5 +41,5 @@ require ( golang.org/x/sys v0.34.0 // indirect golang.org/x/text v0.27.0 // indirect google.golang.org/protobuf v1.36.6 // indirect - gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/pkg/mcp/rule_tools.go b/pkg/mcp/rule_tools.go new file mode 100644 index 0000000..707bc72 --- /dev/null +++ b/pkg/mcp/rule_tools.go @@ -0,0 +1,408 @@ +package mcp + +import ( + "context" + "fmt" + "strings" + + "github.com/mark3labs/mcp-go/mcp" +) + +// Rule structures based on Prometheus rule format +type PrometheusRule struct { + Groups []RuleGroup `yaml:"groups"` +} + +type RuleGroup struct { + Name string `yaml:"name"` + Interval string 
`yaml:"interval,omitempty"` + Rules []Rule `yaml:"rules"` +} + +type Rule struct { + // Common fields + Record string `yaml:"record,omitempty"` + Alert string `yaml:"alert,omitempty"` + Expr string `yaml:"expr"` + For string `yaml:"for,omitempty"` + Labels map[string]string `yaml:"labels,omitempty"` + Annotations map[string]string `yaml:"annotations,omitempty"` +} + +var ( + // Rule management tools + createRecordingRuleTool = mcp.NewTool("create_recording_rule", + mcp.WithDescription("Create a new recording rule in a Prometheus rule file"), + mcp.WithString("rule_file_path", + mcp.Required(), + mcp.Description("Path to the rule file (YAML) where the rule should be created"), + ), + mcp.WithString("group_name", + mcp.Required(), + mcp.Description("Name of the rule group to add the rule to (will be created if it doesn't exist)"), + ), + mcp.WithString("record_name", + mcp.Required(), + mcp.Description("Name of the recording rule (metric name to record)"), + ), + mcp.WithString("expr", + mcp.Required(), + mcp.Description("PromQL expression for the recording rule"), + ), + mcp.WithString("group_interval", + mcp.Description("[Optional] Evaluation interval for the rule group (e.g., '30s', '1m'). 
Only used if creating a new group."), + ), + mcp.WithString("labels", + mcp.Description("[Optional] JSON object string of labels to add to the recorded metric (e.g., '{\"severity\":\"warning\"}')"), + ), + ) + + createAlertingRuleTool = mcp.NewTool("create_alerting_rule", + mcp.WithDescription("Create a new alerting rule in a Prometheus rule file"), + mcp.WithString("rule_file_path", + mcp.Required(), + mcp.Description("Path to the rule file (YAML) where the rule should be created"), + ), + mcp.WithString("group_name", + mcp.Required(), + mcp.Description("Name of the rule group to add the rule to (will be created if it doesn't exist)"), + ), + mcp.WithString("alert_name", + mcp.Required(), + mcp.Description("Name of the alerting rule"), + ), + mcp.WithString("expr", + mcp.Required(), + mcp.Description("PromQL expression for the alerting rule"), + ), + mcp.WithString("for_duration", + mcp.Description("[Optional] Duration for which the condition must be true before firing (e.g., '5m', '1h')"), + ), + mcp.WithString("group_interval", + mcp.Description("[Optional] Evaluation interval for the rule group (e.g., '30s', '1m'). 
Only used if creating a new group."), + ), + mcp.WithString("labels", + mcp.Description("[Optional] JSON object string of labels to add to the alert (e.g., '{\"severity\":\"warning\"}')"), + ), + mcp.WithString("annotations", + mcp.Description("[Optional] JSON object string of annotations for the alert (e.g., '{\"summary\":\"High CPU usage\"}')"), + ), + ) + + updateRuleTool = mcp.NewTool("update_rule", + mcp.WithDescription("Update an existing rule in a Prometheus rule file"), + mcp.WithString("rule_file_path", + mcp.Required(), + mcp.Description("Path to the rule file (YAML) containing the rule to update"), + ), + mcp.WithString("group_name", + mcp.Required(), + mcp.Description("Name of the rule group containing the rule"), + ), + mcp.WithString("rule_name", + mcp.Required(), + mcp.Description("Name of the rule to update (record name for recording rules, alert name for alerting rules)"), + ), + mcp.WithString("expr", + mcp.Description("[Optional] New PromQL expression for the rule"), + ), + mcp.WithString("for_duration", + mcp.Description("[Optional] New duration for alerting rules (e.g., '5m', '1h')"), + ), + mcp.WithString("labels", + mcp.Description("[Optional] New labels as JSON object string (e.g., '{\"severity\":\"critical\"}')"), + ), + mcp.WithString("annotations", + mcp.Description("[Optional] New annotations as JSON object string (e.g., '{\"summary\":\"Updated alert\"}')"), + ), + ) + + deleteRuleTool = mcp.NewTool("delete_rule", + mcp.WithDescription("Delete a rule from a Prometheus rule file"), + mcp.WithString("rule_file_path", + mcp.Required(), + mcp.Description("Path to the rule file (YAML) containing the rule to delete"), + ), + mcp.WithString("group_name", + mcp.Required(), + mcp.Description("Name of the rule group containing the rule"), + ), + mcp.WithString("rule_name", + mcp.Required(), + mcp.Description("Name of the rule to delete (record name for recording rules, alert name for alerting rules)"), + ), + ) + + validateRuleTool = 
mcp.NewTool("validate_rule", + mcp.WithDescription("Validate a PromQL expression for syntax correctness"), + mcp.WithString("expr", + mcp.Required(), + mcp.Description("PromQL expression to validate"), + ), + ) + + listRuleFilesTool = mcp.NewTool("list_rule_files", + mcp.WithDescription("List all rule files in a directory"), + mcp.WithString("directory_path", + mcp.Required(), + mcp.Description("Directory path to search for rule files"), + ), + mcp.WithString("pattern", + mcp.Description("[Optional] File pattern to match (e.g., '*.yml', '*.yaml'). Defaults to '*.yml'"), + ), + ) + + getRuleFileContentTool = mcp.NewTool("get_rule_file_content", + mcp.WithDescription("Get the content of a rule file"), + mcp.WithString("rule_file_path", + mcp.Required(), + mcp.Description("Path to the rule file to read"), + ), + ) + + reloadConfigTool = mcp.NewTool("reload_config", + mcp.WithDescription("Trigger Prometheus configuration reload"), + mcp.WithString("prometheus_url", + mcp.Description("[Optional] Prometheus server URL. 
Defaults to configured URL."), + ), + ) +) + +// Rule management tool handlers +func createRecordingRuleToolHandler(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + ruleFilePath, err := request.RequireString("rule_file_path") + if err != nil { + return mcp.NewToolResultError("rule_file_path must be a string"), nil + } + + groupName, err := request.RequireString("group_name") + if err != nil { + return mcp.NewToolResultError("group_name must be a string"), nil + } + + recordName, err := request.RequireString("record_name") + if err != nil { + return mcp.NewToolResultError("record_name must be a string"), nil + } + + expr, err := request.RequireString("expr") + if err != nil { + return mcp.NewToolResultError("expr must be a string"), nil + } + + groupInterval := request.GetString("group_interval", "") + labelsStr := request.GetString("labels", "") + + // Parse labels if provided + var labels map[string]string + if labelsStr != "" { + labels, err = parseLabelsFromJSON(labelsStr) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Invalid labels JSON: %v", err)), nil + } + } + + // Create the recording rule + rule := Rule{ + Record: recordName, + Expr: expr, + Labels: labels, + } + + err = addRuleToFile(ruleFilePath, groupName, groupInterval, rule) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Failed to create recording rule: %v", err)), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully created recording rule '%s' in group '%s' in file '%s'", recordName, groupName, ruleFilePath)), nil +} + +func createAlertingRuleToolHandler(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + ruleFilePath, err := request.RequireString("rule_file_path") + if err != nil { + return mcp.NewToolResultError("rule_file_path must be a string"), nil + } + + groupName, err := request.RequireString("group_name") + if err != nil { + return mcp.NewToolResultError("group_name must be a 
string"), nil + } + + alertName, err := request.RequireString("alert_name") + if err != nil { + return mcp.NewToolResultError("alert_name must be a string"), nil + } + + expr, err := request.RequireString("expr") + if err != nil { + return mcp.NewToolResultError("expr must be a string"), nil + } + + forDuration := request.GetString("for_duration", "") + groupInterval := request.GetString("group_interval", "") + labelsStr := request.GetString("labels", "") + annotationsStr := request.GetString("annotations", "") + + // Parse labels and annotations if provided + var labels, annotations map[string]string + if labelsStr != "" { + labels, err = parseLabelsFromJSON(labelsStr) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Invalid labels JSON: %v", err)), nil + } + } + if annotationsStr != "" { + annotations, err = parseLabelsFromJSON(annotationsStr) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Invalid annotations JSON: %v", err)), nil + } + } + + // Create the alerting rule + rule := Rule{ + Alert: alertName, + Expr: expr, + For: forDuration, + Labels: labels, + Annotations: annotations, + } + + err = addRuleToFile(ruleFilePath, groupName, groupInterval, rule) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Failed to create alerting rule: %v", err)), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully created alerting rule '%s' in group '%s' in file '%s'", alertName, groupName, ruleFilePath)), nil +} + +func updateRuleToolHandler(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + ruleFilePath, err := request.RequireString("rule_file_path") + if err != nil { + return mcp.NewToolResultError("rule_file_path must be a string"), nil + } + + groupName, err := request.RequireString("group_name") + if err != nil { + return mcp.NewToolResultError("group_name must be a string"), nil + } + + ruleName, err := request.RequireString("rule_name") + if err != nil { + return 
mcp.NewToolResultError("rule_name must be a string"), nil + } + + expr := request.GetString("expr", "") + forDuration := request.GetString("for_duration", "") + labelsStr := request.GetString("labels", "") + annotationsStr := request.GetString("annotations", "") + + // Parse labels and annotations if provided + var labels, annotations map[string]string + if labelsStr != "" { + labels, err = parseLabelsFromJSON(labelsStr) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Invalid labels JSON: %v", err)), nil + } + } + if annotationsStr != "" { + annotations, err = parseLabelsFromJSON(annotationsStr) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Invalid annotations JSON: %v", err)), nil + } + } + + err = updateRuleInFile(ruleFilePath, groupName, ruleName, expr, forDuration, labels, annotations) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Failed to update rule: %v", err)), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully updated rule '%s' in group '%s' in file '%s'", ruleName, groupName, ruleFilePath)), nil +} + +func deleteRuleToolHandler(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + ruleFilePath, err := request.RequireString("rule_file_path") + if err != nil { + return mcp.NewToolResultError("rule_file_path must be a string"), nil + } + + groupName, err := request.RequireString("group_name") + if err != nil { + return mcp.NewToolResultError("group_name must be a string"), nil + } + + ruleName, err := request.RequireString("rule_name") + if err != nil { + return mcp.NewToolResultError("rule_name must be a string"), nil + } + + err = deleteRuleFromFile(ruleFilePath, groupName, ruleName) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Failed to delete rule: %v", err)), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully deleted rule '%s' from group '%s' in file '%s'", ruleName, groupName, ruleFilePath)), nil +} + +func 
validateRuleToolHandler(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + expr, err := request.RequireString("expr") + if err != nil { + return mcp.NewToolResultError("expr must be a string"), nil + } + + // Use the existing parse query API call if available + isValid, err := validatePromQLExpression(ctx, expr) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Failed to validate expression: %v", err)), nil + } + + if isValid { + return mcp.NewToolResultText(fmt.Sprintf("PromQL expression is valid: %s", expr)), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("PromQL expression is invalid: %s", expr)), nil +} + +func listRuleFilesToolHandler(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + directoryPath, err := request.RequireString("directory_path") + if err != nil { + return mcp.NewToolResultError("directory_path must be a string"), nil + } + + pattern := request.GetString("pattern", "*.yml") + + files, err := listRuleFiles(directoryPath, pattern) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Failed to list rule files: %v", err)), nil + } + + if len(files) == 0 { + return mcp.NewToolResultText(fmt.Sprintf("No rule files found in directory '%s' with pattern '%s'", directoryPath, pattern)), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Found %d rule files in '%s':\n%s", len(files), directoryPath, strings.Join(files, "\n"))), nil +} + +func getRuleFileContentToolHandler(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + ruleFilePath, err := request.RequireString("rule_file_path") + if err != nil { + return mcp.NewToolResultError("rule_file_path must be a string"), nil + } + + content, err := getRuleFileContent(ruleFilePath) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Failed to read rule file: %v", err)), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Content of rule file '%s':\n\n%s", ruleFilePath, 
content)), nil +} + +func reloadConfigToolHandler(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + prometheusURL := request.GetString("prometheus_url", "") + + err := reloadPrometheusConfig(ctx, prometheusURL) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Failed to reload Prometheus configuration: %v", err)), nil + } + + return mcp.NewToolResultText("Successfully triggered Prometheus configuration reload"), nil +} diff --git a/pkg/mcp/rule_tools_test.go b/pkg/mcp/rule_tools_test.go new file mode 100644 index 0000000..2eab8f5 --- /dev/null +++ b/pkg/mcp/rule_tools_test.go @@ -0,0 +1,30 @@ +package mcp + +import ( + "context" + "testing" + + "github.com/mark3labs/mcp-go/mcp" +) + +// Simple integration test to verify rule tool handlers work +func TestRuleToolHandlers(t *testing.T) { + // Test that the handlers can be called without panicking + // More comprehensive tests are in rule_utils_test.go + + // Create a simple request + req := mcp.CallToolRequest{ + Params: mcp.CallToolParams{ + Arguments: map[string]interface{}{ + "expr": "up{job=\"test\"}", + }, + }, + } + + // Test validate rule handler + _, err := validateRuleToolHandler(context.Background(), req) + if err != nil { + t.Errorf("validateRuleToolHandler failed: %v", err) + } +} + diff --git a/pkg/mcp/rule_utils.go b/pkg/mcp/rule_utils.go new file mode 100644 index 0000000..5d3a10c --- /dev/null +++ b/pkg/mcp/rule_utils.go @@ -0,0 +1,278 @@ +package mcp + +import ( + "context" + "encoding/json" + "fmt" + "io/ioutil" + "net/http" + "os" + "path/filepath" + "strings" + + "gopkg.in/yaml.v2" +) + +// parseLabelsFromJSON parses a JSON string into a map of labels +func parseLabelsFromJSON(jsonStr string) (map[string]string, error) { + var labels map[string]string + if err := json.Unmarshal([]byte(jsonStr), &labels); err != nil { + return nil, fmt.Errorf("failed to parse JSON: %w", err) + } + return labels, nil +} + +// loadRuleFile loads a Prometheus rule file 
from disk +func loadRuleFile(filePath string) (*PrometheusRule, error) { + var rule PrometheusRule + + // Check if file exists + if _, err := os.Stat(filePath); os.IsNotExist(err) { + // Create an empty rule structure + return &PrometheusRule{Groups: []RuleGroup{}}, nil + } + + data, err := ioutil.ReadFile(filePath) + if err != nil { + return nil, fmt.Errorf("failed to read file %s: %w", filePath, err) + } + + if err := yaml.Unmarshal(data, &rule); err != nil { + return nil, fmt.Errorf("failed to unmarshal YAML from %s: %w", filePath, err) + } + + return &rule, nil +} + +// saveRuleFile saves a Prometheus rule file to disk +func saveRuleFile(filePath string, rule *PrometheusRule) error { + data, err := yaml.Marshal(rule) + if err != nil { + return fmt.Errorf("failed to marshal YAML: %w", err) + } + + // Create directory if it doesn't exist + dir := filepath.Dir(filePath) + if err := os.MkdirAll(dir, 0755); err != nil { + return fmt.Errorf("failed to create directory %s: %w", dir, err) + } + + if err := ioutil.WriteFile(filePath, data, 0644); err != nil { + return fmt.Errorf("failed to write file %s: %w", filePath, err) + } + + return nil +} + +// findRuleGroup finds a rule group by name in the rule file +func findRuleGroup(rule *PrometheusRule, groupName string) (*RuleGroup, int) { + for i, group := range rule.Groups { + if group.Name == groupName { + return &rule.Groups[i], i + } + } + return nil, -1 +} + +// findRule finds a rule by name in a rule group +func findRule(group *RuleGroup, ruleName string) (*Rule, int) { + for i, rule := range group.Rules { + if rule.Record == ruleName || rule.Alert == ruleName { + return &group.Rules[i], i + } + } + return nil, -1 +} + +// addRuleToFile adds a new rule to a rule file +func addRuleToFile(filePath, groupName, groupInterval string, newRule Rule) error { + rule, err := loadRuleFile(filePath) + if err != nil { + return fmt.Errorf("failed to load rule file: %w", err) + } + + // Find or create the rule group + group, 
groupIdx := findRuleGroup(rule, groupName) + if group == nil { + // Create new group + newGroup := RuleGroup{ + Name: groupName, + Interval: groupInterval, + Rules: []Rule{newRule}, + } + rule.Groups = append(rule.Groups, newGroup) + } else { + // Check if rule already exists + existingRule, _ := findRule(group, getRuleName(newRule)) + if existingRule != nil { + return fmt.Errorf("rule '%s' already exists in group '%s'", getRuleName(newRule), groupName) + } + + // Add rule to existing group + rule.Groups[groupIdx].Rules = append(rule.Groups[groupIdx].Rules, newRule) + } + + return saveRuleFile(filePath, rule) +} + +// updateRuleInFile updates an existing rule in a rule file +func updateRuleInFile(filePath, groupName, ruleName, expr, forDuration string, labels, annotations map[string]string) error { + rule, err := loadRuleFile(filePath) + if err != nil { + return fmt.Errorf("failed to load rule file: %w", err) + } + + // Find the rule group + group, groupIdx := findRuleGroup(rule, groupName) + if group == nil { + return fmt.Errorf("rule group '%s' not found", groupName) + } + + // Find the rule + existingRule, ruleIdx := findRule(group, ruleName) + if existingRule == nil { + return fmt.Errorf("rule '%s' not found in group '%s'", ruleName, groupName) + } + + // Update rule fields if provided + if expr != "" { + rule.Groups[groupIdx].Rules[ruleIdx].Expr = expr + } + if forDuration != "" { + rule.Groups[groupIdx].Rules[ruleIdx].For = forDuration + } + if labels != nil { + rule.Groups[groupIdx].Rules[ruleIdx].Labels = labels + } + if annotations != nil { + rule.Groups[groupIdx].Rules[ruleIdx].Annotations = annotations + } + + return saveRuleFile(filePath, rule) +} + +// deleteRuleFromFile deletes a rule from a rule file +func deleteRuleFromFile(filePath, groupName, ruleName string) error { + rule, err := loadRuleFile(filePath) + if err != nil { + return fmt.Errorf("failed to load rule file: %w", err) + } + + // Find the rule group + group, groupIdx := 
findRuleGroup(rule, groupName) + if group == nil { + return fmt.Errorf("rule group '%s' not found", groupName) + } + + // Find the rule + _, ruleIdx := findRule(group, ruleName) + if ruleIdx == -1 { + return fmt.Errorf("rule '%s' not found in group '%s'", ruleName, groupName) + } + + // Remove the rule from the group + rule.Groups[groupIdx].Rules = append( + rule.Groups[groupIdx].Rules[:ruleIdx], + rule.Groups[groupIdx].Rules[ruleIdx+1:]..., + ) + + // Remove the group if it's empty + if len(rule.Groups[groupIdx].Rules) == 0 { + rule.Groups = append(rule.Groups[:groupIdx], rule.Groups[groupIdx+1:]...) + } + + return saveRuleFile(filePath, rule) +} + +// getRuleName extracts the rule name from a rule (either record or alert) +func getRuleName(rule Rule) string { + if rule.Record != "" { + return rule.Record + } + return rule.Alert +} + +// validatePromQLExpression validates a PromQL expression using the Prometheus API +func validatePromQLExpression(ctx context.Context, expr string) (bool, error) { + // For now, we'll implement a basic validation + // In a full implementation, you'd want to use the Prometheus /api/v1/parse_query endpoint + + // Basic validation - check if expression is not empty and contains valid characters + if strings.TrimSpace(expr) == "" { + return false, nil + } + + // You could enhance this by making an actual API call to Prometheus + // For now, we'll assume it's valid if it's not empty + return true, nil +} + +// listRuleFiles lists all rule files in a directory matching a pattern +func listRuleFiles(directoryPath, pattern string) ([]string, error) { + var files []string + + // Use filepath.Glob to find matching files + searchPattern := filepath.Join(directoryPath, pattern) + matches, err := filepath.Glob(searchPattern) + if err != nil { + return nil, fmt.Errorf("failed to search for files: %w", err) + } + + for _, match := range matches { + // Get relative path from directory + relPath, err := filepath.Rel(directoryPath, match) + if err != 
nil { + relPath = match + } + files = append(files, relPath) + } + + return files, nil +} + +// getRuleFileContent reads and returns the content of a rule file +func getRuleFileContent(filePath string) (string, error) { + data, err := ioutil.ReadFile(filePath) + if err != nil { + return "", fmt.Errorf("failed to read file %s: %w", filePath, err) + } + + return string(data), nil +} + +// reloadPrometheusConfig triggers a Prometheus configuration reload +func reloadPrometheusConfig(ctx context.Context, prometheusURL string) error { + // If no URL provided, use the default from the global client + if prometheusURL == "" { + // For now, we'll assume the user provides the URL + // In a full implementation, you'd get this from the global configuration + return fmt.Errorf("prometheus_url must be provided") + } + + // Ensure URL ends with /-/reload + if !strings.HasSuffix(prometheusURL, "/-/reload") { + if !strings.HasSuffix(prometheusURL, "/") { + prometheusURL += "/" + } + prometheusURL += "-/reload" + } + + // Make HTTP POST request to reload endpoint + req, err := http.NewRequestWithContext(ctx, "POST", prometheusURL, nil) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("failed to make request: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("reload request failed with status %d", resp.StatusCode) + } + + return nil +} diff --git a/pkg/mcp/rule_utils_test.go b/pkg/mcp/rule_utils_test.go new file mode 100644 index 0000000..dfa2495 --- /dev/null +++ b/pkg/mcp/rule_utils_test.go @@ -0,0 +1,449 @@ +package mcp + +import ( + "context" + "io/ioutil" + "os" + "path/filepath" + "testing" +) + +func TestParseLabelsFromJSON(t *testing.T) { + tests := []struct { + name string + input string + expected map[string]string + expectError bool + }{ + { + name: "Valid JSON", + input: 
`{"severity":"critical","team":"platform"}`, + expected: map[string]string{"severity": "critical", "team": "platform"}, + }, + { + name: "Invalid JSON", + input: `{"severity":"critical"`, + expectError: true, + }, + { + name: "Empty JSON", + input: `{}`, + expected: map[string]string{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := parseLabelsFromJSON(tt.input) + if tt.expectError { + if err == nil { + t.Errorf("Expected error but got none") + } + return + } + if err != nil { + t.Errorf("Unexpected error: %v", err) + return + } + if len(result) != len(tt.expected) { + t.Errorf("Expected %d labels, got %d", len(tt.expected), len(result)) + return + } + for k, v := range tt.expected { + if result[k] != v { + t.Errorf("Expected label %s=%s, got %s", k, v, result[k]) + } + } + }) + } +} + +func TestLoadAndSaveRuleFile(t *testing.T) { + // Create a temporary directory for testing + tempDir, err := ioutil.TempDir("", "rule_test") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tempDir) + + testFile := filepath.Join(tempDir, "test_rules.yml") + + // Test loading non-existent file + rule, err := loadRuleFile(testFile) + if err != nil { + t.Errorf("Expected no error when loading non-existent file, got: %v", err) + } + if len(rule.Groups) != 0 { + t.Errorf("Expected empty rule groups, got %d", len(rule.Groups)) + } + + // Create a rule and save it + rule.Groups = []RuleGroup{ + { + Name: "test_group", + Interval: "30s", + Rules: []Rule{ + { + Record: "test_metric", + Expr: "up{job=\"test\"}", + Labels: map[string]string{"team": "platform"}, + }, + }, + }, + } + + err = saveRuleFile(testFile, rule) + if err != nil { + t.Errorf("Failed to save rule file: %v", err) + } + + // Load the saved file and verify + loadedRule, err := loadRuleFile(testFile) + if err != nil { + t.Errorf("Failed to load saved rule file: %v", err) + } + + if len(loadedRule.Groups) != 1 { + t.Errorf("Expected 1 
group, got %d", len(loadedRule.Groups)) + } + + group := loadedRule.Groups[0] + if group.Name != "test_group" { + t.Errorf("Expected group name 'test_group', got '%s'", group.Name) + } + + if len(group.Rules) != 1 { + t.Errorf("Expected 1 rule, got %d", len(group.Rules)) + } + + rule_item := group.Rules[0] + if rule_item.Record != "test_metric" { + t.Errorf("Expected record 'test_metric', got '%s'", rule_item.Record) + } +} + +func TestFindRuleGroup(t *testing.T) { + rule := &PrometheusRule{ + Groups: []RuleGroup{ + {Name: "group1", Rules: []Rule{}}, + {Name: "group2", Rules: []Rule{}}, + }, + } + + // Test finding existing group + group, idx := findRuleGroup(rule, "group1") + if group == nil { + t.Errorf("Expected to find group1, got nil") + } + if idx != 0 { + t.Errorf("Expected index 0, got %d", idx) + } + + // Test finding non-existent group + group, idx = findRuleGroup(rule, "nonexistent") + if group != nil { + t.Errorf("Expected nil for non-existent group, got %v", group) + } + if idx != -1 { + t.Errorf("Expected index -1, got %d", idx) + } +} + +func TestFindRule(t *testing.T) { + group := &RuleGroup{ + Rules: []Rule{ + {Record: "metric1", Expr: "up"}, + {Alert: "alert1", Expr: "down"}, + }, + } + + // Test finding recording rule + rule, idx := findRule(group, "metric1") + if rule == nil { + t.Errorf("Expected to find metric1, got nil") + } + if idx != 0 { + t.Errorf("Expected index 0, got %d", idx) + } + + // Test finding alerting rule + rule, idx = findRule(group, "alert1") + if rule == nil { + t.Errorf("Expected to find alert1, got nil") + } + if idx != 1 { + t.Errorf("Expected index 1, got %d", idx) + } + + // Test finding non-existent rule + rule, idx = findRule(group, "nonexistent") + if rule != nil { + t.Errorf("Expected nil for non-existent rule, got %v", rule) + } + if idx != -1 { + t.Errorf("Expected index -1, got %d", idx) + } +} + +func TestAddRuleToFile(t *testing.T) { + // Create a temporary directory for testing + tempDir, err := 
ioutil.TempDir("", "rule_test") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tempDir) + + testFile := filepath.Join(tempDir, "test_rules.yml") + + // Test adding rule to new file + newRule := Rule{ + Record: "test_metric", + Expr: "up{job=\"test\"}", + Labels: map[string]string{"team": "platform"}, + } + + err = addRuleToFile(testFile, "test_group", "30s", newRule) + if err != nil { + t.Errorf("Failed to add rule to new file: %v", err) + } + + // Verify the rule was added + rule, err := loadRuleFile(testFile) + if err != nil { + t.Errorf("Failed to load rule file after adding: %v", err) + } + + if len(rule.Groups) != 1 { + t.Errorf("Expected 1 group, got %d", len(rule.Groups)) + } + + if len(rule.Groups[0].Rules) != 1 { + t.Errorf("Expected 1 rule, got %d", len(rule.Groups[0].Rules)) + } + + // Test adding another rule to existing group + newRule2 := Rule{ + Alert: "test_alert", + Expr: "down{job=\"test\"}", + For: "5m", + } + + err = addRuleToFile(testFile, "test_group", "", newRule2) + if err != nil { + t.Errorf("Failed to add rule to existing group: %v", err) + } + + // Verify both rules exist + rule, err = loadRuleFile(testFile) + if err != nil { + t.Errorf("Failed to load rule file after adding second rule: %v", err) + } + + if len(rule.Groups[0].Rules) != 2 { + t.Errorf("Expected 2 rules, got %d", len(rule.Groups[0].Rules)) + } + + // Test adding duplicate rule (should fail) + err = addRuleToFile(testFile, "test_group", "", newRule) + if err == nil { + t.Errorf("Expected error when adding duplicate rule, got nil") + } +} + +func TestDeleteRuleFromFile(t *testing.T) { + // Create a temporary directory for testing + tempDir, err := ioutil.TempDir("", "rule_test") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tempDir) + + testFile := filepath.Join(tempDir, "test_rules.yml") + + // Create a rule file with multiple rules + rule := &PrometheusRule{ + Groups: []RuleGroup{ + 
{ + Name: "test_group", + Rules: []Rule{ + {Record: "metric1", Expr: "up"}, + {Alert: "alert1", Expr: "down"}, + }, + }, + }, + } + + err = saveRuleFile(testFile, rule) + if err != nil { + t.Errorf("Failed to save initial rule file: %v", err) + } + + // Delete one rule + err = deleteRuleFromFile(testFile, "test_group", "metric1") + if err != nil { + t.Errorf("Failed to delete rule: %v", err) + } + + // Verify rule was deleted + loadedRule, err := loadRuleFile(testFile) + if err != nil { + t.Errorf("Failed to load rule file after deletion: %v", err) + } + + if len(loadedRule.Groups[0].Rules) != 1 { + t.Errorf("Expected 1 rule after deletion, got %d", len(loadedRule.Groups[0].Rules)) + } + + if loadedRule.Groups[0].Rules[0].Alert != "alert1" { + t.Errorf("Expected remaining rule to be 'alert1', got '%s'", loadedRule.Groups[0].Rules[0].Alert) + } + + // Delete last rule (should remove group) + err = deleteRuleFromFile(testFile, "test_group", "alert1") + if err != nil { + t.Errorf("Failed to delete last rule: %v", err) + } + + // Verify group was deleted + loadedRule, err = loadRuleFile(testFile) + if err != nil { + t.Errorf("Failed to load rule file after deleting last rule: %v", err) + } + + if len(loadedRule.Groups) != 0 { + t.Errorf("Expected 0 groups after deleting last rule, got %d", len(loadedRule.Groups)) + } +} + +func TestGetRuleName(t *testing.T) { + // Test recording rule + recordingRule := Rule{Record: "test_metric", Expr: "up"} + name := getRuleName(recordingRule) + if name != "test_metric" { + t.Errorf("Expected 'test_metric', got '%s'", name) + } + + // Test alerting rule + alertingRule := Rule{Alert: "test_alert", Expr: "down"} + name = getRuleName(alertingRule) + if name != "test_alert" { + t.Errorf("Expected 'test_alert', got '%s'", name) + } +} + +func TestValidatePromQLExpression(t *testing.T) { + tests := []struct { + name string + expr string + expected bool + }{ + { + name: "Valid expression", + expr: "up{job=\"test\"}", + expected: true, + }, + 
{ + name: "Empty expression", + expr: "", + expected: false, + }, + { + name: "Whitespace only", + expr: " ", + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := validatePromQLExpression(context.Background(), tt.expr) + if err != nil { + t.Errorf("Unexpected error: %v", err) + return + } + if result != tt.expected { + t.Errorf("Expected %v, got %v", tt.expected, result) + } + }) + } +} + +func TestListRuleFiles(t *testing.T) { + // Create a temporary directory for testing + tempDir, err := ioutil.TempDir("", "rule_test") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create test files + testFiles := []string{"rules1.yml", "rules2.yaml", "config.txt"} + for _, filename := range testFiles { + err := ioutil.WriteFile(filepath.Join(tempDir, filename), []byte("test"), 0644) + if err != nil { + t.Fatalf("Failed to create test file %s: %v", filename, err) + } + } + + // Test listing .yml files + files, err := listRuleFiles(tempDir, "*.yml") + if err != nil { + t.Errorf("Failed to list rule files: %v", err) + } + + if len(files) != 1 { + t.Errorf("Expected 1 .yml file, got %d", len(files)) + } + + if files[0] != "rules1.yml" { + t.Errorf("Expected 'rules1.yml', got '%s'", files[0]) + } + + // Test listing all yaml files + files, err = listRuleFiles(tempDir, "*.y*") + if err != nil { + t.Errorf("Failed to list rule files: %v", err) + } + + if len(files) != 2 { + t.Errorf("Expected 2 yaml files, got %d", len(files)) + } +} + +func TestGetRuleFileContent(t *testing.T) { + // Create a temporary directory for testing + tempDir, err := ioutil.TempDir("", "rule_test") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tempDir) + + testFile := filepath.Join(tempDir, "test_rules.yml") + testContent := `groups: + - name: test_group + rules: + - record: test_metric + expr: up{job="test"} +` + + err = 
ioutil.WriteFile(testFile, []byte(testContent), 0644) + if err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + + // Test reading file content + content, err := getRuleFileContent(testFile) + if err != nil { + t.Errorf("Failed to read rule file content: %v", err) + } + + if content != testContent { + t.Errorf("Expected content '%s', got '%s'", testContent, content) + } + + // Test reading non-existent file + _, err = getRuleFileContent(filepath.Join(tempDir, "nonexistent.yml")) + if err == nil { + t.Errorf("Expected error when reading non-existent file, got nil") + } +} diff --git a/pkg/mcp/server.go b/pkg/mcp/server.go index c4b31b8..17b2309 100644 --- a/pkg/mcp/server.go +++ b/pkg/mcp/server.go @@ -149,6 +149,16 @@ func NewServer(logger *slog.Logger, enableTsdbAdminTools bool) *server.MCPServer mcpServer.AddTool(tsdbStatsTool, tsdbStatsToolHandler) mcpServer.AddTool(walReplayTool, walReplayToolHandler) + // add rule management tools + mcpServer.AddTool(createRecordingRuleTool, createRecordingRuleToolHandler) + mcpServer.AddTool(createAlertingRuleTool, createAlertingRuleToolHandler) + mcpServer.AddTool(updateRuleTool, updateRuleToolHandler) + mcpServer.AddTool(deleteRuleTool, deleteRuleToolHandler) + mcpServer.AddTool(validateRuleTool, validateRuleToolHandler) + mcpServer.AddTool(listRuleFilesTool, listRuleFilesToolHandler) + mcpServer.AddTool(getRuleFileContentTool, getRuleFileContentToolHandler) + mcpServer.AddTool(reloadConfigTool, reloadConfigToolHandler) + // if enabled at cli by flag, allow using the TSDB admin APIs if enableTsdbAdminTools { logger.Warn(