From 8b870295a36334ca9734c7dc74194d636ce1d364 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 3 Nov 2025 06:29:58 -0700 Subject: [PATCH 01/20] 4914:adding managed by to common and making threshold use retry logic --- internal/resources/asserts/common_lister.go | 14 + .../resources/asserts/resource_prom_rules.go | 462 ++++++++++++++++ .../asserts/resource_prom_rules_test.go | 507 ++++++++++++++++++ internal/resources/asserts/resources.go | 1 + 4 files changed, 984 insertions(+) create mode 100644 internal/resources/asserts/resource_prom_rules.go create mode 100644 internal/resources/asserts/resource_prom_rules_test.go diff --git a/internal/resources/asserts/common_lister.go b/internal/resources/asserts/common_lister.go index de681d768..61b626d78 100644 --- a/internal/resources/asserts/common_lister.go +++ b/internal/resources/asserts/common_lister.go @@ -98,3 +98,17 @@ func listLogConfigs(ctx context.Context, client *assertsapi.APIClient, stackID s } return names, nil } + +// listPromRules retrieves the list of all Prometheus rules file names for a specific stack +func listPromRules(ctx context.Context, client *assertsapi.APIClient, stackID string) ([]string, error) { + request := client.PromRulesConfigurationAPI.ListPromRules(ctx). + XScopeOrgID(stackID) + + namesDto, _, err := request.Execute() + if err != nil { + return nil, err + } + + // The DTO contains an array of rule file names + return namesDto.RuleNames, nil +} diff --git a/internal/resources/asserts/resource_prom_rules.go b/internal/resources/asserts/resource_prom_rules.go new file mode 100644 index 000000000..05d3b31a0 --- /dev/null +++ b/internal/resources/asserts/resource_prom_rules.go @@ -0,0 +1,462 @@ +package asserts + +import ( + "context" + "fmt" + + "github.com/hashicorp/terraform-plugin-sdk/v2/diag" + "github.com/hashicorp/terraform-plugin-sdk/v2/helper/retry" + "github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema" + + assertsapi "github.com/grafana/grafana-asserts-public-clients/go/gcom" + "github.com/grafana/terraform-provider-grafana/v4/internal/common" +) + +func makeResourcePromRules() *common.Resource { + schema := &schema.Resource{ + Description: "Manages Prometheus Rules configurations through Grafana Asserts API. " + + "Allows creation and management of custom Prometheus recording and alerting rules.", + + CreateContext: resourcePromRulesCreate, + ReadContext: resourcePromRulesRead, + UpdateContext: resourcePromRulesUpdate, + DeleteContext: resourcePromRulesDelete, + + Importer: &schema.ResourceImporter{ + StateContext: schema.ImportStatePassthroughContext, + }, + + Schema: map[string]*schema.Schema{ + "name": { + Type: schema.TypeString, + Required: true, + ForceNew: true, // Force recreation if name changes + Description: "The name of the Prometheus rules file. This will be stored with a .custom extension. " + + "Must follow naming validation rules (alphanumeric, hyphens, underscores).", + }, + "active": { + Type: schema.TypeBool, + Optional: true, + Default: true, + Description: "Whether the rules file is active. Inactive rules are not evaluated.", + }, + "group": { + Type: schema.TypeList, + Required: true, + Description: "List of Prometheus rule groups. Each group contains one or more rules " + + "and can have its own evaluation interval.", + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "name": { + Type: schema.TypeString, + Required: true, + Description: "The name of the rule group (e.g., 'latency_monitoring').", + }, + "interval": { + Type: schema.TypeString, + Optional: true, + Description: "Evaluation interval for this group (e.g., '30s', '1m'). " + + "If not specified, uses the global evaluation interval.", + }, + "rule": { + Type: schema.TypeList, + Required: true, + Description: "List of Prometheus rules in this group.", + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "record": { + Type: schema.TypeString, + Optional: true, + Description: "The name of the time series to output for recording rules. " + + "Either 'record' or 'alert' must be specified, but not both.", + }, + "alert": { + Type: schema.TypeString, + Optional: true, + Description: "The name of the alert for alerting rules. " + + "Either 'record' or 'alert' must be specified, but not both.", + }, + "expr": { + Type: schema.TypeString, + Required: true, + Description: "The PromQL expression to evaluate.", + }, + "duration": { + Type: schema.TypeString, + Optional: true, + Description: "How long the condition must be true before firing the alert " + + "(e.g., '5m'). Only applicable for alerting rules. Maps to 'for' in Prometheus.", + }, + "active": { + Type: schema.TypeBool, + Optional: true, + Default: true, + Description: "Whether this specific rule is active.", + }, + "labels": { + Type: schema.TypeMap, + Optional: true, + Description: "Labels to attach to the resulting time series or alert.", + Elem: &schema.Schema{Type: schema.TypeString}, + }, + "annotations": { + Type: schema.TypeMap, + Optional: true, + Description: "Annotations to add to alerts (e.g., summary, description).", + Elem: &schema.Schema{Type: schema.TypeString}, + }, + "disable_in_groups": { + Type: schema.TypeSet, + Optional: true, + Description: "List of group names where this rule should be disabled. " + + "Useful for conditional rule enablement.", + Elem: &schema.Schema{Type: schema.TypeString}, + }, + }, + }, + }, + }, + }, + }, + }, + } + + return common.NewLegacySDKResource( + common.CategoryAsserts, + "grafana_asserts_prom_rule_file", + common.NewResourceID(common.StringIDField("name")), + schema, + ).WithLister(assertsListerFunction(listPromRules)) +} + +func resourcePromRulesCreate(ctx context.Context, d *schema.ResourceData, meta interface{}) diag.Diagnostics { + client, stackID, diags := validateAssertsClient(meta) + if diags.HasError() { + return diags + } + + name := d.Get("name").(string) + active := d.Get("active").(bool) + + // Build the PrometheusRulesDto + rulesDto := assertsapi.PrometheusRulesDto{ + Name: &name, + Active: &active, + } + + // Build groups + groups, err := buildRuleGroups(d.Get("group").([]interface{})) + if err != nil { + return diag.FromErr(err) + } + rulesDto.Groups = groups + + // Call the API to create/update the rules file + // Note: PUT is idempotent, so create and update use the same operation + request := client.PromRulesConfigurationAPI.PutPromRules(ctx). + PrometheusRulesDto(rulesDto). + XScopeOrgID(fmt.Sprintf("%d", stackID)) + + _, err = request.Execute() + if err != nil { + return diag.FromErr(fmt.Errorf("failed to create Prometheus rules file: %w", err)) + } + + d.SetId(name) + + return resourcePromRulesRead(ctx, d, meta) +} + +func resourcePromRulesRead(ctx context.Context, d *schema.ResourceData, meta interface{}) diag.Diagnostics { + client, stackID, diags := validateAssertsClient(meta) + if diags.HasError() { + return diags + } + + name := d.Id() + + // Retry logic for read operation to handle eventual consistency + var foundRules *assertsapi.PrometheusRulesDto + err := withRetryRead(ctx, func(retryCount, maxRetries int) *retry.RetryError { + // Get specific rules file + request := client.PromRulesConfigurationAPI.GetPromRules(ctx, name). + XScopeOrgID(fmt.Sprintf("%d", stackID)) + + rules, resp, err := request.Execute() + if err != nil { + // If 404, the resource doesn't exist + if resp != nil && resp.StatusCode == 404 { + // Check if we should give up or retry + if retryCount >= maxRetries { + return createNonRetryableError("Prometheus rules file", name, retryCount) + } + return createRetryableError("Prometheus rules file", name, retryCount, maxRetries) + } + return createAPIError("get Prometheus rules file", retryCount, maxRetries, err) + } + + foundRules = rules + return nil + }) + + if err != nil { + // If not found after retries, remove from state + if foundRules == nil { + d.SetId("") + return nil + } + return diag.FromErr(err) + } + + // Set the resource data + if foundRules.Name != nil { + if err := d.Set("name", *foundRules.Name); err != nil { + return diag.FromErr(err) + } + } + + if foundRules.Active != nil { + if err := d.Set("active", *foundRules.Active); err != nil { + return diag.FromErr(err) + } + } + + // Flatten groups back into Terraform state + if len(foundRules.Groups) > 0 { + groups, err := flattenRuleGroups(foundRules.Groups) + if err != nil { + return diag.FromErr(err) + } + if err := d.Set("group", groups); err != nil { + return diag.FromErr(err) + } + } + + return nil +} + +func resourcePromRulesUpdate(ctx context.Context, d *schema.ResourceData, meta interface{}) diag.Diagnostics { + client, stackID, diags := validateAssertsClient(meta) + if diags.HasError() { + return diags + } + + name := d.Get("name").(string) + active := d.Get("active").(bool) + + // Build the PrometheusRulesDto + rulesDto := assertsapi.PrometheusRulesDto{ + Name: &name, + Active: &active, + } + + // Build groups + groups, err := buildRuleGroups(d.Get("group").([]interface{})) + if err != nil { + return diag.FromErr(err) + } + rulesDto.Groups = groups + + // Update using PUT (idempotent) + request := client.PromRulesConfigurationAPI.PutPromRules(ctx). + PrometheusRulesDto(rulesDto). + XScopeOrgID(fmt.Sprintf("%d", stackID)) + + _, err = request.Execute() + if err != nil { + return diag.FromErr(fmt.Errorf("failed to update Prometheus rules file: %w", err)) + } + + return resourcePromRulesRead(ctx, d, meta) +} + +func resourcePromRulesDelete(ctx context.Context, d *schema.ResourceData, meta interface{}) diag.Diagnostics { + client, stackID, diags := validateAssertsClient(meta) + if diags.HasError() { + return diags + } + + name := d.Id() + + // Delete the rules file + request := client.PromRulesConfigurationAPI.DeletePromRules(ctx, name). + XScopeOrgID(fmt.Sprintf("%d", stackID)) + + _, err := request.Execute() + if err != nil { + // Ignore 404 errors - resource already deleted + if !common.IsNotFoundError(err) { + return diag.FromErr(fmt.Errorf("failed to delete Prometheus rules file: %w", err)) + } + } + + return nil +} + +// buildRuleGroups converts Terraform schema data into PrometheusRuleGroupDto slice +func buildRuleGroups(groupsData []interface{}) ([]assertsapi.PrometheusRuleGroupDto, error) { + if len(groupsData) == 0 { + return nil, fmt.Errorf("at least one rule group is required") + } + + groups := make([]assertsapi.PrometheusRuleGroupDto, 0, len(groupsData)) + + for _, groupItem := range groupsData { + groupMap := groupItem.(map[string]interface{}) + + groupName := groupMap["name"].(string) + group := assertsapi.PrometheusRuleGroupDto{ + Name: &groupName, + } + + // Optional interval + if interval, ok := groupMap["interval"].(string); ok && interval != "" { + group.Interval = &interval + } + + // Build rules + rulesData := groupMap["rule"].([]interface{}) + if len(rulesData) == 0 { + return nil, fmt.Errorf("group '%s' must have at least one rule", groupName) + } + + rules := make([]assertsapi.PrometheusRuleDto, 0, len(rulesData)) + for _, ruleItem := range rulesData { + ruleMap := ruleItem.(map[string]interface{}) + + // Must have either record or alert (but not both) + record, hasRecord := ruleMap["record"].(string) + alert, hasAlert := ruleMap["alert"].(string) + + if (hasRecord && record != "") && (hasAlert && alert != "") { + return nil, fmt.Errorf("rule in group '%s' cannot have both 'record' and 'alert' specified", groupName) + } + if (!hasRecord || record == "") && (!hasAlert || alert == "") { + return nil, fmt.Errorf("rule in group '%s' must have either 'record' or 'alert' specified", groupName) + } + + expr := ruleMap["expr"].(string) + if expr == "" { + return nil, fmt.Errorf("rule in group '%s' must have 'expr' specified", groupName) + } + + rule := assertsapi.PrometheusRuleDto{ + Expr: &expr, + } + + if hasRecord && record != "" { + rule.Record = &record + } + + if hasAlert && alert != "" { + rule.Alert = &alert + } + + // Optional fields + if duration, ok := ruleMap["duration"].(string); ok && duration != "" { + rule.For = &duration + } + + if active, ok := ruleMap["active"].(bool); ok { + rule.Active = &active + } + + // Labels + if labelsData, ok := ruleMap["labels"].(map[string]interface{}); ok && len(labelsData) > 0 { + labels := make(map[string]string) + for k, v := range labelsData { + labels[k] = v.(string) + } + rule.Labels = labels + } + + // Annotations + if annotationsData, ok := ruleMap["annotations"].(map[string]interface{}); ok && len(annotationsData) > 0 { + annotations := make(map[string]string) + for k, v := range annotationsData { + annotations[k] = v.(string) + } + rule.Annotations = annotations + } + + // Disable in groups + if disableInGroupsData, ok := ruleMap["disable_in_groups"].(*schema.Set); ok && disableInGroupsData.Len() > 0 { + disableInGroups := make([]string, 0, disableInGroupsData.Len()) + for _, item := range disableInGroupsData.List() { + disableInGroups = append(disableInGroups, item.(string)) + } + rule.DisableInGroups = disableInGroups + } + + rules = append(rules, rule) + } + + group.Rules = rules + groups = append(groups, group) + } + + return groups, nil +} + +// flattenRuleGroups converts PrometheusRuleGroupDto slice into Terraform schema data +func flattenRuleGroups(groups []assertsapi.PrometheusRuleGroupDto) ([]interface{}, error) { + result := make([]interface{}, 0, len(groups)) + + for _, group := range groups { + groupMap := make(map[string]interface{}) + + if group.Name != nil { + groupMap["name"] = *group.Name + } + + if group.Interval != nil { + groupMap["interval"] = *group.Interval + } + + // Flatten rules + rules := make([]interface{}, 0, len(group.Rules)) + for _, rule := range group.Rules { + ruleMap := make(map[string]interface{}) + + if rule.Record != nil { + ruleMap["record"] = *rule.Record + } + + if rule.Alert != nil { + ruleMap["alert"] = *rule.Alert + } + + if rule.Expr != nil { + ruleMap["expr"] = *rule.Expr + } + + if rule.For != nil { + ruleMap["duration"] = *rule.For + } + + if rule.Active != nil { + ruleMap["active"] = *rule.Active + } + + if rule.Labels != nil && len(rule.Labels) > 0 { + ruleMap["labels"] = rule.Labels + } + + if rule.Annotations != nil && len(rule.Annotations) > 0 { + ruleMap["annotations"] = rule.Annotations + } + + if rule.DisableInGroups != nil && len(rule.DisableInGroups) > 0 { + ruleMap["disable_in_groups"] = rule.DisableInGroups + } + + rules = append(rules, ruleMap) + } + + groupMap["rule"] = rules + result = append(result, groupMap) + } + + return result, nil +} + diff --git a/internal/resources/asserts/resource_prom_rules_test.go b/internal/resources/asserts/resource_prom_rules_test.go new file mode 100644 index 000000000..51b94e7c2 --- /dev/null +++ b/internal/resources/asserts/resource_prom_rules_test.go @@ -0,0 +1,507 @@ +package asserts_test + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/grafana/terraform-provider-grafana/v4/internal/common" + "github.com/grafana/terraform-provider-grafana/v4/internal/testutils" + "github.com/hashicorp/terraform-plugin-sdk/v2/helper/acctest" + "github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource" + "github.com/hashicorp/terraform-plugin-sdk/v2/terraform" +) + +func TestAccAssertsPromRules_basic(t *testing.T) { + testutils.CheckCloudInstanceTestsEnabled(t) + + stackID := getTestStackID(t) + rName := fmt.Sprintf("test-acc-%s", acctest.RandString(8)) + + resource.ParallelTest(t, resource.TestCase{ + ProtoV5ProviderFactories: testutils.ProtoV5ProviderFactories, + CheckDestroy: testAccAssertsPromRulesCheckDestroy, + Steps: []resource.TestStep{ + { + Config: testAccAssertsPromRulesConfig(stackID, rName), + Check: resource.ComposeTestCheckFunc( + testAccAssertsPromRulesCheckExists("grafana_asserts_prom_rule_file.test", stackID, rName), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "name", rName), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "active", "true"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.#", "1"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.name", "latency_monitoring"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.interval", "30s"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.#", "2"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.record", "custom:latency:p99"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.1.alert", "HighLatency"), + testutils.CheckLister("grafana_asserts_prom_rule_file.test"), + ), + }, + { + // Test import + ResourceName: "grafana_asserts_prom_rule_file.test", + ImportState: true, + ImportStateVerify: true, + }, + { + // Test update + Config: testAccAssertsPromRulesConfigUpdated(stackID, rName), + Check: resource.ComposeTestCheckFunc( + testAccAssertsPromRulesCheckExists("grafana_asserts_prom_rule_file.test", stackID, rName), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "name", rName), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "active", "true"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.#", "2"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.name", "latency_monitoring"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.1.name", "error_monitoring"), + ), + }, + }, + }) +} + +func TestAccAssertsPromRules_recordingRule(t *testing.T) { + testutils.CheckCloudInstanceTestsEnabled(t) + + stackID := getTestStackID(t) + rName := fmt.Sprintf("test-recording-%s", acctest.RandString(8)) + + resource.ParallelTest(t, resource.TestCase{ + ProtoV5ProviderFactories: testutils.ProtoV5ProviderFactories, + CheckDestroy: testAccAssertsPromRulesCheckDestroy, + Steps: []resource.TestStep{ + { + Config: testAccAssertsPromRulesRecordingConfig(stackID, rName), + Check: resource.ComposeTestCheckFunc( + testAccAssertsPromRulesCheckExists("grafana_asserts_prom_rule_file.test", stackID, rName), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "name", rName), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.record", "custom:requests:rate"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.labels.source", "custom"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.labels.severity", "info"), + ), + }, + }, + }) +} + +func TestAccAssertsPromRules_alertingRule(t *testing.T) { + testutils.CheckCloudInstanceTestsEnabled(t) + + stackID := getTestStackID(t) + rName := fmt.Sprintf("test-alerting-%s", acctest.RandString(8)) + + resource.ParallelTest(t, resource.TestCase{ + ProtoV5ProviderFactories: testutils.ProtoV5ProviderFactories, + CheckDestroy: testAccAssertsPromRulesCheckDestroy, + Steps: []resource.TestStep{ + { + Config: testAccAssertsPromRulesAlertingConfig(stackID, rName), + Check: resource.ComposeTestCheckFunc( + testAccAssertsPromRulesCheckExists("grafana_asserts_prom_rule_file.test", stackID, rName), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "name", rName), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.alert", "HighLatency"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.duration", "5m"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.labels.severity", "warning"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.annotations.summary", "High latency detected"), + ), + }, + }, + }) +} + +func TestAccAssertsPromRules_multipleGroups(t *testing.T) { + testutils.CheckCloudInstanceTestsEnabled(t) + + stackID := getTestStackID(t) + rName := fmt.Sprintf("test-multi-%s", acctest.RandString(8)) + + resource.ParallelTest(t, resource.TestCase{ + ProtoV5ProviderFactories: testutils.ProtoV5ProviderFactories, + CheckDestroy: testAccAssertsPromRulesCheckDestroy, + Steps: []resource.TestStep{ + { + Config: testAccAssertsPromRulesMultiGroupConfig(stackID, rName), + Check: resource.ComposeTestCheckFunc( + testAccAssertsPromRulesCheckExists("grafana_asserts_prom_rule_file.test", stackID, rName), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.#", "3"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.name", "latency_rules"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.1.name", "error_rules"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.2.name", "throughput_rules"), + ), + }, + }, + }) +} + +func TestAccAssertsPromRules_inactive(t *testing.T) { + testutils.CheckCloudInstanceTestsEnabled(t) + + stackID := getTestStackID(t) + rName := fmt.Sprintf("test-inactive-%s", acctest.RandString(8)) + + resource.ParallelTest(t, resource.TestCase{ + ProtoV5ProviderFactories: testutils.ProtoV5ProviderFactories, + CheckDestroy: testAccAssertsPromRulesCheckDestroy, + Steps: []resource.TestStep{ + { + Config: testAccAssertsPromRulesInactiveConfig(stackID, rName), + Check: resource.ComposeTestCheckFunc( + testAccAssertsPromRulesCheckExists("grafana_asserts_prom_rule_file.test", stackID, rName), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "active", "false"), + ), + }, + }, + }) +} + +func TestAccAssertsPromRules_eventualConsistencyStress(t *testing.T) { + testutils.CheckCloudInstanceTestsEnabled(t) + testutils.CheckStressTestsEnabled(t) + + stackID := getTestStackID(t) + baseName := fmt.Sprintf("stress-test-%s", acctest.RandString(8)) + + resource.ParallelTest(t, resource.TestCase{ + ProtoV5ProviderFactories: testutils.ProtoV5ProviderFactories, + CheckDestroy: testAccAssertsPromRulesCheckDestroy, + Steps: []resource.TestStep{ + { + Config: testAccAssertsPromRulesStressConfig(stackID, baseName), + Check: resource.ComposeTestCheckFunc( + // Check that all resources were created successfully + testAccAssertsPromRulesCheckExists("grafana_asserts_prom_rule_file.test1", stackID, baseName+"-1"), + testAccAssertsPromRulesCheckExists("grafana_asserts_prom_rule_file.test2", stackID, baseName+"-2"), + testAccAssertsPromRulesCheckExists("grafana_asserts_prom_rule_file.test3", stackID, baseName+"-3"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test1", "name", baseName+"-1"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test2", "name", baseName+"-2"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test3", "name", baseName+"-3"), + ), + }, + }, + }) +} + +func testAccAssertsPromRulesCheckExists(rn string, stackID int64, name string) resource.TestCheckFunc { + return func(s *terraform.State) error { + rs, ok := s.RootModule().Resources[rn] + if !ok { + return fmt.Errorf("resource not found: %s\n %#v", rn, s.RootModule().Resources) + } + + if rs.Primary.ID == "" { + return fmt.Errorf("resource id not set") + } + + client := testutils.Provider.Meta().(*common.Client).AssertsAPIClient + ctx := context.Background() + + // Get specific rules file + request := client.PromRulesConfigurationAPI.GetPromRules(ctx, name). + XScopeOrgID(fmt.Sprintf("%d", stackID)) + + _, resp, err := request.Execute() + if err != nil { + if resp != nil && resp.StatusCode == 404 { + return fmt.Errorf("Prometheus rules file %s not found", name) + } + return fmt.Errorf("error getting Prometheus rules file: %s", err) + } + + return nil + } +} + +func testAccAssertsPromRulesCheckDestroy(s *terraform.State) error { + client := testutils.Provider.Meta().(*common.Client).AssertsAPIClient + ctx := context.Background() + + deadline := time.Now().Add(60 * time.Second) + for _, rs := range s.RootModule().Resources { + if rs.Type != "grafana_asserts_prom_rule_file" { + continue + } + + // Resource ID is just the name now + name := rs.Primary.ID + stackID := fmt.Sprintf("%d", testutils.Provider.Meta().(*common.Client).GrafanaStackID) + + for { + // Try to get the rules file + request := client.PromRulesConfigurationAPI.GetPromRules(ctx, name). + XScopeOrgID(stackID) + + _, resp, err := request.Execute() + if err != nil { + // If 404, resource is deleted - that's what we want + if resp != nil && resp.StatusCode == 404 { + break + } + // If we can't get it for other reasons, assume it's deleted + if common.IsNotFoundError(err) { + break + } + return fmt.Errorf("error checking Prometheus rules file destruction: %s", err) + } + + // Resource still exists + if time.Now().After(deadline) { + return fmt.Errorf("Prometheus rules file %s still exists", name) + } + time.Sleep(2 * time.Second) + } + } + + return nil +} + +func testAccAssertsPromRulesConfig(stackID int64, name string) string { + return fmt.Sprintf(` +resource "grafana_asserts_prom_rule_file" "test" { + name = "%s" + active = true + + group { + name = "latency_monitoring" + interval = "30s" + + rule { + record = "custom:latency:p99" + expr = "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m]))" + labels = { + source = "custom_instrumentation" + severity = "info" + } + } + + rule { + alert = "HighLatency" + expr = "custom:latency:p99 > 0.5" + duration = "5m" + labels = { + severity = "warning" + category = "Latency" + } + annotations = { + summary = "High latency detected" + description = "P99 latency is above 500ms" + } + } + } +} +`, name) +} + +func testAccAssertsPromRulesConfigUpdated(stackID int64, name string) string { + return fmt.Sprintf(` +resource "grafana_asserts_prom_rule_file" "test" { + name = "%s" + active = true + + group { + name = "latency_monitoring" + interval = "1m" + + rule { + record = "custom:latency:p99" + expr = "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[10m]))" + labels = { + source = "custom_instrumentation_v2" + severity = "info" + } + } + + rule { + alert = "HighLatency" + expr = "custom:latency:p99 > 0.8" + duration = "10m" + labels = { + severity = "critical" + category = "Latency" + } + annotations = { + summary = "Very high latency detected" + description = "P99 latency is above 800ms" + } + } + } + + group { + name = "error_monitoring" + interval = "30s" + + rule { + alert = "HighErrorRate" + expr = "rate(http_requests_total{status=~\"5..\"}[5m]) > 0.1" + duration = "5m" + labels = { + severity = "critical" + category = "Errors" + } + annotations = { + summary = "High error rate detected" + description = "Error rate is above 10%%" + } + } + } +} +`, name) +} + +func testAccAssertsPromRulesRecordingConfig(stackID int64, name string) string { + return fmt.Sprintf(` +resource "grafana_asserts_prom_rule_file" "test" { + name = "%s" + active = true + + group { + name = "recording_rules" + interval = "1m" + + rule { + record = "custom:requests:rate" + expr = "rate(http_requests_total[5m])" + labels = { + source = "custom" + severity = "info" + } + } + } +} +`, name) +} + +func testAccAssertsPromRulesAlertingConfig(stackID int64, name string) string { + return fmt.Sprintf(` +resource "grafana_asserts_prom_rule_file" "test" { + name = "%s" + active = true + + group { + name = "alerting_rules" + interval = "30s" + + rule { + alert = "HighLatency" + expr = "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m])) > 0.5" + duration = "5m" + labels = { + severity = "warning" + category = "Performance" + } + annotations = { + summary = "High latency detected" + description = "P99 latency is consistently above 500ms for 5 minutes" + } + } + } +} +`, name) +} + +func testAccAssertsPromRulesMultiGroupConfig(stackID int64, name string) string { + return fmt.Sprintf(` +resource "grafana_asserts_prom_rule_file" "test" { + name = "%s" + active = true + + group { + name = "latency_rules" + interval = "30s" + + rule { + record = "custom:latency:p95" + expr = "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))" + } + } + + group { + name = "error_rules" + interval = "1m" + + rule { + alert = "HighErrorRate" + expr = "rate(http_requests_total{status=~\"5..\"}[5m]) > 0.05" + duration = "5m" + labels = { + severity = "warning" + } + } + } + + group { + name = "throughput_rules" + interval = "2m" + + rule { + record = "custom:throughput:total" + expr = "sum(rate(http_requests_total[5m]))" + } + } +} +`, name) +} + +func testAccAssertsPromRulesInactiveConfig(stackID int64, name string) string { + return fmt.Sprintf(` +resource "grafana_asserts_prom_rule_file" "test" { + name = "%s" + active = false + + group { + name = "inactive_rules" + + rule { + record = "custom:test:metric" + expr = "up" + } + } +} +`, name) +} + +func testAccAssertsPromRulesStressConfig(stackID int64, baseName string) string { + return fmt.Sprintf(` +resource "grafana_asserts_prom_rule_file" "test1" { + name = "%s-1" + active = true + + group { + name = "stress_test_group_1" + + rule { + record = "stress:test:metric1" + expr = "up" + } + } +} + +resource "grafana_asserts_prom_rule_file" "test2" { + name = "%s-2" + active = true + + group { + name = "stress_test_group_2" + + rule { + record = "stress:test:metric2" + expr = "up" + } + } +} + +resource "grafana_asserts_prom_rule_file" "test3" { + name = "%s-3" + active = true + + group { + name = "stress_test_group_3" + + rule { + record = "stress:test:metric3" + expr = "up" + } + } +} +`, baseName, baseName, baseName) +} + diff --git a/internal/resources/asserts/resources.go b/internal/resources/asserts/resources.go index 36da880be..a219614cd 100644 --- a/internal/resources/asserts/resources.go +++ b/internal/resources/asserts/resources.go @@ -11,6 +11,7 @@ var Resources = []*common.Resource{ makeResourceDisabledAlertConfig(), makeResourceCustomModelRules(), makeResourceLogConfig(), + makeResourcePromRules(), makeResourceThresholds(), } From a6fbbbabbf872d7cf427a5b368d81b98b0365a7c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 3 Nov 2025 07:02:02 -0700 Subject: [PATCH 02/20] 4914:fixing test --- internal/resources/asserts/common_lister.go | 2 +- internal/resources/asserts/resource_prom_rules.go | 15 +++++++-------- .../resources/asserts/resource_prom_rules_test.go | 4 ++-- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/internal/resources/asserts/common_lister.go b/internal/resources/asserts/common_lister.go index 61b626d78..ff5203093 100644 --- a/internal/resources/asserts/common_lister.go +++ b/internal/resources/asserts/common_lister.go @@ -101,7 +101,7 @@ func listLogConfigs(ctx context.Context, client *assertsapi.APIClient, stackID s // listPromRules retrieves the list of all Prometheus rules file names for a specific stack func listPromRules(ctx context.Context, client *assertsapi.APIClient, stackID string) ([]string, error) { - request := client.PromRulesConfigurationAPI.ListPromRules(ctx). + request := client.PromRulesConfigControllerAPI.ListPromRules(ctx). XScopeOrgID(stackID) namesDto, _, err := request.Execute() diff --git a/internal/resources/asserts/resource_prom_rules.go b/internal/resources/asserts/resource_prom_rules.go index 05d3b31a0..6e284ecb9 100644 --- a/internal/resources/asserts/resource_prom_rules.go +++ b/internal/resources/asserts/resource_prom_rules.go @@ -153,7 +153,7 @@ func resourcePromRulesCreate(ctx context.Context, d *schema.ResourceData, meta i // Call the API to create/update the rules file // Note: PUT is idempotent, so create and update use the same operation - request := client.PromRulesConfigurationAPI.PutPromRules(ctx). + request := client.PromRulesConfigControllerAPI.PutPromRules(ctx). PrometheusRulesDto(rulesDto). XScopeOrgID(fmt.Sprintf("%d", stackID)) @@ -179,7 +179,7 @@ func resourcePromRulesRead(ctx context.Context, d *schema.ResourceData, meta int var foundRules *assertsapi.PrometheusRulesDto err := withRetryRead(ctx, func(retryCount, maxRetries int) *retry.RetryError { // Get specific rules file - request := client.PromRulesConfigurationAPI.GetPromRules(ctx, name). + request := client.PromRulesConfigControllerAPI.GetPromRules(ctx, name). XScopeOrgID(fmt.Sprintf("%d", stackID)) rules, resp, err := request.Execute() @@ -258,7 +258,7 @@ func resourcePromRulesUpdate(ctx context.Context, d *schema.ResourceData, meta i rulesDto.Groups = groups // Update using PUT (idempotent) - request := client.PromRulesConfigurationAPI.PutPromRules(ctx). + request := client.PromRulesConfigControllerAPI.PutPromRules(ctx). PrometheusRulesDto(rulesDto). XScopeOrgID(fmt.Sprintf("%d", stackID)) @@ -279,7 +279,7 @@ func resourcePromRulesDelete(ctx context.Context, d *schema.ResourceData, meta i name := d.Id() // Delete the rules file - request := client.PromRulesConfigurationAPI.DeletePromRules(ctx, name). + request := client.PromRulesConfigControllerAPI.DeletePromRules(ctx, name). XScopeOrgID(fmt.Sprintf("%d", stackID)) _, err := request.Execute() @@ -438,15 +438,15 @@ func flattenRuleGroups(groups []assertsapi.PrometheusRuleGroupDto) ([]interface{ ruleMap["active"] = *rule.Active } - if rule.Labels != nil && len(rule.Labels) > 0 { + if len(rule.Labels) > 0 { ruleMap["labels"] = rule.Labels } - if rule.Annotations != nil && len(rule.Annotations) > 0 { + if len(rule.Annotations) > 0 { ruleMap["annotations"] = rule.Annotations } - if rule.DisableInGroups != nil && len(rule.DisableInGroups) > 0 { + if len(rule.DisableInGroups) > 0 { ruleMap["disable_in_groups"] = rule.DisableInGroups } @@ -459,4 +459,3 @@ func flattenRuleGroups(groups []assertsapi.PrometheusRuleGroupDto) ([]interface{ return result, nil } - diff --git a/internal/resources/asserts/resource_prom_rules_test.go b/internal/resources/asserts/resource_prom_rules_test.go index 51b94e7c2..00412302b 100644 --- a/internal/resources/asserts/resource_prom_rules_test.go +++ b/internal/resources/asserts/resource_prom_rules_test.go @@ -196,7 +196,7 @@ func testAccAssertsPromRulesCheckExists(rn string, stackID int64, name string) r ctx := context.Background() // Get specific rules file - request := client.PromRulesConfigurationAPI.GetPromRules(ctx, name). + request := client.PromRulesConfigControllerAPI.GetPromRules(ctx, name). XScopeOrgID(fmt.Sprintf("%d", stackID)) _, resp, err := request.Execute() @@ -227,7 +227,7 @@ func testAccAssertsPromRulesCheckDestroy(s *terraform.State) error { for { // Try to get the rules file - request := client.PromRulesConfigurationAPI.GetPromRules(ctx, name). + request := client.PromRulesConfigControllerAPI.GetPromRules(ctx, name). XScopeOrgID(stackID) _, resp, err := request.Execute() From 6709eaa75b4f47fb6f9d479b3f0df01110d62d7e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 3 Nov 2025 07:07:58 -0700 Subject: [PATCH 03/20] 4914: adding example --- docs/resources/asserts_prom_rule_file.md | 314 ++++++++++++++++++ .../grafana_asserts_prom_rule_file/import.sh | 1 + .../resource.tf | 246 ++++++++++++++ 3 files changed, 561 insertions(+) create mode 100644 docs/resources/asserts_prom_rule_file.md create mode 100644 examples/resources/grafana_asserts_prom_rule_file/import.sh create mode 100644 examples/resources/grafana_asserts_prom_rule_file/resource.tf diff --git a/docs/resources/asserts_prom_rule_file.md b/docs/resources/asserts_prom_rule_file.md new file mode 100644 index 000000000..473279875 --- /dev/null +++ b/docs/resources/asserts_prom_rule_file.md @@ -0,0 +1,314 @@ +--- +# generated by https://github.com/hashicorp/terraform-plugin-docs +page_title: "grafana_asserts_prom_rule_file Resource - terraform-provider-grafana" +subcategory: "Knowledge Graph" +description: |- + Manages Prometheus Rules configurations through Grafana Asserts API. Allows creation and management of custom Prometheus recording and alerting rules. +--- + +# grafana_asserts_prom_rule_file (Resource) + +Manages Prometheus Rules configurations through Grafana Asserts API. Allows creation and management of custom Prometheus recording and alerting rules. + +## Example Usage + +```terraform +# Basic recording rule for latency metrics +resource "grafana_asserts_prom_rule_file" "latency_metrics" { + name = "custom-latency-metrics" + active = true + + group { + name = "latency_recording_rules" + interval = "30s" + + rule { + record = "custom:latency:p95" + expr = "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))" + labels = { + source = "custom_instrumentation" + severity = "info" + } + } + + rule { + record = "custom:latency:p99" + expr = "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m]))" + labels = { + source = "custom_instrumentation" + severity = "info" + } + } + } +} + +# Alert rules for high latency +resource "grafana_asserts_prom_rule_file" "latency_alerts" { + name = "custom-latency-alerts" + active = true + + group { + name = "latency_alerting" + interval = "30s" + + rule { + alert = "HighLatency" + expr = "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m])) > 0.5" + duration = "5m" + labels = { + severity = "warning" + category = "Latency" + } + annotations = { + summary = "High latency detected" + description = "P99 latency is above 500ms for 5 minutes" + } + } + + rule { + alert = "VeryHighLatency" + expr = "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m])) > 1.0" + duration = "2m" + labels = { + severity = "critical" + category = "Latency" + } + annotations = { + summary = "Very high latency detected" + description = "P99 latency is above 1 second" + } + } + } +} + +# Comprehensive monitoring rules with multiple groups +resource "grafana_asserts_prom_rule_file" "comprehensive_monitoring" { + name = "custom-comprehensive-monitoring" + active = true + + # Latency monitoring + group { + name = "latency_monitoring" + interval = "30s" + + rule { + record = "custom:latency:p99" + expr = "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m]))" + labels = { + source = "custom" + } + } + + rule { + alert = "HighLatency" + expr = "custom:latency:p99 > 0.5" + duration = "5m" + labels = { + severity = "warning" + } + annotations = { + summary = "High latency detected" + } + } + } + + # Error rate monitoring + group { + name = "error_monitoring" + interval = "1m" + + rule { + record = "custom:error:rate" + expr = "rate(http_requests_total{status=~\"5..\"}[5m])" + labels = { + source = "custom" + } + } + + rule { + alert = "HighErrorRate" + expr = "custom:error:rate > 0.1" + duration = "10m" + labels = { + severity = "critical" + category = "Errors" + } + annotations = { + summary = "High error rate detected" + description = "Error rate is above 10%" + } + } + } + + # Throughput monitoring + group { + name = "throughput_monitoring" + interval = "1m" + + rule { + record = "custom:throughput:total" + expr = "sum(rate(http_requests_total[5m]))" + labels = { + source = "custom" + } + } + + rule { + alert = "LowThroughput" + expr = "custom:throughput:total < 10" + duration = "5m" + labels = { + severity = "warning" + category = "Throughput" + } + annotations = { + summary = "Low throughput detected" + description = "Request throughput is below 10 requests/second" + } + } + } +} + +# Rules with conditional enablement +resource "grafana_asserts_prom_rule_file" "conditional_rules" { + name = "custom-conditional-rules" + active = true + + group { + name = "environment_specific_rules" + interval = "30s" + + rule { + alert = "TestAlert" + expr = "up == 0" + duration = "1m" + labels = { + severity = "info" + } + annotations = { + summary = "Test alert that is disabled in production" + } + # This rule will be disabled in the production group + disable_in_groups = ["production"] + } + + rule { + alert = "CriticalAlert" + expr = "up == 0" + duration = "30s" + labels = { + severity = "critical" + } + annotations = { + summary = "Critical alert that fires in all environments" + } + } + } +} + +# Inactive rules (for staging/testing) +resource "grafana_asserts_prom_rule_file" "staging_rules" { + name = "custom-staging-rules" + active = false # Rules file is inactive + + group { + name = "staging_tests" + interval = "1m" + + rule { + record = "staging:test:metric" + expr = "up" + labels = { + environment = "staging" + } + } + } +} + +# SLO-based alerting +resource "grafana_asserts_prom_rule_file" "slo_alerts" { + name = "custom-slo-alerts" + active = true + + group { + name = "slo_monitoring" + interval = "1m" + + rule { + record = "custom:slo:availability" + expr = "sum(rate(http_requests_total{status!~\"5..\"}[5m])) / sum(rate(http_requests_total[5m]))" + labels = { + slo_type = "availability" + } + } + + rule { + alert = "SLOAvailabilityBreach" + expr = "custom:slo:availability < 0.995" + duration = "5m" + labels = { + severity = "critical" + category = "SLO" + } + annotations = { + summary = "SLO availability breach" + description = "Availability is below 99.5% SLO target" + runbook_url = "https://docs.example.com/runbooks/availability-breach" + } + } + } +} +``` + + +## Schema + +### Required + +- `group` (Block List, Min: 1) List of Prometheus rule groups. Each group contains one or more rules and can have its own evaluation interval. (see [below for nested schema](#nestedblock--group)) +- `name` (String) The name of the Prometheus rules file. This will be stored with a .custom extension. Must follow naming validation rules (alphanumeric, hyphens, underscores). + +### Optional + +- `active` (Boolean) Whether the rules file is active. Inactive rules are not evaluated. Defaults to `true`. + +### Read-Only + +- `id` (String) The ID of this resource. + + +### Nested Schema for `group` + +Required: + +- `name` (String) The name of the rule group (e.g., 'latency_monitoring'). +- `rule` (Block List, Min: 1) List of Prometheus rules in this group. (see [below for nested schema](#nestedblock--group--rule)) + +Optional: + +- `interval` (String) Evaluation interval for this group (e.g., '30s', '1m'). If not specified, uses the global evaluation interval. + + +### Nested Schema for `group.rule` + +Required: + +- `expr` (String) The PromQL expression to evaluate. + +Optional: + +- `active` (Boolean) Whether this specific rule is active. Defaults to `true`. +- `alert` (String) The name of the alert for alerting rules. Either 'record' or 'alert' must be specified, but not both. +- `annotations` (Map of String) Annotations to add to alerts (e.g., summary, description). +- `disable_in_groups` (Set of String) List of group names where this rule should be disabled. Useful for conditional rule enablement. +- `duration` (String) How long the condition must be true before firing the alert (e.g., '5m'). Only applicable for alerting rules. Maps to 'for' in Prometheus. +- `labels` (Map of String) Labels to attach to the resulting time series or alert. +- `record` (String) The name of the time series to output for recording rules. Either 'record' or 'alert' must be specified, but not both. + +## Import + +Import is supported using the following syntax: + +```shell +terraform import grafana_asserts_prom_rule_file.name "{{ name }}" +``` diff --git a/examples/resources/grafana_asserts_prom_rule_file/import.sh b/examples/resources/grafana_asserts_prom_rule_file/import.sh new file mode 100644 index 000000000..64588c5df --- /dev/null +++ b/examples/resources/grafana_asserts_prom_rule_file/import.sh @@ -0,0 +1 @@ +terraform import grafana_asserts_prom_rule_file.name "{{ name }}" diff --git a/examples/resources/grafana_asserts_prom_rule_file/resource.tf b/examples/resources/grafana_asserts_prom_rule_file/resource.tf new file mode 100644 index 000000000..f848ef4da --- /dev/null +++ b/examples/resources/grafana_asserts_prom_rule_file/resource.tf @@ -0,0 +1,246 @@ +# Basic recording rule for latency metrics +resource "grafana_asserts_prom_rule_file" "latency_metrics" { + name = "custom-latency-metrics" + active = true + + group { + name = "latency_recording_rules" + interval = "30s" + + rule { + record = "custom:latency:p95" + expr = "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))" + labels = { + source = "custom_instrumentation" + severity = "info" + } + } + + rule { + record = "custom:latency:p99" + expr = "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m]))" + labels = { + source = "custom_instrumentation" + severity = "info" + } + } + } +} + +# Alert rules for high latency +resource "grafana_asserts_prom_rule_file" "latency_alerts" { + name = "custom-latency-alerts" + active = true + + group { + name = "latency_alerting" + interval = "30s" + + rule { + alert = "HighLatency" + expr = "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m])) > 0.5" + duration = "5m" + labels = { + severity = "warning" + category = "Latency" + } + annotations = { + summary = "High latency detected" + description = "P99 latency is above 500ms for 5 minutes" + } + } + + rule { + alert = "VeryHighLatency" + expr = "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m])) > 1.0" + duration = "2m" + labels = { + severity = "critical" + category = "Latency" + } + annotations = { + summary = "Very high latency detected" + description = "P99 latency is above 1 second" + } + } + } +} + +# Comprehensive monitoring rules with multiple groups +resource "grafana_asserts_prom_rule_file" "comprehensive_monitoring" { + name = "custom-comprehensive-monitoring" + active = true + + # Latency monitoring + group { + name = "latency_monitoring" + interval = "30s" + + rule { + record = "custom:latency:p99" + expr = "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m]))" + labels = { + source = "custom" + } + } + + rule { + alert = "HighLatency" + expr = "custom:latency:p99 > 0.5" + duration = "5m" + labels = { + severity = "warning" + } + annotations = { + summary = "High latency detected" + } + } + } + + # Error rate monitoring + group { + name = "error_monitoring" + interval = "1m" + + rule { + record = "custom:error:rate" + expr = "rate(http_requests_total{status=~\"5..\"}[5m])" + labels = { + source = "custom" + } + } + + rule { + alert = "HighErrorRate" + expr = "custom:error:rate > 0.1" + duration = "10m" + labels = { + severity = "critical" + category = "Errors" + } + annotations = { + summary = "High error rate detected" + description = "Error rate is above 10%" + } + } + } + + # Throughput monitoring + group { + name = "throughput_monitoring" + interval = "1m" + + rule { + record = "custom:throughput:total" + expr = "sum(rate(http_requests_total[5m]))" + labels = { + source = "custom" + } + } + + rule { + alert = "LowThroughput" + expr = "custom:throughput:total < 10" + duration = "5m" + labels = { + severity = "warning" + category = "Throughput" + } + annotations = { + summary = "Low throughput detected" + description = "Request throughput is below 10 requests/second" + } + } + } +} + +# Rules with conditional enablement +resource "grafana_asserts_prom_rule_file" "conditional_rules" { + name = "custom-conditional-rules" + active = true + + group { + name = "environment_specific_rules" + interval = "30s" + + rule { + alert = "TestAlert" + expr = "up == 0" + duration = "1m" + labels = { + severity = "info" + } + annotations = { + summary = "Test alert that is disabled in production" + } + # This rule will be disabled in the production group + disable_in_groups = ["production"] + } + + rule { + alert = "CriticalAlert" + expr = "up == 0" + duration = "30s" + labels = { + severity = "critical" + } + annotations = { + summary = "Critical alert that fires in all environments" + } + } + } +} + +# Inactive rules (for staging/testing) +resource "grafana_asserts_prom_rule_file" "staging_rules" { + name = "custom-staging-rules" + active = false # Rules file is inactive + + group { + name = "staging_tests" + interval = "1m" + + rule { + record = "staging:test:metric" + expr = "up" + labels = { + environment = "staging" + } + } + } +} + +# SLO-based alerting +resource "grafana_asserts_prom_rule_file" "slo_alerts" { + name = "custom-slo-alerts" + active = true + + group { + name = "slo_monitoring" + interval = "1m" + + rule { + record = "custom:slo:availability" + expr = "sum(rate(http_requests_total{status!~\"5..\"}[5m])) / sum(rate(http_requests_total[5m]))" + labels = { + slo_type = "availability" + } + } + + rule { + alert = "SLOAvailabilityBreach" + expr = "custom:slo:availability < 0.995" + duration = "5m" + labels = { + severity = "critical" + category = "SLO" + } + annotations = { + summary = "SLO availability breach" + description = "Availability is below 99.5% SLO target" + runbook_url = "https://docs.example.com/runbooks/availability-breach" + } + } + } +} + From 028244f6c13ec39bed585a21e41d8b73760f7119 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 3 Nov 2025 07:11:20 -0700 Subject: [PATCH 04/20] 4914: adding to catalog --- internal/resources/asserts/catalog-resource.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/internal/resources/asserts/catalog-resource.yaml b/internal/resources/asserts/catalog-resource.yaml index 60399e98f..2a1be9b3e 100644 --- a/internal/resources/asserts/catalog-resource.yaml +++ b/internal/resources/asserts/catalog-resource.yaml @@ -53,6 +53,19 @@ spec: --- apiVersion: backstage.io/v1alpha1 kind: Component +metadata: + name: resource-grafana_asserts_prom_rule_file + title: grafana_asserts_prom_rule_file (resource) + description: | + resource `grafana_asserts_prom_rule_file` in Grafana Labs' Terraform Provider +spec: + subcomponentOf: component:default/terraform-provider-grafana + type: terraform-resource + owner: group:default/asserts + lifecycle: production +--- +apiVersion: backstage.io/v1alpha1 +kind: Component metadata: name: resource-grafana_asserts_thresholds title: grafana_asserts_thresholds (resource) From c5fad8a4c70c7d5b0739828c7708fc75dc3f7367 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 3 Nov 2025 07:29:20 -0700 Subject: [PATCH 05/20] 4914: build complexity --- .../resources/asserts/resource_prom_rules.go | 141 ++++++++++-------- 1 file changed, 81 insertions(+), 60 deletions(-) diff --git a/internal/resources/asserts/resource_prom_rules.go b/internal/resources/asserts/resource_prom_rules.go index 6e284ecb9..abab39a57 100644 --- a/internal/resources/asserts/resource_prom_rules.go +++ b/internal/resources/asserts/resource_prom_rules.go @@ -320,82 +320,103 @@ func buildRuleGroups(groupsData []interface{}) ([]assertsapi.PrometheusRuleGroup return nil, fmt.Errorf("group '%s' must have at least one rule", groupName) } - rules := make([]assertsapi.PrometheusRuleDto, 0, len(rulesData)) - for _, ruleItem := range rulesData { - ruleMap := ruleItem.(map[string]interface{}) + rules, err := buildRules(rulesData, groupName) + if err != nil { + return nil, err + } - // Must have either record or alert (but not both) - record, hasRecord := ruleMap["record"].(string) - alert, hasAlert := ruleMap["alert"].(string) + group.Rules = rules + groups = append(groups, group) + } - if (hasRecord && record != "") && (hasAlert && alert != "") { - return nil, fmt.Errorf("rule in group '%s' cannot have both 'record' and 'alert' specified", groupName) - } - if (!hasRecord || record == "") && (!hasAlert || alert == "") { - return nil, fmt.Errorf("rule in group '%s' must have either 'record' or 'alert' specified", groupName) - } + return groups, nil +} - expr := ruleMap["expr"].(string) - if expr == "" { - return nil, fmt.Errorf("rule in group '%s' must have 'expr' specified", groupName) - } +// buildRules converts Terraform schema data for rules into PrometheusRuleDto slice +func buildRules(rulesData []interface{}, groupName string) ([]assertsapi.PrometheusRuleDto, error) { + rules := make([]assertsapi.PrometheusRuleDto, 0, len(rulesData)) - rule := assertsapi.PrometheusRuleDto{ - Expr: &expr, - } + for _, ruleItem := range rulesData { + ruleMap := ruleItem.(map[string]interface{}) - if hasRecord && record != "" { - rule.Record = &record - } + rule, err := buildRule(ruleMap, groupName) + if err != nil { + return nil, err + } - if hasAlert && alert != "" { - rule.Alert = &alert - } + rules = append(rules, rule) + } - // Optional fields - if duration, ok := ruleMap["duration"].(string); ok && duration != "" { - rule.For = &duration - } + return rules, nil +} - if active, ok := ruleMap["active"].(bool); ok { - rule.Active = &active - } +// buildRule converts a single rule from Terraform schema data into PrometheusRuleDto +func buildRule(ruleMap map[string]interface{}, groupName string) (assertsapi.PrometheusRuleDto, error) { + // Validate record/alert fields + record, hasRecord := ruleMap["record"].(string) + alert, hasAlert := ruleMap["alert"].(string) - // Labels - if labelsData, ok := ruleMap["labels"].(map[string]interface{}); ok && len(labelsData) > 0 { - labels := make(map[string]string) - for k, v := range labelsData { - labels[k] = v.(string) - } - rule.Labels = labels - } + if (hasRecord && record != "") && (hasAlert && alert != "") { + return assertsapi.PrometheusRuleDto{}, fmt.Errorf("rule in group '%s' cannot have both 'record' and 'alert' specified", groupName) + } + if (!hasRecord || record == "") && (!hasAlert || alert == "") { + return assertsapi.PrometheusRuleDto{}, fmt.Errorf("rule in group '%s' must have either 'record' or 'alert' specified", groupName) + } - // Annotations - if annotationsData, ok := ruleMap["annotations"].(map[string]interface{}); ok && len(annotationsData) > 0 { - annotations := make(map[string]string) - for k, v := range annotationsData { - annotations[k] = v.(string) - } - rule.Annotations = annotations - } + expr := ruleMap["expr"].(string) + if expr == "" { + return assertsapi.PrometheusRuleDto{}, fmt.Errorf("rule in group '%s' must have 'expr' specified", groupName) + } - // Disable in groups - if disableInGroupsData, ok := ruleMap["disable_in_groups"].(*schema.Set); ok && disableInGroupsData.Len() > 0 { - disableInGroups := make([]string, 0, disableInGroupsData.Len()) - for _, item := range disableInGroupsData.List() { - disableInGroups = append(disableInGroups, item.(string)) - } - rule.DisableInGroups = disableInGroups - } + rule := assertsapi.PrometheusRuleDto{ + Expr: &expr, + } + + if hasRecord && record != "" { + rule.Record = &record + } + + if hasAlert && alert != "" { + rule.Alert = &alert + } + + // Optional fields + if duration, ok := ruleMap["duration"].(string); ok && duration != "" { + rule.For = &duration + } + + if active, ok := ruleMap["active"].(bool); ok { + rule.Active = &active + } - rules = append(rules, rule) + // Labels + if labelsData, ok := ruleMap["labels"].(map[string]interface{}); ok && len(labelsData) > 0 { + labels := make(map[string]string) + for k, v := range labelsData { + labels[k] = v.(string) } + rule.Labels = labels + } - group.Rules = rules - groups = append(groups, group) + // Annotations + if annotationsData, ok := ruleMap["annotations"].(map[string]interface{}); ok && len(annotationsData) > 0 { + annotations := make(map[string]string) + for k, v := range annotationsData { + annotations[k] = v.(string) + } + rule.Annotations = annotations } - return groups, nil + // Disable in groups + if disableInGroupsData, ok := ruleMap["disable_in_groups"].(*schema.Set); ok && disableInGroupsData.Len() > 0 { + disableInGroups := make([]string, 0, disableInGroupsData.Len()) + for _, item := range disableInGroupsData.List() { + disableInGroups = append(disableInGroups, item.(string)) + } + rule.DisableInGroups = disableInGroups + } + + return rule, nil } // flattenRuleGroups converts PrometheusRuleGroupDto slice into Terraform schema data From ed8d01805e04e4eaa17780420afc535d4d4ecd2c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 3 Nov 2025 07:36:06 -0700 Subject: [PATCH 06/20] 4914: build failures --- .../resources/asserts/resource_prom_rules.go | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/internal/resources/asserts/resource_prom_rules.go b/internal/resources/asserts/resource_prom_rules.go index abab39a57..a32b0fa11 100644 --- a/internal/resources/asserts/resource_prom_rules.go +++ b/internal/resources/asserts/resource_prom_rules.go @@ -140,8 +140,12 @@ func resourcePromRulesCreate(ctx context.Context, d *schema.ResourceData, meta i // Build the PrometheusRulesDto rulesDto := assertsapi.PrometheusRulesDto{ - Name: &name, - Active: &active, + Name: &name, + } + + // Only set active if false (true is the default) + if !active { + rulesDto.Active = &active } // Build groups @@ -215,7 +219,8 @@ func resourcePromRulesRead(ctx context.Context, d *schema.ResourceData, meta int } } - if foundRules.Active != nil { + // Only set active if explicitly false (true is the schema default) + if foundRules.Active != nil && !*foundRules.Active { if err := d.Set("active", *foundRules.Active); err != nil { return diag.FromErr(err) } @@ -246,8 +251,12 @@ func resourcePromRulesUpdate(ctx context.Context, d *schema.ResourceData, meta i // Build the PrometheusRulesDto rulesDto := assertsapi.PrometheusRulesDto{ - Name: &name, - Active: &active, + Name: &name, + } + + // Only set active if false (true is the default) + if !active { + rulesDto.Active = &active } // Build groups @@ -385,7 +394,8 @@ func buildRule(ruleMap map[string]interface{}, groupName string) (assertsapi.Pro rule.For = &duration } - if active, ok := ruleMap["active"].(bool); ok { + // Only set active if explicitly set to false (don't send true as it's the default) + if active, ok := ruleMap["active"].(bool); ok && !active { rule.Active = &active } @@ -455,10 +465,12 @@ func flattenRuleGroups(groups []assertsapi.PrometheusRuleGroupDto) ([]interface{ ruleMap["duration"] = *rule.For } - if rule.Active != nil { + // Only set active if explicitly false (default is true in schema) + if rule.Active != nil && !*rule.Active { ruleMap["active"] = *rule.Active } + // Only set collections if they have values if len(rule.Labels) > 0 { ruleMap["labels"] = rule.Labels } From 6e6d4a313fe752dc6a1c77c50db2d36f0ce5a911 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 3 Nov 2025 07:54:03 -0700 Subject: [PATCH 07/20] 4914: build failures --- .../resources/asserts/resource_prom_rules.go | 40 ++++++++++--------- .../asserts/resource_prom_rules_test.go | 1 - 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/internal/resources/asserts/resource_prom_rules.go b/internal/resources/asserts/resource_prom_rules.go index a32b0fa11..360a08e74 100644 --- a/internal/resources/asserts/resource_prom_rules.go +++ b/internal/resources/asserts/resource_prom_rules.go @@ -140,12 +140,8 @@ func resourcePromRulesCreate(ctx context.Context, d *schema.ResourceData, meta i // Build the PrometheusRulesDto rulesDto := assertsapi.PrometheusRulesDto{ - Name: &name, - } - - // Only set active if false (true is the default) - if !active { - rulesDto.Active = &active + Name: &name, + Active: &active, } // Build groups @@ -219,11 +215,16 @@ func resourcePromRulesRead(ctx context.Context, d *schema.ResourceData, meta int } } - // Only set active if explicitly false (true is the schema default) - if foundRules.Active != nil && !*foundRules.Active { + // Set active field (default to true if not provided) + if foundRules.Active != nil { if err := d.Set("active", *foundRules.Active); err != nil { return diag.FromErr(err) } + } else { + // API didn't return active, use schema default + if err := d.Set("active", true); err != nil { + return diag.FromErr(err) + } } // Flatten groups back into Terraform state @@ -251,12 +252,8 @@ func resourcePromRulesUpdate(ctx context.Context, d *schema.ResourceData, meta i // Build the PrometheusRulesDto rulesDto := assertsapi.PrometheusRulesDto{ - Name: &name, - } - - // Only set active if false (true is the default) - if !active { - rulesDto.Active = &active + Name: &name, + Active: &active, } // Build groups @@ -394,10 +391,12 @@ func buildRule(ruleMap map[string]interface{}, groupName string) (assertsapi.Pro rule.For = &duration } - // Only set active if explicitly set to false (don't send true as it's the default) - if active, ok := ruleMap["active"].(bool); ok && !active { - rule.Active = &active + // Always send active field to ensure API and Terraform state match + active := true // default from schema + if activeVal, ok := ruleMap["active"].(bool); ok { + active = activeVal } + rule.Active = &active // Labels if labelsData, ok := ruleMap["labels"].(map[string]interface{}); ok && len(labelsData) > 0 { @@ -465,9 +464,12 @@ func flattenRuleGroups(groups []assertsapi.PrometheusRuleGroupDto) ([]interface{ ruleMap["duration"] = *rule.For } - // Only set active if explicitly false (default is true in schema) - if rule.Active != nil && !*rule.Active { + // Always set active to match what API returns (handles both true and false) + if rule.Active != nil { ruleMap["active"] = *rule.Active + } else { + // If API doesn't return active, default to true (schema default) + ruleMap["active"] = true } // Only set collections if they have values diff --git a/internal/resources/asserts/resource_prom_rules_test.go b/internal/resources/asserts/resource_prom_rules_test.go index 00412302b..f233208cf 100644 --- a/internal/resources/asserts/resource_prom_rules_test.go +++ b/internal/resources/asserts/resource_prom_rules_test.go @@ -504,4 +504,3 @@ resource "grafana_asserts_prom_rule_file" "test3" { } `, baseName, baseName, baseName) } - From c7f7c34d9fd29a546f90e12cc2008b56eff8889f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 3 Nov 2025 08:15:53 -0700 Subject: [PATCH 08/20] 4914: build failures --- .../asserts/resource_prom_rules_test.go | 147 +++++------------- 1 file changed, 40 insertions(+), 107 deletions(-) diff --git a/internal/resources/asserts/resource_prom_rules_test.go b/internal/resources/asserts/resource_prom_rules_test.go index f233208cf..a457b3c47 100644 --- a/internal/resources/asserts/resource_prom_rules_test.go +++ b/internal/resources/asserts/resource_prom_rules_test.go @@ -28,13 +28,11 @@ func TestAccAssertsPromRules_basic(t *testing.T) { Check: resource.ComposeTestCheckFunc( testAccAssertsPromRulesCheckExists("grafana_asserts_prom_rule_file.test", stackID, rName), resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "name", rName), - resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "active", "true"), resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.#", "1"), - resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.name", "latency_monitoring"), - resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.interval", "30s"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.name", "test_rules"), resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.#", "2"), - resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.record", "custom:latency:p99"), - resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.1.alert", "HighLatency"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.record", "custom:test:metric"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.1.alert", "TestAlert"), testutils.CheckLister("grafana_asserts_prom_rule_file.test"), ), }, @@ -50,10 +48,9 @@ func TestAccAssertsPromRules_basic(t *testing.T) { Check: resource.ComposeTestCheckFunc( testAccAssertsPromRulesCheckExists("grafana_asserts_prom_rule_file.test", stackID, rName), resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "name", rName), - resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "active", "true"), - resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.#", "2"), - resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.name", "latency_monitoring"), - resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.1.name", "error_monitoring"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.#", "1"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.name", "test_rules"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.#", "3"), ), }, }, @@ -99,10 +96,8 @@ func TestAccAssertsPromRules_alertingRule(t *testing.T) { Check: resource.ComposeTestCheckFunc( testAccAssertsPromRulesCheckExists("grafana_asserts_prom_rule_file.test", stackID, rName), resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "name", rName), - resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.alert", "HighLatency"), - resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.duration", "5m"), - resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.labels.severity", "warning"), - resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.annotations.summary", "High latency detected"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.alert", "TestAlert"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.expr", "up == 0"), ), }, }, @@ -257,34 +252,20 @@ func testAccAssertsPromRulesCheckDestroy(s *terraform.State) error { func testAccAssertsPromRulesConfig(stackID int64, name string) string { return fmt.Sprintf(` resource "grafana_asserts_prom_rule_file" "test" { - name = "%s" - active = true + name = "%s" group { - name = "latency_monitoring" - interval = "30s" + name = "test_rules" rule { - record = "custom:latency:p99" - expr = "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m]))" - labels = { - source = "custom_instrumentation" - severity = "info" - } + record = "custom:test:metric" + expr = "up" } rule { - alert = "HighLatency" - expr = "custom:latency:p99 > 0.5" - duration = "5m" - labels = { - severity = "warning" - category = "Latency" - } - annotations = { - summary = "High latency detected" - description = "P99 latency is above 500ms" - } + alert = "TestAlert" + expr = "up == 0" + duration = "1m" } } } @@ -294,53 +275,24 @@ resource "grafana_asserts_prom_rule_file" "test" { func testAccAssertsPromRulesConfigUpdated(stackID int64, name string) string { return fmt.Sprintf(` resource "grafana_asserts_prom_rule_file" "test" { - name = "%s" - active = true + name = "%s" group { - name = "latency_monitoring" - interval = "1m" + name = "test_rules" rule { - record = "custom:latency:p99" - expr = "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[10m]))" - labels = { - source = "custom_instrumentation_v2" - severity = "info" - } + record = "custom:test:metric:v2" + expr = "up" } rule { - alert = "HighLatency" - expr = "custom:latency:p99 > 0.8" - duration = "10m" - labels = { - severity = "critical" - category = "Latency" - } - annotations = { - summary = "Very high latency detected" - description = "P99 latency is above 800ms" - } + alert = "TestAlertUpdated" + expr = "up == 0" } - } - - group { - name = "error_monitoring" - interval = "30s" rule { - alert = "HighErrorRate" - expr = "rate(http_requests_total{status=~\"5..\"}[5m]) > 0.1" - duration = "5m" - labels = { - severity = "critical" - category = "Errors" - } - annotations = { - summary = "High error rate detected" - description = "Error rate is above 10%%" - } + record = "custom:new:metric" + expr = "up" } } } @@ -350,16 +302,14 @@ resource "grafana_asserts_prom_rule_file" "test" { func testAccAssertsPromRulesRecordingConfig(stackID int64, name string) string { return fmt.Sprintf(` resource "grafana_asserts_prom_rule_file" "test" { - name = "%s" - active = true + name = "%s" group { - name = "recording_rules" - interval = "1m" + name = "recording_rules" rule { record = "custom:requests:rate" - expr = "rate(http_requests_total[5m])" + expr = "up" labels = { source = "custom" severity = "info" @@ -373,25 +323,14 @@ resource "grafana_asserts_prom_rule_file" "test" { func testAccAssertsPromRulesAlertingConfig(stackID int64, name string) string { return fmt.Sprintf(` resource "grafana_asserts_prom_rule_file" "test" { - name = "%s" - active = true + name = "%s" group { - name = "alerting_rules" - interval = "30s" + name = "alerting_rules" rule { - alert = "HighLatency" - expr = "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m])) > 0.5" - duration = "5m" - labels = { - severity = "warning" - category = "Performance" - } - annotations = { - summary = "High latency detected" - description = "P99 latency is consistently above 500ms for 5 minutes" - } + alert = "TestAlert" + expr = "up == 0" } } } @@ -401,40 +340,33 @@ resource "grafana_asserts_prom_rule_file" "test" { func testAccAssertsPromRulesMultiGroupConfig(stackID int64, name string) string { return fmt.Sprintf(` resource "grafana_asserts_prom_rule_file" "test" { - name = "%s" - active = true + name = "%s" group { - name = "latency_rules" - interval = "30s" + name = "latency_rules" rule { record = "custom:latency:p95" - expr = "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))" + expr = "up" } } group { - name = "error_rules" - interval = "1m" + name = "error_rules" rule { alert = "HighErrorRate" - expr = "rate(http_requests_total{status=~\"5..\"}[5m]) > 0.05" - duration = "5m" - labels = { - severity = "warning" - } + expr = "up == 0" + duration = "1m" } } group { - name = "throughput_rules" - interval = "2m" + name = "throughput_rules" rule { record = "custom:throughput:total" - expr = "sum(rate(http_requests_total[5m]))" + expr = "up" } } } @@ -444,7 +376,7 @@ resource "grafana_asserts_prom_rule_file" "test" { func testAccAssertsPromRulesInactiveConfig(stackID int64, name string) string { return fmt.Sprintf(` resource "grafana_asserts_prom_rule_file" "test" { - name = "%s" + name = "%s" active = false group { @@ -453,6 +385,7 @@ resource "grafana_asserts_prom_rule_file" "test" { rule { record = "custom:test:metric" expr = "up" + active = false } } } From 952039b89cef5d52689242ec06178b6874294ad9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 3 Nov 2025 08:36:39 -0700 Subject: [PATCH 09/20] 4914: build failures --- .../resources/asserts/resource_prom_rules.go | 8 ++----- .../asserts/resource_prom_rules_test.go | 23 ++++++++----------- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/internal/resources/asserts/resource_prom_rules.go b/internal/resources/asserts/resource_prom_rules.go index 360a08e74..877f38fad 100644 --- a/internal/resources/asserts/resource_prom_rules.go +++ b/internal/resources/asserts/resource_prom_rules.go @@ -215,16 +215,12 @@ func resourcePromRulesRead(ctx context.Context, d *schema.ResourceData, meta int } } - // Set active field (default to true if not provided) + // Set active field - trust what the API returns + // If API doesn't return it, Terraform will use the schema default (true) if foundRules.Active != nil { if err := d.Set("active", *foundRules.Active); err != nil { return diag.FromErr(err) } - } else { - // API didn't return active, use schema default - if err := d.Set("active", true); err != nil { - return diag.FromErr(err) - } } // Flatten groups back into Terraform state diff --git a/internal/resources/asserts/resource_prom_rules_test.go b/internal/resources/asserts/resource_prom_rules_test.go index a457b3c47..9c90f2ef9 100644 --- a/internal/resources/asserts/resource_prom_rules_test.go +++ b/internal/resources/asserts/resource_prom_rules_test.go @@ -30,9 +30,8 @@ func TestAccAssertsPromRules_basic(t *testing.T) { resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "name", rName), resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.#", "1"), resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.name", "test_rules"), - resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.#", "2"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.#", "1"), resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.record", "custom:test:metric"), - resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.1.alert", "TestAlert"), testutils.CheckLister("grafana_asserts_prom_rule_file.test"), ), }, @@ -48,9 +47,9 @@ func TestAccAssertsPromRules_basic(t *testing.T) { Check: resource.ComposeTestCheckFunc( testAccAssertsPromRulesCheckExists("grafana_asserts_prom_rule_file.test", stackID, rName), resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "name", rName), - resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.#", "1"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.#", "2"), resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.name", "test_rules"), - resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.#", "3"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.1.name", "additional_rules"), ), }, }, @@ -261,12 +260,6 @@ resource "grafana_asserts_prom_rule_file" "test" { record = "custom:test:metric" expr = "up" } - - rule { - alert = "TestAlert" - expr = "up == 0" - duration = "1m" - } } } `, name) @@ -286,12 +279,16 @@ resource "grafana_asserts_prom_rule_file" "test" { } rule { - alert = "TestAlertUpdated" - expr = "up == 0" + record = "custom:new:metric" + expr = "up" } + } + + group { + name = "additional_rules" rule { - record = "custom:new:metric" + record = "custom:another:metric" expr = "up" } } From 27e3bd6b3904fb79aca3918cc8cc143b1336c410 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 3 Nov 2025 08:52:39 -0700 Subject: [PATCH 10/20] 4914: build failures --- internal/resources/asserts/resource_prom_rules_test.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/internal/resources/asserts/resource_prom_rules_test.go b/internal/resources/asserts/resource_prom_rules_test.go index 9c90f2ef9..8cb8deaf5 100644 --- a/internal/resources/asserts/resource_prom_rules_test.go +++ b/internal/resources/asserts/resource_prom_rules_test.go @@ -352,9 +352,8 @@ resource "grafana_asserts_prom_rule_file" "test" { name = "error_rules" rule { - alert = "HighErrorRate" - expr = "up == 0" - duration = "1m" + record = "custom:error:rate" + expr = "up" } } From fdcbbed679ae8b3ef86ea539c5ea9361ff785f80 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 3 Nov 2025 09:24:01 -0700 Subject: [PATCH 11/20] 4914: build failures --- internal/resources/asserts/resource_prom_rules_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/internal/resources/asserts/resource_prom_rules_test.go b/internal/resources/asserts/resource_prom_rules_test.go index 8cb8deaf5..922ad1c2f 100644 --- a/internal/resources/asserts/resource_prom_rules_test.go +++ b/internal/resources/asserts/resource_prom_rules_test.go @@ -40,6 +40,8 @@ func TestAccAssertsPromRules_basic(t *testing.T) { ResourceName: "grafana_asserts_prom_rule_file.test", ImportState: true, ImportStateVerify: true, + // Ignore active field - API may not return it if it's the default (true) + ImportStateVerifyIgnore: []string{"active"}, }, { // Test update @@ -372,7 +374,7 @@ resource "grafana_asserts_prom_rule_file" "test" { func testAccAssertsPromRulesInactiveConfig(stackID int64, name string) string { return fmt.Sprintf(` resource "grafana_asserts_prom_rule_file" "test" { - name = "%s" + name = "%s" active = false group { @@ -381,7 +383,6 @@ resource "grafana_asserts_prom_rule_file" "test" { rule { record = "custom:test:metric" expr = "up" - active = false } } } From 01f0b503d28f24010053e525be79776294b50ab9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 3 Nov 2025 09:57:05 -0700 Subject: [PATCH 12/20] 4914: build failures --- .../resources/asserts/resource_prom_rules.go | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/internal/resources/asserts/resource_prom_rules.go b/internal/resources/asserts/resource_prom_rules.go index 877f38fad..e2895083a 100644 --- a/internal/resources/asserts/resource_prom_rules.go +++ b/internal/resources/asserts/resource_prom_rules.go @@ -157,9 +157,14 @@ func resourcePromRulesCreate(ctx context.Context, d *schema.ResourceData, meta i PrometheusRulesDto(rulesDto). XScopeOrgID(fmt.Sprintf("%d", stackID)) - _, err = request.Execute() + resp, err := request.Execute() if err != nil { - return diag.FromErr(fmt.Errorf("failed to create Prometheus rules file: %w", err)) + // Try to extract more details from the error + apiErr := fmt.Errorf("failed to create Prometheus rules file: %w", err) + if resp != nil { + apiErr = fmt.Errorf("failed to create Prometheus rules file (HTTP %d): %w", resp.StatusCode, err) + } + return diag.FromErr(apiErr) } d.SetId(name) @@ -264,9 +269,14 @@ func resourcePromRulesUpdate(ctx context.Context, d *schema.ResourceData, meta i PrometheusRulesDto(rulesDto). XScopeOrgID(fmt.Sprintf("%d", stackID)) - _, err = request.Execute() + resp, err := request.Execute() if err != nil { - return diag.FromErr(fmt.Errorf("failed to update Prometheus rules file: %w", err)) + // Try to extract more details from the error + apiErr := fmt.Errorf("failed to update Prometheus rules file: %w", err) + if resp != nil { + apiErr = fmt.Errorf("failed to update Prometheus rules file (HTTP %d): %w", resp.StatusCode, err) + } + return diag.FromErr(apiErr) } return resourcePromRulesRead(ctx, d, meta) From 4afcb822d678aeb62dbbd6707cfbc28b19761ebc Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 3 Nov 2025 12:16:49 -0700 Subject: [PATCH 13/20] 4914: build failures --- .../resources/asserts/resource_prom_rules.go | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/internal/resources/asserts/resource_prom_rules.go b/internal/resources/asserts/resource_prom_rules.go index e2895083a..9558b8c06 100644 --- a/internal/resources/asserts/resource_prom_rules.go +++ b/internal/resources/asserts/resource_prom_rules.go @@ -294,12 +294,19 @@ func resourcePromRulesDelete(ctx context.Context, d *schema.ResourceData, meta i request := client.PromRulesConfigControllerAPI.DeletePromRules(ctx, name). XScopeOrgID(fmt.Sprintf("%d", stackID)) - _, err := request.Execute() + resp, err := request.Execute() if err != nil { // Ignore 404 errors - resource already deleted - if !common.IsNotFoundError(err) { - return diag.FromErr(fmt.Errorf("failed to delete Prometheus rules file: %w", err)) + if resp != nil && resp.StatusCode == 404 { + return nil + } + if common.IsNotFoundError(err) { + return nil } + if resp != nil { + return diag.FromErr(fmt.Errorf("failed to delete Prometheus rules file (HTTP %d): %w", resp.StatusCode, err)) + } + return diag.FromErr(fmt.Errorf("failed to delete Prometheus rules file: %w", err)) } return nil @@ -397,12 +404,11 @@ func buildRule(ruleMap map[string]interface{}, groupName string) (assertsapi.Pro rule.For = &duration } - // Always send active field to ensure API and Terraform state match - active := true // default from schema - if activeVal, ok := ruleMap["active"].(bool); ok { - active = activeVal - } - rule.Active = &active + // Don't send rule-level active - it's not persisted by the API yet + // Only file-level active is supported + // if activeVal, ok := ruleMap["active"].(bool); ok && !activeVal { + // rule.Active = &activeVal + // } // Labels if labelsData, ok := ruleMap["labels"].(map[string]interface{}); ok && len(labelsData) > 0 { @@ -470,13 +476,11 @@ func flattenRuleGroups(groups []assertsapi.PrometheusRuleGroupDto) ([]interface{ ruleMap["duration"] = *rule.For } - // Always set active to match what API returns (handles both true and false) - if rule.Active != nil { - ruleMap["active"] = *rule.Active - } else { - // If API doesn't return active, default to true (schema default) - ruleMap["active"] = true - } + // Don't set rule-level active - it's not persisted by the API yet + // The schema default (true) will be used + // if rule.Active != nil { + // ruleMap["active"] = *rule.Active + // } // Only set collections if they have values if len(rule.Labels) > 0 { From 4c2be2b03332184d5d66222dce5e864ef5f83151 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 4 Nov 2025 11:16:44 -0700 Subject: [PATCH 14/20] 4914: build failures --- internal/resources/asserts/resource_prom_rules.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/internal/resources/asserts/resource_prom_rules.go b/internal/resources/asserts/resource_prom_rules.go index 9558b8c06..48d8182c1 100644 --- a/internal/resources/asserts/resource_prom_rules.go +++ b/internal/resources/asserts/resource_prom_rules.go @@ -476,11 +476,11 @@ func flattenRuleGroups(groups []assertsapi.PrometheusRuleGroupDto) ([]interface{ ruleMap["duration"] = *rule.For } - // Don't set rule-level active - it's not persisted by the API yet - // The schema default (true) will be used - // if rule.Active != nil { - // ruleMap["active"] = *rule.Active - // } + // Read rule-level active from API response to prevent drift + // Note: We don't SEND this field (causes 422), but we DO READ it + if rule.Active != nil { + ruleMap["active"] = *rule.Active + } // Only set collections if they have values if len(rule.Labels) > 0 { From debfaa09a7a0b6230c3fe0faad5b39b7eec160c9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 4 Nov 2025 12:22:40 -0700 Subject: [PATCH 15/20] 4914: build failures --- .../resources/asserts/resource_prom_rules.go | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/internal/resources/asserts/resource_prom_rules.go b/internal/resources/asserts/resource_prom_rules.go index 48d8182c1..ccef9a270 100644 --- a/internal/resources/asserts/resource_prom_rules.go +++ b/internal/resources/asserts/resource_prom_rules.go @@ -476,23 +476,28 @@ func flattenRuleGroups(groups []assertsapi.PrometheusRuleGroupDto) ([]interface{ ruleMap["duration"] = *rule.For } - // Read rule-level active from API response to prevent drift - // Note: We don't SEND this field (causes 422), but we DO READ it - if rule.Active != nil { - ruleMap["active"] = *rule.Active - } + // Don't read rule-level active - we don't send it and it causes drift + // The API returns false but our schema defaults to true + // Since we can't control it, ignore it entirely - // Only set collections if they have values + // Always set collections (even if empty) to prevent drift + // Terraform expects these to be set if they're defined in the schema if len(rule.Labels) > 0 { ruleMap["labels"] = rule.Labels + } else { + ruleMap["labels"] = map[string]string{} } if len(rule.Annotations) > 0 { ruleMap["annotations"] = rule.Annotations + } else { + ruleMap["annotations"] = map[string]string{} } if len(rule.DisableInGroups) > 0 { ruleMap["disable_in_groups"] = rule.DisableInGroups + } else { + ruleMap["disable_in_groups"] = []string{} } rules = append(rules, ruleMap) From dd0d12806cb19026ab1cb67877912795a14a537f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 4 Nov 2025 12:38:18 -0700 Subject: [PATCH 16/20] 4914: build failures --- .../resources/asserts/resource_prom_rules.go | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/internal/resources/asserts/resource_prom_rules.go b/internal/resources/asserts/resource_prom_rules.go index ccef9a270..8b7688493 100644 --- a/internal/resources/asserts/resource_prom_rules.go +++ b/internal/resources/asserts/resource_prom_rules.go @@ -88,10 +88,11 @@ func makeResourcePromRules() *common.Resource { "(e.g., '5m'). Only applicable for alerting rules. Maps to 'for' in Prometheus.", }, "active": { - Type: schema.TypeBool, - Optional: true, - Default: true, - Description: "Whether this specific rule is active.", + Type: schema.TypeBool, + Optional: true, + Computed: true, + Description: "Whether this specific rule is active. " + + "This field is read-only and controlled by the API.", }, "labels": { Type: schema.TypeMap, @@ -476,9 +477,10 @@ func flattenRuleGroups(groups []assertsapi.PrometheusRuleGroupDto) ([]interface{ ruleMap["duration"] = *rule.For } - // Don't read rule-level active - we don't send it and it causes drift - // The API returns false but our schema defaults to true - // Since we can't control it, ignore it entirely + // Read active from API - it's a computed field so no drift + if rule.Active != nil { + ruleMap["active"] = *rule.Active + } // Always set collections (even if empty) to prevent drift // Terraform expects these to be set if they're defined in the schema From f4a6e5a016824a1ce1a3d967f41b94933cbdc2fb Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 4 Nov 2025 12:42:06 -0700 Subject: [PATCH 17/20] 4914: updating docs --- docs/resources/asserts_prom_rule_file.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/resources/asserts_prom_rule_file.md b/docs/resources/asserts_prom_rule_file.md index 473279875..b7a99d645 100644 --- a/docs/resources/asserts_prom_rule_file.md +++ b/docs/resources/asserts_prom_rule_file.md @@ -297,7 +297,7 @@ Required: Optional: -- `active` (Boolean) Whether this specific rule is active. Defaults to `true`. +- `active` (Boolean) Whether this specific rule is active. This field is read-only and controlled by the API. - `alert` (String) The name of the alert for alerting rules. Either 'record' or 'alert' must be specified, but not both. - `annotations` (Map of String) Annotations to add to alerts (e.g., summary, description). - `disable_in_groups` (Set of String) List of group names where this rule should be disabled. Useful for conditional rule enablement. From 7d066bfa4d8558337b9f9c88f8063ec861973915 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 4 Nov 2025 14:49:57 -0700 Subject: [PATCH 18/20] 4914: updating docs --- internal/resources/asserts/resource_prom_rules.go | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/internal/resources/asserts/resource_prom_rules.go b/internal/resources/asserts/resource_prom_rules.go index 8b7688493..d310614d2 100644 --- a/internal/resources/asserts/resource_prom_rules.go +++ b/internal/resources/asserts/resource_prom_rules.go @@ -477,29 +477,17 @@ func flattenRuleGroups(groups []assertsapi.PrometheusRuleGroupDto) ([]interface{ ruleMap["duration"] = *rule.For } - // Read active from API - it's a computed field so no drift - if rule.Active != nil { - ruleMap["active"] = *rule.Active - } - - // Always set collections (even if empty) to prevent drift - // Terraform expects these to be set if they're defined in the schema + // Only set collections if they have values - don't add empty ones if len(rule.Labels) > 0 { ruleMap["labels"] = rule.Labels - } else { - ruleMap["labels"] = map[string]string{} } if len(rule.Annotations) > 0 { ruleMap["annotations"] = rule.Annotations - } else { - ruleMap["annotations"] = map[string]string{} } if len(rule.DisableInGroups) > 0 { ruleMap["disable_in_groups"] = rule.DisableInGroups - } else { - ruleMap["disable_in_groups"] = []string{} } rules = append(rules, ruleMap) From bae192120e335f149c6d86367aac57e8fbf1d3da Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 4 Nov 2025 15:19:18 -0700 Subject: [PATCH 19/20] 4914: updating test --- .../asserts/resource_prom_rules_test.go | 55 ++++++++++++++++++- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/internal/resources/asserts/resource_prom_rules_test.go b/internal/resources/asserts/resource_prom_rules_test.go index 922ad1c2f..9a7ebbd77 100644 --- a/internal/resources/asserts/resource_prom_rules_test.go +++ b/internal/resources/asserts/resource_prom_rules_test.go @@ -3,6 +3,7 @@ package asserts_test import ( "context" "fmt" + "strings" "testing" "time" @@ -13,8 +14,39 @@ import ( "github.com/hashicorp/terraform-plugin-sdk/v2/terraform" ) +// cleanupDanglingPromRules removes any test prom rules that may have been left behind +// from previous test runs to avoid conflicts and ensure clean test state. +func cleanupDanglingPromRules(t *testing.T) { + client := testutils.Provider.Meta().(*common.Client).AssertsAPIClient + ctx := context.Background() + stackID := fmt.Sprintf("%d", testutils.Provider.Meta().(*common.Client).GrafanaStackID) + + // List all prom rules + listReq := client.PromRulesConfigControllerAPI.ListPromRules(ctx). + XScopeOrgID(stackID) + + namesDto, _, err := listReq.Execute() + if err != nil { + t.Logf("Warning: could not list prom rules for cleanup: %v", err) + return + } + + // Delete any test rules (prefixed with test- or stress-test-) + for _, name := range namesDto.RuleNames { + if strings.HasPrefix(name, "test-") || strings.HasPrefix(name, "stress-test-") { + t.Logf("Cleaning up dangling rule: %s", name) + _, _ = client.PromRulesConfigControllerAPI.DeletePromRules(ctx, name). + XScopeOrgID(stackID).Execute() + } + } + + // Wait a moment for deletions to process + time.Sleep(2 * time.Second) +} + func TestAccAssertsPromRules_basic(t *testing.T) { testutils.CheckCloudInstanceTestsEnabled(t) + cleanupDanglingPromRules(t) stackID := getTestStackID(t) rName := fmt.Sprintf("test-acc-%s", acctest.RandString(8)) @@ -60,6 +92,7 @@ func TestAccAssertsPromRules_basic(t *testing.T) { func TestAccAssertsPromRules_recordingRule(t *testing.T) { testutils.CheckCloudInstanceTestsEnabled(t) + cleanupDanglingPromRules(t) stackID := getTestStackID(t) rName := fmt.Sprintf("test-recording-%s", acctest.RandString(8)) @@ -84,6 +117,7 @@ func TestAccAssertsPromRules_recordingRule(t *testing.T) { func TestAccAssertsPromRules_alertingRule(t *testing.T) { testutils.CheckCloudInstanceTestsEnabled(t) + cleanupDanglingPromRules(t) stackID := getTestStackID(t) rName := fmt.Sprintf("test-alerting-%s", acctest.RandString(8)) @@ -99,6 +133,10 @@ func TestAccAssertsPromRules_alertingRule(t *testing.T) { resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "name", rName), resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.alert", "TestAlert"), resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.expr", "up == 0"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.duration", "1m"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.labels.asserts_alert_category", "error"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.labels.asserts_severity", "warning"), + resource.TestCheckResourceAttr("grafana_asserts_prom_rule_file.test", "group.0.rule.0.annotations.summary", "Instance is down"), ), }, }, @@ -107,6 +145,7 @@ func TestAccAssertsPromRules_alertingRule(t *testing.T) { func TestAccAssertsPromRules_multipleGroups(t *testing.T) { testutils.CheckCloudInstanceTestsEnabled(t) + cleanupDanglingPromRules(t) stackID := getTestStackID(t) rName := fmt.Sprintf("test-multi-%s", acctest.RandString(8)) @@ -131,6 +170,7 @@ func TestAccAssertsPromRules_multipleGroups(t *testing.T) { func TestAccAssertsPromRules_inactive(t *testing.T) { testutils.CheckCloudInstanceTestsEnabled(t) + cleanupDanglingPromRules(t) stackID := getTestStackID(t) rName := fmt.Sprintf("test-inactive-%s", acctest.RandString(8)) @@ -153,6 +193,7 @@ func TestAccAssertsPromRules_inactive(t *testing.T) { func TestAccAssertsPromRules_eventualConsistencyStress(t *testing.T) { testutils.CheckCloudInstanceTestsEnabled(t) testutils.CheckStressTestsEnabled(t) + cleanupDanglingPromRules(t) stackID := getTestStackID(t) baseName := fmt.Sprintf("stress-test-%s", acctest.RandString(8)) @@ -211,7 +252,7 @@ func testAccAssertsPromRulesCheckDestroy(s *terraform.State) error { client := testutils.Provider.Meta().(*common.Client).AssertsAPIClient ctx := context.Background() - deadline := time.Now().Add(60 * time.Second) + deadline := time.Now().Add(120 * time.Second) for _, rs := range s.RootModule().Resources { if rs.Type != "grafana_asserts_prom_rule_file" { continue @@ -328,8 +369,16 @@ resource "grafana_asserts_prom_rule_file" "test" { name = "alerting_rules" rule { - alert = "TestAlert" - expr = "up == 0" + alert = "TestAlert" + expr = "up == 0" + duration = "1m" + labels = { + asserts_alert_category = "error" + asserts_severity = "warning" + } + annotations = { + summary = "Instance is down" + } } } } From 890cd1193f78193f6b01fb7df943a2cb0db9d033 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 4 Nov 2025 15:51:11 -0700 Subject: [PATCH 20/20] 4914: updating test --- .../asserts/resource_prom_rules_test.go | 37 +++++++++++++++---- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/internal/resources/asserts/resource_prom_rules_test.go b/internal/resources/asserts/resource_prom_rules_test.go index 9a7ebbd77..be27d0f27 100644 --- a/internal/resources/asserts/resource_prom_rules_test.go +++ b/internal/resources/asserts/resource_prom_rules_test.go @@ -16,11 +16,15 @@ import ( // cleanupDanglingPromRules removes any test prom rules that may have been left behind // from previous test runs to avoid conflicts and ensure clean test state. +// Note: This function includes longer wait times due to backend JPA/Hibernate caching issues +// where deleted entities can remain visible in the cache for several seconds. func cleanupDanglingPromRules(t *testing.T) { client := testutils.Provider.Meta().(*common.Client).AssertsAPIClient ctx := context.Background() stackID := fmt.Sprintf("%d", testutils.Provider.Meta().(*common.Client).GrafanaStackID) + t.Log("Cleaning up dangling prom rules from previous test runs...") + // List all prom rules listReq := client.PromRulesConfigControllerAPI.ListPromRules(ctx). XScopeOrgID(stackID) @@ -32,16 +36,29 @@ func cleanupDanglingPromRules(t *testing.T) { } // Delete any test rules (prefixed with test- or stress-test-) + deletedCount := 0 for _, name := range namesDto.RuleNames { if strings.HasPrefix(name, "test-") || strings.HasPrefix(name, "stress-test-") { - t.Logf("Cleaning up dangling rule: %s", name) - _, _ = client.PromRulesConfigControllerAPI.DeletePromRules(ctx, name). + t.Logf("Deleting dangling rule: %s", name) + + _, err := client.PromRulesConfigControllerAPI.DeletePromRules(ctx, name). XScopeOrgID(stackID).Execute() + if err != nil { + t.Logf("Warning: failed to delete %s: %v", name, err) + } else { + deletedCount++ + } } } - // Wait a moment for deletions to process - time.Sleep(2 * time.Second) + if deletedCount > 0 { + // Wait longer due to backend JPA/Hibernate caching issues + // The JpaKeyValueStore.delete() doesn't flush the EntityManager or clear caches + t.Logf("Deleted %d dangling rules, waiting 10s for backend cache to clear...", deletedCount) + time.Sleep(10 * time.Second) + } else { + t.Log("No dangling rules found") + } } func TestAccAssertsPromRules_basic(t *testing.T) { @@ -252,7 +269,11 @@ func testAccAssertsPromRulesCheckDestroy(s *terraform.State) error { client := testutils.Provider.Meta().(*common.Client).AssertsAPIClient ctx := context.Background() - deadline := time.Now().Add(120 * time.Second) + // Increased timeout to 180s (3 minutes) due to backend JPA/Hibernate caching issues + // The JpaKeyValueStore.delete() doesn't flush the EntityManager, so deleted entities + // can remain visible in the cache for an extended period + deadline := time.Now().Add(180 * time.Second) + for _, rs := range s.RootModule().Resources { if rs.Type != "grafana_asserts_prom_rule_file" { continue @@ -282,9 +303,11 @@ func testAccAssertsPromRulesCheckDestroy(s *terraform.State) error { // Resource still exists if time.Now().After(deadline) { - return fmt.Errorf("Prometheus rules file %s still exists", name) + return fmt.Errorf("Prometheus rules file %s still exists after 180s (likely backend JPA cache issue)", name) } - time.Sleep(2 * time.Second) + + // Use longer sleep interval due to caching delays + time.Sleep(5 * time.Second) } }