grafana
diff --git a/‎actions/go-flaky-tests/.gitignore‎
Lines changed: 17 additions & 0 deletions b/‎actions/go-flaky-tests/.gitignore‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎actions/go-flaky-tests/CHANGELOG.md‎
Lines changed: 15 additions & 0 deletions b/‎actions/go-flaky-tests/CHANGELOG.md‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎actions/go-flaky-tests/README.md‎
Lines changed: 121 additions & 0 deletions b/‎actions/go-flaky-tests/README.md‎
Lines changed: 121 additions & 0 deletions
diff --git a/‎actions/go-flaky-tests/action.yaml‎
Lines changed: 47 additions & 0 deletions b/‎actions/go-flaky-tests/action.yaml‎
Lines changed: 47 additions & 0 deletions
diff --git a/‎actions/go-flaky-tests/cmd/go-flaky-tests/aggregate.go‎
Lines changed: 137 additions & 0 deletions b/‎actions/go-flaky-tests/cmd/go-flaky-tests/aggregate.go‎
Lines changed: 137 additions & 0 deletions
@@ -0,0 +1,17 @@
+# Local development files
+.env
+test-failure-analysis.json
+
+# Go build artifacts
+*.exe
+*.exe~
+*.dll
+*.so
+*.dylib
+
+# Go test files
+*.test
+*_actual.json
+
+# Go coverage files
+*.out
@@ -0,0 +1,15 @@
+# Changelog
+
+## [Unreleased]
+
+### Added
+
+- Initial implementation of flaky test analysis action
+- Loki integration for fetching test failure logs
+- Comprehensive test suite with golden file testing
+
+### Features
+
+- **Loki Log Analysis**: Fetches and parses test failure logs using LogQL
+- **Flaky Test Detection**: Identifies tests that fail inconsistently across branches
+- **Configurable Limits**: Top-K filtering to focus on most problematic tests
@@ -0,0 +1,121 @@
+# Go Flaky Tests
+
+A GitHub Action that detects and analyzes flaky Go tests by fetching logs from Loki.
+
+## Features
+
+- **Loki Integration**: Fetches test failure logs from Loki using LogQL queries
+- **Flaky Test Detection**: Identifies tests that fail inconsistently across different branches
+
+## Usage
+
+```yaml
+name: Go Flaky Tests
+on:
+  schedule:
+    - cron: "0 9 * * 1" # Run every Monday at 9 AM UTC
+  workflow_dispatch:
+
+jobs:
+  analyze-failures:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Go Flaky Tests
+        uses: grafana/shared-workflows/actions/go-flaky-tests@main
+        with:
+          loki-url: ${{ secrets.LOKI_URL }}
+          loki-username: ${{ secrets.LOKI_USERNAME }}
+          loki-password: ${{ secrets.LOKI_PASSWORD }}
+          repository: ${{ github.repository }}
+          time-range: "7d"
+          top-k: "5"
+```
+
+## Inputs
+
+| Input           | Description                                                                                                                  | Required | Default |
+| --------------- | ---------------------------------------------------------------------------------------------------------------------------- | -------- | ------- |
+| `loki-url`      | Loki endpoint URL                                                                                                            | ✅       | -       |
+| `loki-username` | Username for Loki authentication                                                                                             | ❌       | -       |
+| `loki-password` | Password for Loki authentication. If using Grafana Cloud, then the access policy for this token needs the `logs:read` scope. | ❌       | -       |
+| `repository`    | Repository name in 'owner/repo' format                                                                                       | ✅       | -       |
+| `time-range`    | Time range for the query (e.g., '1h', '24h', '7d')                                                                           | ❌       | `1h`    |
+| `top-k`         | Include only the top K flaky tests by distinct branches count                                                                | ❌       | `3`     |
+
+## Outputs
+
+| Output             | Description                                     |
+| ------------------ | ----------------------------------------------- |
+| `test-count`       | Number of flaky tests found                     |
+| `analysis-summary` | Summary of the analysis results                 |
+| `report-path`      | Path to the generated analysis report JSON file |
+
+## How It Works
+
+1. **Fetch Logs**: Queries Loki for test failure logs within the specified time range
+2. **Parse Failures**: Extracts test names, branches, and workflow URLs from logs
+3. **Detect Flaky Tests**: Identifies tests that fail on main/master or on more than one branch
+
+## Flaky Test Detection Logic
+
+A test is considered "flaky" if:
+
+- It fails on the main or master branch, OR
+- It fails on multiple different branches
+
+Tests that only fail on a single feature branch are not considered flaky, as they likely indicate legitimate test failures for that specific feature.
+
+## Local Development
+
+Run the analysis locally using the provided script:
+
+```bash
+# Set required environment variables
+export LOKI_URL="your-loki-url"
+export REPOSITORY="owner/repo"
+export TIME_RANGE="24h"
+# Run the analysis
+go run ./cmd/go-flaky-tests
+```
+
+## Requirements
+
+- Go 1.22 or later
+- Access to Loki instance with test failure logs
+
+## Output Format
+
+The action generates a JSON report with the following structure:
+
+```json
+{
+  "test_count": 2,
+  "analysis_summary": "Found 2 flaky tests. Most common tests: TestUserLogin (3 total failures; recently changed by alice), TestPayment (1 total failures; recently changed by bob)",
+  "report_path": "/path/to/test-failure-analysis.json",
+  "flaky_tests": [
+    {
+      "test_name": "TestUserLogin",
+      "file_path": "handlers/auth_test.go",
+      "total_failures": 3,
+      "branch_counts": {
+        "main": 2,
+        "feature-branch": 1
+      },
+      "example_workflows": [
+        "https://github.com/owner/repo/actions/runs/123",
+        "https://github.com/owner/repo/actions/runs/124"
+      ],
+      "recent_commits": [
+        {
+          "hash": "abc123",
+          "author": "alice",
+          "timestamp": "2024-01-15T10:30:00Z",
+          "title": "Fix authentication flow"
+        }
+      ]
+    }
+  ]
+}
+```
@@ -0,0 +1,47 @@
+name: "Go Flaky Tests"
+description: "Detect and analyze flaky Go tests using Loki logs"
+author: "Grafana Labs"
+
+inputs:
+  loki-url:
+    description: "Loki endpoint URL"
+    required: true
+  loki-username:
+    description: "Username for Loki authentication"
+    required: false
+  loki-password:
+    description: "Password for Loki authentication"
+    required: false
+  repository:
+    description: "Repository name in 'owner/repo' format (e.g., 'grafana/grafana')"
+    required: true
+  time-range:
+    description: "Time range for the query (e.g., '1h', '24h', '7d')"
+    required: false
+    default: "1h"
+  top-k:
+    description: "Include only the top K flaky tests by distinct branches count in analysis"
+    required: false
+    default: "3"
+
+runs:
+  using: "composite"
+  steps:
+    - name: Set up Go
+      uses: actions/setup-go@v5
+      with:
+        go-version: "1.22"
+
+    # Builds the analyzer from the action's own checkout and runs it with the
+    # action inputs exposed as environment variables.
+    - name: Build and run analyzer
+      shell: bash
+      # The action path is read from the GITHUB_ACTION_PATH environment
+      # variable and quoted, instead of interpolating the expression into the
+      # script, so paths containing spaces or backslashes do not break `cd`.
+      run: |
+        cd "${GITHUB_ACTION_PATH}"
+        go build -o analyzer ./cmd/go-flaky-tests
+        ./analyzer
+      env:
+        LOKI_URL: ${{ inputs.loki-url }}
+        LOKI_USERNAME: ${{ inputs.loki-username }}
+        LOKI_PASSWORD: ${{ inputs.loki-password }}
+        REPOSITORY: ${{ inputs.repository }}
+        TIME_RANGE: ${{ inputs.time-range }}
+        TOP_K: ${{ inputs.top-k }}
@@ -0,0 +1,137 @@
+package main
+
+import (
+	"fmt"
+	"log"
+	"slices"
+	"strconv"
+	"strings"
+)
+
+// RawLogEntry is a single test failure record: the failing test, the branch
+// it failed on, and the GitHub Actions workflow run that reported it.
+type RawLogEntry struct {
+	// TestName is the name of the failing test.
+	TestName           string `json:"test_name"`
+	// Branch is the branch the failing workflow run was executed for.
+	Branch             string `json:"branch"`
+	// WorkflowRunURL is the URL of the workflow run that reported the failure.
+	WorkflowRunURL     string `json:"workflow_run_url"`
+	// WorkflowJobName is the name of the workflow job that reported the failure.
+	WorkflowJobName    string `json:"workflow_job_name"`
+	// WorkflowRunAttempt is the attempt number of the workflow run.
+	WorkflowRunAttempt int    `json:"workflow_run_attempt"`
+}
+
+func AggregateFlakyTestsFromResponse(lokiResp *LokiResponse) ([]FlakyTest, error) {
+	var rawEntries []RawLogEntry
+	for _, result := range lokiResp.Data.Result {
+		testName := result.Stream["parent_test_name"]
+		branch := result.Stream["ci_github_workflow_run_head_branch"]
+		workflowRunURL := result.Stream["ci_github_workflow_run_html_url"]
+		workflowJobName := result.Stream["ci_github_workflow_job_name"]
+		workflowRunAttempt, _ := strconv.Atoi(result.Stream["ci_github_workflow_run_run_attempt"])
+
+		if testName == "" || branch == "" {
+			continue
+		}
+		entry := RawLogEntry{
+			TestName:           testName,
+			Branch:             branch,
+			WorkflowRunURL:     workflowRunURL,
+			WorkflowJobName:    workflowJobName,
+			WorkflowRunAttempt: workflowRunAttempt,
+		}
+		rawEntries = append(rawEntries, entry)
+	}
+
+	log.Printf("🔄 Processed %d log lines, extracted %d valid test failure entries", len(lokiResp.Data.Result), len(rawEntries))
+
+	return detectFlakyTestsFromRawEntries(rawEntries), nil
+}
+
+// detectFlakyTestsFromRawEntries groups failure entries by test name and
+// returns the tests considered flaky, ordered by sortFlakyTests.
+//
+// A test is flaky when it failed on "main" or "master", or on more than one
+// distinct branch. Entries with an empty test name or branch are ignored. At
+// most maxExampleWorkflows distinct example workflows are kept per test; they
+// are returned sorted by run URL, then job name, then attempt, so the same
+// input always produces the same output.
+func detectFlakyTestsFromRawEntries(rawEntries []RawLogEntry) []FlakyTest {
+	// maxExampleWorkflows caps how many distinct workflows are kept per test.
+	const maxExampleWorkflows = 3
+
+	// testMap counts failures per test name and branch.
+	testMap := make(map[string]map[string]int)
+	exampleWorkflows := make(map[string]map[GithubActionsWorkflow]bool)
+
+	for _, entry := range rawEntries {
+		if entry.TestName == "" || entry.Branch == "" {
+			continue
+		}
+
+		if testMap[entry.TestName] == nil {
+			testMap[entry.TestName] = make(map[string]int)
+			exampleWorkflows[entry.TestName] = make(map[GithubActionsWorkflow]bool)
+		}
+
+		testMap[entry.TestName][entry.Branch]++
+
+		workflow := GithubActionsWorkflow{
+			RunURL:  entry.WorkflowRunURL,
+			JobName: entry.WorkflowJobName,
+			Attempt: entry.WorkflowRunAttempt,
+		}
+
+		// Skip entries that carry no workflow information at all.
+		if workflow != (GithubActionsWorkflow{}) && len(exampleWorkflows[entry.TestName]) < maxExampleWorkflows {
+			exampleWorkflows[entry.TestName][workflow] = true
+		}
+	}
+
+	// Walk the tests in name order: map iteration order is unspecified, which
+	// would otherwise shuffle the log lines between runs.
+	testNames := make([]string, 0, len(testMap))
+	for testName := range testMap {
+		testNames = append(testNames, testName)
+	}
+	slices.Sort(testNames)
+
+	var flakyTests []FlakyTest
+
+	for _, testName := range testNames {
+		branches := testMap[testName]
+		isFlaky := len(branches) > 1
+		totalFailures := 0
+
+		for branch, count := range branches {
+			totalFailures += count
+
+			if branch == "main" || branch == "master" {
+				isFlaky = true
+			}
+		}
+
+		if !isFlaky {
+			continue
+		}
+
+		branchSummary := make([]string, 0, len(branches))
+		for branch, count := range branches {
+			branchSummary = append(branchSummary, fmt.Sprintf("%s:%d", branch, count))
+		}
+		slices.Sort(branchSummary)
+
+		// Sort the examples so the report does not depend on map iteration
+		// order.
+		var workflows []GithubActionsWorkflow
+		for workflow := range exampleWorkflows[testName] {
+			workflows = append(workflows, workflow)
+		}
+		slices.SortFunc(workflows, func(a, b GithubActionsWorkflow) int {
+			if c := strings.Compare(a.RunURL, b.RunURL); c != 0 {
+				return c
+			}
+			if c := strings.Compare(a.JobName, b.JobName); c != 0 {
+				return c
+			}
+			return a.Attempt - b.Attempt
+		})
+
+		flakyTests = append(flakyTests, FlakyTest{
+			TestName:         testName,
+			TotalFailures:    totalFailures,
+			BranchCounts:     branches,
+			ExampleWorkflows: workflows,
+		})
+
+		log.Printf("🔍 Detected flaky test: %s (%d total failures) - branches: %s",
+			testName, totalFailures, strings.Join(branchSummary, ", "))
+	}
+
+	log.Printf("📈 Test analysis stats:")
+	log.Printf("   - Total unique tests with failures: %d", len(testMap))
+	log.Printf("   - Tests classified as flaky: %d", len(flakyTests))
+
+	return sortFlakyTests(flakyTests)
+}
+
+// sortFlakyTests orders tests in place by the number of distinct failing
+// branches, largest first, breaking ties by ascending test name, and returns
+// the same slice.
+func sortFlakyTests(tests []FlakyTest) []FlakyTest {
+	byBranchCountThenName := func(left, right FlakyTest) int {
+		// More distinct branches sorts first.
+		if diff := len(right.BranchCounts) - len(left.BranchCounts); diff != 0 {
+			return diff
+		}
+		switch {
+		case left.TestName < right.TestName:
+			return -1
+		case left.TestName > right.TestName:
+			return 1
+		default:
+			return 0
+		}
+	}
+
+	slices.SortFunc(tests, byBranchCountThenName)
+	return tests
+}