Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 113 additions & 3 deletions sdk/testing/server/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import (
"syscall"
"time"

"github.com/stretchr/testify/require"
gopkgyaml "gopkg.in/yaml.v3"

"k8s.io/apimachinery/pkg/util/wait"
Expand Down Expand Up @@ -66,6 +65,13 @@ func scrapeMetricsForServer(t TestingT, srv RunningServer) {
t.Logf("PROMETHEUS_URL environment variable unset, skipping Prometheus scrape config generation")
return
}

caFile := filepath.Join(srv.CADirectory(), "apiserver.crt")
if _, err := os.Stat(caFile); os.IsNotExist(err) {
t.Logf("CA file %s does not exist, skipping Prometheus scrape config for server %s", caFile, srv.Name())
return
}

jobName := fmt.Sprintf("kcp-%s-%s", srv.Name(), t.Name())
labels := map[string]string{
"server": srv.Name(),
Expand All @@ -75,8 +81,23 @@ func scrapeMetricsForServer(t TestingT, srv RunningServer) {
ctx, cancel := context.WithTimeout(context.Background(), wait.ForeverTestTimeout)
defer cancel()
repoDir, err := kcptestinghelpers.RepositoryDir()
require.NoError(t, err)
require.NoError(t, ScrapeMetrics(ctx, srv.RootShardSystemMasterBaseConfig(t), promUrl, repoDir, jobName, filepath.Join(srv.CADirectory(), "apiserver.crt"), labels))
if err != nil {
t.Logf("error getting repository directory for server %s: %v", srv.Name(), err)
return
}

if err := ScrapeMetrics(ctx, srv.RootShardSystemMasterBaseConfig(t), promUrl, repoDir, jobName, caFile, labels); err != nil {
t.Logf("error configuring Prometheus scraping for server %s: %v", srv.Name(), err)
}

// Clean up Prometheus configuration when test finishes
t.Cleanup(func() {
cleanupCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
if err := CleanupScrapeMetrics(cleanupCtx, promUrl, repoDir, jobName); err != nil {
t.Logf("error cleaning up Prometheus scrape config for server %s: %v", srv.Name(), err)
}
})
}

func ScrapeMetrics(ctx context.Context, cfg *rest.Config, promUrl, promCfgDir, jobName, caFile string, labels map[string]string) error {
Expand Down Expand Up @@ -162,3 +183,92 @@ func ScrapeMetrics(ctx context.Context, cfg *rest.Config, promUrl, promCfgDir, j
resp.Body.Close()
return nil
}

// CleanupScrapeMetrics removes scrape jobs from the Prometheus configuration
// file ".prometheus-config.yaml" inside promCfgDir and then asks the
// Prometheus instance at promUrl to reload its configuration.
//
// A scrape job is removed when either of the following holds:
//   - jobNamePrefix is non-empty and the job's name starts with it;
//   - the job references a CA file that no longer exists on disk.
//
// An empty jobNamePrefix matches no job by name, so only jobs with a missing
// CA file are pruned in that case. Because matching is by prefix, a prefix
// that is itself the beginning of another job's name removes that job too.
//
// The configuration file is held under an exclusive flock while it is read
// and rewritten. A missing configuration file is not an error; the reload
// request is still sent in that case.
//
// It returns an error if the file cannot be opened, locked, parsed or
// rewritten, if the reload request cannot be sent, or if Prometheus answers
// the reload with a non-2xx status.
func CleanupScrapeMetrics(ctx context.Context, promUrl, promCfgDir, jobNamePrefix string) error {
	type staticConfigs struct {
		Targets []string          `yaml:"targets,omitempty"`
		Labels  map[string]string `yaml:"labels,omitempty"`
	}
	type tlsConfig struct {
		InsecureSkipVerify bool   `yaml:"insecure_skip_verify,omitempty"`
		CaFile             string `yaml:"ca_file,omitempty"`
	}
	type scrapeConfig struct {
		JobName        string          `yaml:"job_name,omitempty"`
		ScrapeInterval string          `yaml:"scrape_interval,omitempty"`
		BearerToken    string          `yaml:"bearer_token,omitempty"`
		TlsConfig      tlsConfig       `yaml:"tls_config,omitempty"`
		Scheme         string          `yaml:"scheme,omitempty"`
		StaticConfigs  []staticConfigs `yaml:"static_configs,omitempty"`
	}
	type config struct {
		ScrapeConfigs []scrapeConfig `yaml:"scrape_configs,omitempty"`
	}

	// matchesJob reports whether name starts with jobNamePrefix. The prefix
	// comparison is written out by hand so that the block needs no package
	// beyond those it already used. An empty prefix never matches; otherwise
	// it would wipe every job in the shared file.
	matchesJob := func(name string) bool {
		return jobNamePrefix != "" &&
			len(name) >= len(jobNamePrefix) &&
			name[:len(jobNamePrefix)] == jobNamePrefix
	}

	err := func() error {
		scrapeConfigFile := filepath.Join(promCfgDir, ".prometheus-config.yaml")
		f, err := os.OpenFile(scrapeConfigFile, os.O_RDWR, 0o644)
		if os.IsNotExist(err) {
			return nil // Nothing to clean up
		}
		if err != nil {
			return err
		}
		defer f.Close()

		// Lock the config file exclusively for the whole read-modify-write cycle.
		if err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX); err != nil {
			return err
		}
		defer func() {
			// Best effort: closing the file releases the lock anyway.
			_ = syscall.Flock(int(f.Fd()), syscall.LOCK_UN)
		}()

		promCfg := config{}
		// io.EOF means the file is empty, which is treated as an empty config.
		if err := gopkgyaml.NewDecoder(f).Decode(&promCfg); err != nil && !errors.Is(err, io.EOF) {
			return err
		}

		filteredConfigs := make([]scrapeConfig, 0, len(promCfg.ScrapeConfigs))
		for _, cfg := range promCfg.ScrapeConfigs {
			// Drop the jobs this cleanup was asked to remove.
			if matchesJob(cfg.JobName) {
				continue
			}
			// Drop jobs whose CA file has disappeared.
			if cfg.TlsConfig.CaFile != "" {
				if _, err := os.Stat(cfg.TlsConfig.CaFile); os.IsNotExist(err) {
					continue
				}
			}
			filteredConfigs = append(filteredConfigs, cfg)
		}
		promCfg.ScrapeConfigs = filteredConfigs

		// Serialize before truncating so that a marshalling failure cannot
		// leave the shared configuration file empty.
		out, err := gopkgyaml.Marshal(&promCfg)
		if err != nil {
			return err
		}
		if err := f.Truncate(0); err != nil {
			return err
		}
		if _, err := f.Seek(0, io.SeekStart); err != nil {
			return err
		}
		_, err = f.Write(out)
		return err
	}()
	if err != nil {
		return err
	}

	// Reload Prometheus configuration
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, promUrl+"/-/reload", http.NoBody)
	if err != nil {
		return err
	}
	c := &http.Client{}
	resp, err := c.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	// Drain the body so the underlying connection can be reused.
	_, _ = io.Copy(io.Discard, resp.Body)

	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return fmt.Errorf("reloading Prometheus configuration: unexpected status %s", resp.Status)
	}
	return nil
}