Skip to content

Commit db687f6

Browse files
committed
docs: update provider docs and improve error handling
- Add documentation for check modes, built-in CEL functions, and standard extensions in checks README - Clarify default health behavior in HTTP provider docs - Update Helm and Satellite provider docs for accuracy and new options - Improve error context in TLS and HTTP providers - Make config loading thread-safe and expose validation errors via method - Use maps.Copy and slices utilities for safer map/slice operations - Add input validation in Kubernetes resource fetcher - Avoid panics on provider instantiation errors - Minor test and code cleanup
1 parent 53aa3bc commit db687f6

File tree

14 files changed

+175
-68
lines changed

14 files changed

+175
-68
lines changed

pkg/checks/README.md

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,45 @@ ph context my-app -o yaml
2121

2222
CEL expressions must evaluate to a boolean (`true` for healthy, `false` for unhealthy). Expressions have access to provider-specific context variables (e.g., `response` for HTTP, `resource` for Kubernetes).
2323

24+
### Check Modes
25+
26+
Each check can optionally specify a `mode` field:
27+
28+
- **default** (no `mode` field): The expression is evaluated once against the full provider context.
29+
- **`mode: "each"`**: The expression is evaluated per-item for providers that return collections (e.g., Kubernetes with label selectors). Each item is evaluated independently, and failures reference the specific item.
30+
31+
```yaml
32+
checks:
33+
- check: "resource.status.readyReplicas >= resource.spec.replicas"
34+
message: "Not all replicas ready"
35+
mode: "each"
36+
```
37+
38+
## Built-in Functions
39+
40+
The following custom functions are available in all CEL expressions:
41+
42+
- `time.Now()` - Returns the current timestamp
43+
- `time.Since(timestamp)` - Returns the duration elapsed since the given timestamp
44+
- `time.Until(timestamp)` - Returns the duration until the given timestamp
45+
46+
```yaml
47+
checks:
48+
- check: "time.Until(tls.validUntil) > duration('168h')"
49+
message: "Certificate expires within 7 days"
50+
```
51+
52+
## Standard Extensions
53+
54+
The following CEL extension libraries are available:
55+
56+
- **Strings**: Additional string functions (`charAt`, `indexOf`, `replace`, `split`, `substring`, `trim`, `upperAscii`, `lowerAscii`)
57+
- **Lists**: List manipulation functions (`slice`, `flatten`)
58+
- **Encoders**: Base64 encoding/decoding (`base64.encode`, `base64.decode`)
59+
- **Math**: Math functions (`math.greatest`, `math.least`)
60+
- **Sets**: Set operations (`sets.contains`, `sets.intersects`, `sets.equivalent`)
61+
- **Bindings**: Variable binding via `cel.bind()`
62+
2463
## Common CEL Patterns
2564

2665
> **Note:** The examples below use `data` as a generic illustrative placeholder variable. Actual CEL context variables are provider-specific — for example, `response` for [HTTP](../provider/http), `resource` for [Kubernetes](../provider/kubernetes), `tls` for [TLS](../provider/tls), `health` for [Vault](../provider/vault), and `release`/`chart` for [Helm](../provider/helm). See each provider's README for its available CEL variables, or use `ph context` to inspect the evaluation context.

pkg/commands/migrate/migrate.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package migrate
33
import (
44
"bytes"
55
"fmt"
6+
"maps"
67
"os"
78
"strings"
89

@@ -51,9 +52,7 @@ func transformRest(config map[string]any) {
5152
if !ok {
5253
return
5354
}
54-
for k, v := range reqMap {
55-
config[k] = v
56-
}
55+
maps.Copy(config, reqMap)
5756
delete(config, "request")
5857
}
5958

pkg/commands/shared/providers.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package shared
33

44
import (
55
"fmt"
6+
"log/slog"
67

78
"github.com/spf13/cobra"
89

@@ -24,7 +25,8 @@ func AddProviderSubcommands(parent *cobra.Command, opts ProviderSubcommandOption
2425
for _, providerType := range provider.ProviderList() {
2526
instance, err := provider.New(providerType)
2627
if err != nil {
27-
panic(err)
28+
slog.Error("failed to instantiate provider", "type", providerType, "error", err)
29+
continue
2830
}
2931

3032
if opts.RequireChecks && !provider.SupportsChecks(instance) {

pkg/commands/validate/validate.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ func collectResults(result *config.LoadResult) ValidationSummary {
9191
var results []ValidationResult
9292

9393
// Add errors from config loading (instances that failed to load)
94-
for _, err := range result.ValidationErrors {
94+
for _, err := range result.ValidationErrors() {
9595
// Try to extract instance info from InstanceError
9696
if instErr, ok := err.(provider.InstanceError); ok {
9797
results = append(results, ValidationResult{

pkg/config/config.go

Lines changed: 68 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ import (
66
"fmt"
77
"log/slog"
88
"path/filepath"
9+
"slices"
10+
"sync"
911

1012
"github.com/fsnotify/fsnotify"
1113
"github.com/spf13/viper"
@@ -25,23 +27,24 @@ type abstractConfig map[string]any
2527
// Structure: map[providerType][]provider.Instance
2628
type concreteConfig map[string][]provider.Instance
2729

28-
// LoadResult wraps config with validation errors from loading
30+
// LoadResult wraps config with validation errors from loading.
31+
// It is safe for concurrent use; the config reload callback (from fsnotify)
32+
// writes under a write lock, while GetInstances and friends read under a read lock.
2933
type LoadResult struct {
34+
mu sync.RWMutex
3035
config concreteConfig
31-
ValidationErrors []error
32-
v *viper.Viper // viper instance for config reloading
36+
validationErrors []error
37+
v *viper.Viper // viper instance for config reloading
38+
log *slog.Logger // logger captured at Load time
3339
}
3440

35-
var log *slog.Logger
36-
3741
// Load loads and validates configuration from the specified paths.
3842
// If strict is true, validation errors are collected; otherwise invalid instances are skipped with warnings.
3943
func Load(ctx context.Context, configPaths []string, configName string, strict bool) (*LoadResult, error) {
40-
log = phctx.Logger(ctx)
41-
4244
result := &LoadResult{
4345
config: make(concreteConfig),
4446
v: phctx.Viper(ctx), // use viper from context (has :: delimiter)
47+
log: phctx.Logger(ctx),
4548
}
4649

4750
if err := result.initialize(configPaths, configName, strict); err != nil {
@@ -52,24 +55,37 @@ func Load(ctx context.Context, configPaths []string, configName string, strict b
5255

5356
// HasErrors returns true if any validation errors were collected.
5457
func (r *LoadResult) HasErrors() bool {
55-
return len(r.ValidationErrors) > 0
58+
r.mu.RLock()
59+
defer r.mu.RUnlock()
60+
return len(r.validationErrors) > 0
61+
}
62+
63+
// ValidationErrors returns a copy of the validation errors.
64+
func (r *LoadResult) ValidationErrors() []error {
65+
r.mu.RLock()
66+
defer r.mu.RUnlock()
67+
return slices.Clone(r.validationErrors)
5668
}
5769

5870
// EnforceStrict logs all validation errors and returns an error if any exist.
5971
// Used in strict mode to abort on configuration errors.
6072
func (r *LoadResult) EnforceStrict(log *slog.Logger) error {
61-
if !r.HasErrors() {
73+
errs := r.ValidationErrors()
74+
if len(errs) == 0 {
6275
return nil
6376
}
64-
for _, e := range r.ValidationErrors {
77+
for _, e := range errs {
6578
log.Error("configuration error", slog.Any("error", e))
6679
}
67-
return fmt.Errorf("configuration validation failed with %d error(s)", len(r.ValidationErrors))
80+
return fmt.Errorf("configuration validation failed with %d error(s)", len(errs))
6881
}
6982

7083
// GetInstances returns a flat slice of all loaded provider instances.
7184
func (r *LoadResult) GetInstances() []provider.Instance {
72-
flatInstances := make([]provider.Instance, 0, r.totalInstances())
85+
r.mu.RLock()
86+
defer r.mu.RUnlock()
87+
88+
flatInstances := make([]provider.Instance, 0, r.totalInstancesLocked())
7389

7490
for _, instances := range r.config {
7591
flatInstances = append(flatInstances, instances...)
@@ -80,7 +96,7 @@ func (r *LoadResult) GetInstances() []provider.Instance {
8096

8197
// initialize sets up provider configuration using viper
8298
func (r *LoadResult) initialize(configPaths []string, configName string, strict bool) (err error) {
83-
log.Debug("initializing server configuration")
99+
r.log.Debug("initializing server configuration")
84100

85101
// r.v is already set from context with :: delimiter
86102

@@ -95,52 +111,62 @@ func (r *LoadResult) initialize(configPaths []string, configName string, strict
95111

96112
if err = r.v.ReadInConfig(); err != nil {
97113
if _, ok := err.(viper.ConfigFileNotFoundError); ok {
98-
log.Info("no configuration file found - using defaults")
114+
r.log.Info("no configuration file found - using defaults")
99115
} else {
100-
log.Error("failed to read config", "error", err)
116+
r.log.Error("failed to read config", "error", err)
101117
return err
102118
}
103119
} else {
104-
log.Info("read config", slog.String("file", r.v.ConfigFileUsed()))
120+
r.log.Info("read config", slog.String("file", r.v.ConfigFileUsed()))
105121
r.v.WatchConfig()
106122
r.v.OnConfigChange(func(e fsnotify.Event) {
107-
log.Debug("config change")
123+
r.log.Debug("config change")
108124
if err := r.v.ReadInConfig(); err != nil {
109-
log.Error("failed to read config", "error", err)
125+
r.log.Error("failed to read config", "error", err)
110126
return
111127
}
112128

129+
r.mu.Lock()
113130
prevConfig := r.config
114-
prevErrors := r.ValidationErrors
131+
prevErrors := r.validationErrors
132+
r.mu.Unlock()
115133

116134
if err := r.update(strict); err != nil {
117-
log.Error("failed to load config", "error", err)
135+
r.log.Error("failed to load config", "error", err)
136+
r.mu.Lock()
118137
r.config = prevConfig
119-
r.ValidationErrors = prevErrors
138+
r.validationErrors = prevErrors
139+
r.mu.Unlock()
120140
return
121141
}
122142

123-
if strict && r.HasErrors() {
124-
for _, e := range r.ValidationErrors {
125-
log.Error("configuration error", slog.Any("error", e))
143+
r.mu.RLock()
144+
hasErrors := len(r.validationErrors) > 0
145+
r.mu.RUnlock()
146+
147+
if strict && hasErrors {
148+
for _, e := range r.ValidationErrors() {
149+
r.log.Error("configuration error", slog.Any("error", e))
126150
}
127-
log.Warn("config reload rejected due to strict validation errors")
151+
r.log.Warn("config reload rejected due to strict validation errors")
152+
r.mu.Lock()
128153
r.config = prevConfig
129-
r.ValidationErrors = prevErrors
154+
r.validationErrors = prevErrors
155+
r.mu.Unlock()
130156
return
131157
}
132158

133-
log.Info("config reloaded", slog.Any("instances", r.countByProvider()))
159+
r.log.Info("config reloaded", slog.Any("instances", r.countByProvider()))
134160
})
135161
}
136162
}
137163

138164
if err := r.update(strict); err != nil {
139-
log.Error("failed to load config", "error", err)
165+
r.log.Error("failed to load config", "error", err)
140166
return err
141167
}
142168

143-
log.Info("config loaded", slog.Any("instances", r.countByProvider()))
169+
r.log.Info("config loaded", slog.Any("instances", r.countByProvider()))
144170

145171
return nil
146172
}
@@ -149,7 +175,7 @@ func (r *LoadResult) update(strict bool) error {
149175
raw := make(map[string]any)
150176

151177
if err := r.v.Unmarshal(&raw); err != nil {
152-
log.Error("failed to unmarshal config", "error", err)
178+
r.log.Error("failed to unmarshal config", "error", err)
153179
return err
154180
}
155181

@@ -160,7 +186,7 @@ func (r *LoadResult) update(strict bool) error {
160186

161187
processed, err := ProcessIncludes(raw, basePath, nil)
162188
if err != nil {
163-
log.Error("failed to process includes", "error", err)
189+
r.log.Error("failed to process includes", "error", err)
164190
return fmt.Errorf("failed to process includes: %w", err)
165191
}
166192

@@ -170,12 +196,16 @@ func (r *LoadResult) update(strict bool) error {
170196
}
171197

172198
abstract := abstractConfig(components)
173-
r.config, r.ValidationErrors = abstract.harden(strict)
199+
config, validationErrors := abstract.harden(r.log, strict)
200+
201+
r.mu.Lock()
202+
r.config, r.validationErrors = config, validationErrors
203+
r.mu.Unlock()
174204

175205
return nil
176206
}
177207

178-
func (a abstractConfig) harden(strict bool) (concreteConfig, []error) {
208+
func (a abstractConfig) harden(log *slog.Logger, strict bool) (concreteConfig, []error) {
179209
concrete := make(concreteConfig)
180210
var validationErrors []error
181211

@@ -236,14 +266,18 @@ func (a abstractConfig) harden(strict bool) (concreteConfig, []error) {
236266
return concrete, validationErrors
237267
}
238268

239-
func (r *LoadResult) totalInstances() (count int) {
269+
// totalInstancesLocked returns the total instance count. Caller must hold r.mu.
270+
func (r *LoadResult) totalInstancesLocked() (count int) {
240271
for _, instances := range r.config {
241272
count += len(instances)
242273
}
243274
return count
244275
}
245276

246277
func (r *LoadResult) countByProvider() map[string]int {
278+
r.mu.RLock()
279+
defer r.mu.RUnlock()
280+
247281
counts := make(map[string]int)
248282

249283
for providerType, instances := range r.config {

pkg/config/config_test.go

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package config
22

33
import (
44
"context"
5+
"log/slog"
56
"testing"
67
"time"
78

@@ -12,9 +13,7 @@ import (
1213
"github.com/isometry/platform-health/pkg/provider/mock"
1314
)
1415

15-
func init() {
16-
log = phctx.Logger(context.Background())
17-
}
16+
var testLog = slog.Default()
1817

1918
// testContext creates a context with a viper instance for testing
2019
func testContext(t *testing.T) context.Context {
@@ -136,7 +135,7 @@ func TestHarden(t *testing.T) {
136135

137136
for _, tt := range tests {
138137
t.Run(tt.name, func(t *testing.T) {
139-
instances, _ := tt.abstract.harden(false)
138+
instances, _ := tt.abstract.harden(testLog, false)
140139
assert.Equal(t, tt.expectedProviders, len(instances), "providers count mismatch")
141140
// Count total instances
142141
total := 0
@@ -153,7 +152,7 @@ func TestHardenSetName(t *testing.T) {
153152
"myinstance": map[string]any{"type": "mock", "spec": map[string]any{}},
154153
}
155154

156-
result, _ := abstract.harden(false)
155+
result, _ := abstract.harden(testLog, false)
157156

158157
// Check instance name is set from key
159158
instance := findInstanceByName(result["mock"], "myinstance")
@@ -171,7 +170,7 @@ func TestHardenDurationParsing(t *testing.T) {
171170
},
172171
}
173172

174-
result, _ := abstract.harden(false)
173+
result, _ := abstract.harden(testLog, false)
175174
assert.Equal(t, 1, len(result["mock"]))
176175

177176
instance := findInstanceByName(result["mock"], "test").(*mock.Component)
@@ -224,7 +223,7 @@ func TestTotalInstances(t *testing.T) {
224223

225224
for _, tt := range tests {
226225
t.Run(tt.name, func(t *testing.T) {
227-
assert.Equal(t, tt.expected, tt.result.totalInstances())
226+
assert.Equal(t, tt.expected, len(tt.result.GetInstances()))
228227
})
229228
}
230229
}
@@ -281,7 +280,7 @@ func TestUnknownComponentKeysFixtures(t *testing.T) {
281280
t.Run(tt.name, func(t *testing.T) {
282281
result, err := Load(testContext(t), []string{testdataPath}, tt.configFile, tt.strict)
283282
assert.NoError(t, err, "Load should not return error")
284-
assert.Equal(t, tt.expectErrors, len(result.ValidationErrors), "validation error count mismatch")
283+
assert.Equal(t, tt.expectErrors, len(result.ValidationErrors()), "validation error count mismatch")
285284
assert.Equal(t, tt.expectInstances, len(result.GetInstances()), "instance count mismatch")
286285
})
287286
}
@@ -332,7 +331,7 @@ func TestUnknownSpecKeysFixtures(t *testing.T) {
332331
t.Run(tt.name, func(t *testing.T) {
333332
result, err := Load(testContext(t), []string{testdataPath}, tt.configFile, tt.strict)
334333
assert.NoError(t, err, "Load should not return error")
335-
assert.Equal(t, tt.expectErrors, len(result.ValidationErrors), "validation error count mismatch")
334+
assert.Equal(t, tt.expectErrors, len(result.ValidationErrors()), "validation error count mismatch")
336335
assert.Equal(t, tt.expectInstances, len(result.GetInstances()), "instance count mismatch")
337336
})
338337
}

0 commit comments

Comments
 (0)