-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathmanager.go
More file actions
628 lines (537 loc) · 19.2 KB
/
manager.go
File metadata and controls
628 lines (537 loc) · 19.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
package ingress
import (
"context"
"fmt"
"log/slog"
"regexp"
"slices"
"strings"
"sync"
"time"
"github.com/nrednav/cuid2"
"github.com/kernel/hypeman/lib/dns"
"github.com/kernel/hypeman/lib/logger"
"github.com/kernel/hypeman/lib/paths"
)
// InstanceResolver provides instance resolution capabilities.
// This interface is implemented by the instance manager.
type InstanceResolver interface {
// ResolveInstanceIP resolves an instance name or ID to its IP address.
// Returns the IP address and nil error if found, or an error if the instance
// doesn't exist, isn't running, or has no network.
ResolveInstanceIP(ctx context.Context, nameOrID string) (string, error)
// InstanceExists checks if an instance with the given name or ID exists.
InstanceExists(ctx context.Context, nameOrID string) (bool, error)
// ResolveInstance resolves an instance name, ID, or ID prefix to its canonical name and ID.
// Returns (name, id, nil) if found, or an error if the instance doesn't exist.
ResolveInstance(ctx context.Context, nameOrID string) (name string, id string, err error)
}
// Manager is the interface for managing ingress resources.
type Manager interface {
// Initialize starts the ingress subsystem.
// This should be called during server startup.
Initialize(ctx context.Context) error
// Create creates a new ingress resource.
Create(ctx context.Context, req CreateIngressRequest) (*Ingress, error)
// Get retrieves an ingress by ID, name, or ID prefix.
// Lookup order: exact ID match -> exact name match -> ID prefix match.
// Returns ErrAmbiguousName if prefix matches multiple ingresses.
Get(ctx context.Context, idOrName string) (*Ingress, error)
// List returns all ingress resources.
List(ctx context.Context) ([]Ingress, error)
// Delete removes an ingress resource by ID, name, or ID prefix.
// Lookup order: exact ID match -> exact name match -> ID prefix match.
// Returns ErrAmbiguousName if prefix matches multiple ingresses.
Delete(ctx context.Context, idOrName string) error
// Shutdown gracefully stops the ingress subsystem.
Shutdown(ctx context.Context) error
// AdminURL returns the Caddy admin API URL.
// Only valid after Initialize() has been called.
AdminURL() string
}
// DefaultDNSPort is the default port for the internal DNS server.
const DefaultDNSPort = dns.DefaultPort
// Config holds configuration for the ingress manager.
type Config struct {
// ListenAddress is the address Caddy should listen on (default: 0.0.0.0).
ListenAddress string
// AdminAddress is the address for Caddy admin API (default: 127.0.0.1).
AdminAddress string
// AdminPort is the port for Caddy admin API (default: 2019).
AdminPort int
// DNSPort is the port for the internal DNS server used for dynamic upstream resolution.
// Default: 5353. Set to 0 to use a random available port.
DNSPort int
// StopOnShutdown determines whether to stop Caddy when hypeman shuts down (default: false).
// When false, Caddy continues running independently.
StopOnShutdown bool
// ACME configuration for TLS certificates
ACME ACMEConfig
// APIIngress configuration for exposing Hypeman API via Caddy
APIIngress APIIngressConfig
}
// DefaultConfig returns the default ingress configuration.
func DefaultConfig() Config {
return Config{
ListenAddress: "0.0.0.0",
AdminAddress: "127.0.0.1",
AdminPort: 2019,
DNSPort: dns.DefaultPort,
StopOnShutdown: false,
}
}
type manager struct {
paths *paths.Paths
config Config
instanceResolver InstanceResolver
daemon *CaddyDaemon
configGenerator *CaddyConfigGenerator
logForwarder *CaddyLogForwarder
dnsServer *dns.Server
mu sync.RWMutex
}
// NewManager creates a new ingress manager.
// If otelLogger is non-nil, Caddy system logs will be forwarded to OTEL.
func NewManager(p *paths.Paths, config Config, instanceResolver InstanceResolver, otelLogger *slog.Logger) Manager {
daemon := NewCaddyDaemon(p, config.AdminAddress, config.AdminPort, config.StopOnShutdown)
// Create log forwarder if OTEL logger is provided
var logForwarder *CaddyLogForwarder
if otelLogger != nil {
logForwarder = NewCaddyLogForwarder(p, otelLogger)
}
// Create DNS server for instance resolution
// The InstanceResolver interface is compatible with dns.InstanceResolver
dnsServer := dns.NewServer(instanceResolver, config.DNSPort, otelLogger)
// Create config generator with initial DNS port
// Note: If DNSPort was 0 (random), the actual port is determined in Initialize()
// after the DNS server starts. The config generator is recreated there with the actual port.
configGenerator := NewCaddyConfigGenerator(
p,
config.ListenAddress,
config.AdminAddress,
config.AdminPort,
config.ACME,
config.APIIngress,
dnsServer.Port(),
)
return &manager{
paths: p,
config: config,
instanceResolver: instanceResolver,
daemon: daemon,
configGenerator: configGenerator,
logForwarder: logForwarder,
dnsServer: dnsServer,
}
}
// Initialize starts the ingress subsystem.
func (m *manager) Initialize(ctx context.Context) error {
m.mu.Lock()
defer m.mu.Unlock()
log := logger.FromContext(ctx)
// Start DNS server for instance resolution
if err := m.dnsServer.Start(ctx); err != nil {
return fmt.Errorf("start DNS server: %w", err)
}
// Resolve the admin port before creating config generator.
// If configured as 0, try to read from existing config or pick a new port.
adminPort := m.config.AdminPort
if adminPort == 0 {
// Try to read port from existing Caddy config
if existingPort := m.daemon.readPortFromConfig(); existingPort > 0 {
adminPort = existingPort
} else {
// Pick a new available port
port, err := pickAvailablePort(m.config.AdminAddress)
if err != nil {
return fmt.Errorf("pick admin port: %w", err)
}
adminPort = port
}
// Update daemon with resolved port
m.daemon.adminPort = adminPort
}
// Create config generator with resolved ports
m.configGenerator = NewCaddyConfigGenerator(
m.paths,
m.config.ListenAddress,
m.config.AdminAddress,
adminPort,
m.config.ACME,
m.config.APIIngress,
m.dnsServer.Port(),
)
// Load existing ingresses
ingresses, err := m.loadAllIngresses()
if err != nil {
return fmt.Errorf("load ingresses: %w", err)
}
// Check if any TLS ingresses exist but TLS isn't configured
if HasTLSRules(ingresses) && !m.config.ACME.IsTLSConfigured() {
log.WarnContext(ctx, "TLS ingresses exist but ACME is not configured - TLS will not work")
}
// Filter out TLS ingresses with hostnames not in the allowed domains list
// to prevent Caddy from trying to obtain certificates for invalid domains
var validIngresses []Ingress
for _, ing := range ingresses {
var validRules []IngressRule
for _, rule := range ing.Rules {
if rule.TLS && !m.config.ACME.IsDomainAllowed(rule.Match.Hostname) {
log.WarnContext(ctx, "skipping TLS ingress rule with hostname not in allowed domains list",
"ingress", ing.Name,
"hostname", rule.Match.Hostname,
"allowed_domains", m.config.ACME.AllowedDomains,
)
continue // Skip this rule
}
validRules = append(validRules, rule)
}
if len(validRules) > 0 {
ing.Rules = validRules
validIngresses = append(validIngresses, ing)
} else {
log.WarnContext(ctx, "skipping ingress with no valid rules",
"ingress", ing.Name,
)
}
}
// Generate and write config with only valid ingresses
if err := m.regenerateConfig(ctx, validIngresses); err != nil {
return fmt.Errorf("regenerate config: %w", err)
}
// Start Caddy daemon
_, err = m.daemon.Start(ctx)
if err != nil {
return fmt.Errorf("start caddy: %w", err)
}
// Start log forwarder (if configured) to forward Caddy system logs to OTEL
if m.logForwarder != nil {
if err := m.logForwarder.Start(ctx); err != nil {
log.WarnContext(ctx, "failed to start caddy log forwarder", "error", err)
// Non-fatal - continue without log forwarding
}
}
return nil
}
// Create creates a new ingress resource.
func (m *manager) Create(ctx context.Context, req CreateIngressRequest) (*Ingress, error) {
m.mu.Lock()
defer m.mu.Unlock()
log := logger.FromContext(ctx)
// Validate request
if err := req.Validate(); err != nil {
return nil, fmt.Errorf("%w: %v", ErrInvalidRequest, err)
}
// Validate name format
if !isValidName(req.Name) {
return nil, fmt.Errorf("%w: name must be lowercase letters, digits, and dashes only; cannot start or end with a dash", ErrInvalidRequest)
}
// Check if name already exists
if _, err := findIngressByName(m.paths, req.Name); err == nil {
return nil, fmt.Errorf("%w: ingress with name %q already exists", ErrAlreadyExists, req.Name)
}
// Check if TLS is requested but ACME isn't configured, and validate allowed domains
for _, rule := range req.Rules {
if rule.TLS {
if !m.config.ACME.IsTLSConfigured() {
return nil, fmt.Errorf("%w: TLS requested but ACME is not configured (set ACME_EMAIL and ACME_DNS_PROVIDER)", ErrInvalidRequest)
}
// Check if domain is in the allowed list
// For pattern hostnames, check the wildcard pattern (e.g., "*.example.com")
domainToCheck := rule.Match.Hostname
if rule.Match.IsPattern() {
pattern, err := rule.Match.ParsePattern()
if err != nil {
return nil, fmt.Errorf("invalid hostname pattern: %w", err)
}
domainToCheck = pattern.Wildcard
}
if !m.config.ACME.IsDomainAllowed(domainToCheck) {
return nil, fmt.Errorf("%w: %q is not in TLS_ALLOWED_DOMAINS (allowed: %s)", ErrDomainNotAllowed, domainToCheck, m.config.ACME.AllowedDomains)
}
}
}
// Validate that all target instances exist and resolve their names (only for literal hostnames)
// Pattern hostnames have dynamic target instances that can't be validated at creation time
var resolvedInstanceIDs []string // Track IDs for logging (used for hypeman.log routing)
for i, rule := range req.Rules {
if !rule.Match.IsPattern() {
// Literal hostname - validate instance exists and resolve to canonical name + ID
resolvedName, resolvedID, err := m.instanceResolver.ResolveInstance(ctx, rule.Target.Instance)
if err != nil {
return nil, fmt.Errorf("%w: instance %q not found", ErrInstanceNotFound, rule.Target.Instance)
}
// Update the rule with the resolved instance name (human-readable for config)
req.Rules[i].Target.Instance = resolvedName
// Track ID for logging (instance directories are by ID)
resolvedInstanceIDs = append(resolvedInstanceIDs, resolvedID)
}
// For pattern hostnames, instance validation happens at request time via the upstream resolver
}
// Check for hostname conflicts (hostname + port must be unique)
existingIngresses, err := m.loadAllIngresses()
if err != nil {
return nil, fmt.Errorf("load existing ingresses: %w", err)
}
for _, rule := range req.Rules {
newPort := rule.Match.GetPort()
for _, existing := range existingIngresses {
for _, existingRule := range existing.Rules {
existingPort := existingRule.Match.GetPort()
if existingRule.Match.Hostname == rule.Match.Hostname && existingPort == newPort {
return nil, fmt.Errorf("%w: hostname %q on port %d is already used by ingress %q", ErrHostnameInUse, rule.Match.Hostname, newPort, existing.Name)
}
}
}
}
// Generate ID
id := cuid2.Generate()
// Create ingress
ingress := Ingress{
ID: id,
Name: req.Name,
Rules: req.Rules,
CreatedAt: time.Now().UTC(),
}
// Generate config with the new ingress included
// Use slices.Concat to avoid modifying the existingIngresses slice
allIngresses := slices.Concat(existingIngresses, []Ingress{ingress})
configData, err := m.configGenerator.GenerateConfig(ctx, allIngresses)
if err != nil {
return nil, fmt.Errorf("generate config: %w", err)
}
// Apply config to Caddy - this validates and applies atomically
// If Caddy rejects the config, we don't persist the ingress
if m.daemon.IsRunning() {
if err := m.daemon.ReloadConfig(configData); err != nil {
return nil, fmt.Errorf("%w: %v", ErrConfigValidationFailed, err)
}
}
// Config accepted - save ingress to storage
stored := &storedIngress{
ID: ingress.ID,
Name: ingress.Name,
Rules: ingress.Rules,
CreatedAt: ingress.CreatedAt.Format(time.RFC3339),
}
if err := saveIngress(m.paths, stored); err != nil {
return nil, fmt.Errorf("save ingress: %w", err)
}
// Write config to disk (for Caddy restarts)
if err := m.configGenerator.WriteConfig(ctx, allIngresses); err != nil {
// Try to clean up the saved ingress
deleteIngressData(m.paths, id)
log.ErrorContext(ctx, "failed to write config after create", "error", err)
return nil, fmt.Errorf("write config: %w", err)
}
// Log creation with ingress_id and instance_id(s) for audit trail
// Each resolved instance gets the log in their hypeman.log (routed by instance_id)
for _, instanceID := range resolvedInstanceIDs {
log.InfoContext(ctx, "ingress created",
"ingress_id", ingress.ID,
"ingress_name", ingress.Name,
"instance_id", instanceID,
)
}
// If no literal hostnames (all patterns), still log the creation
if len(resolvedInstanceIDs) == 0 {
log.InfoContext(ctx, "ingress created",
"ingress_id", ingress.ID,
"ingress_name", ingress.Name,
)
}
return &ingress, nil
}
// Get retrieves an ingress by ID, name, or ID prefix.
// Lookup order: exact ID match -> exact name match -> ID prefix match.
// Returns ErrAmbiguousName if prefix matches multiple ingresses.
func (m *manager) Get(ctx context.Context, idOrName string) (*Ingress, error) {
m.mu.RLock()
defer m.mu.RUnlock()
return m.resolveIngress(idOrName)
}
// resolveIngress finds an ingress by ID, name, or ID prefix.
// Must be called with at least a read lock held.
func (m *manager) resolveIngress(idOrName string) (*Ingress, error) {
// 1. Try exact ID match first (most common case)
stored, err := loadIngress(m.paths, idOrName)
if err == nil {
return storedToIngress(stored), nil
}
// 2. Load all ingresses for name and prefix matching
allIngresses, err := loadAllIngresses(m.paths)
if err != nil {
return nil, err
}
// 3. Try exact name match
var nameMatches []storedIngress
for _, ing := range allIngresses {
if ing.Name == idOrName {
nameMatches = append(nameMatches, ing)
}
}
if len(nameMatches) == 1 {
return storedToIngress(&nameMatches[0]), nil
}
if len(nameMatches) > 1 {
return nil, ErrAmbiguousName
}
// 4. Try ID prefix match
var prefixMatches []storedIngress
for _, ing := range allIngresses {
if len(idOrName) > 0 && strings.HasPrefix(ing.ID, idOrName) {
prefixMatches = append(prefixMatches, ing)
}
}
if len(prefixMatches) == 1 {
return storedToIngress(&prefixMatches[0]), nil
}
if len(prefixMatches) > 1 {
return nil, ErrAmbiguousName
}
return nil, ErrNotFound
}
// List returns all ingress resources.
func (m *manager) List(ctx context.Context) ([]Ingress, error) {
m.mu.RLock()
defer m.mu.RUnlock()
return m.loadAllIngresses()
}
// Delete removes an ingress resource by ID, name, or ID prefix.
func (m *manager) Delete(ctx context.Context, idOrName string) error {
m.mu.Lock()
defer m.mu.Unlock()
log := logger.FromContext(ctx)
// Find the ingress using ID/name/prefix resolution
ingress, err := m.resolveIngress(idOrName)
if err != nil {
return err
}
id := ingress.ID
// Delete from storage
if err := deleteIngressData(m.paths, id); err != nil {
return fmt.Errorf("delete ingress data: %w", err)
}
// Regenerate config without the deleted ingress
ingresses, err := m.loadAllIngresses()
if err != nil {
return fmt.Errorf("load ingresses: %w", err)
}
// Generate and validate new config
configData, err := m.configGenerator.GenerateConfig(ctx, ingresses)
if err != nil {
return fmt.Errorf("generate config: %w", err)
}
// Apply new config
if m.daemon.IsRunning() {
if err := m.daemon.ReloadConfig(configData); err != nil {
log.ErrorContext(ctx, "failed to reload caddy config after delete", "error", err)
return ErrConfigValidationFailed
}
}
// Write config to disk
if err := m.configGenerator.WriteConfig(ctx, ingresses); err != nil {
log.ErrorContext(ctx, "failed to write config after delete", "error", err)
}
// Log deletion with instance_id(s) for audit trail
// Resolve instance names to IDs for hypeman.log routing
hasLiteralHostname := false
for _, rule := range ingress.Rules {
if !rule.Match.IsPattern() {
hasLiteralHostname = true
// Resolve instance name to ID for logging (instance may have been deleted, so ignore errors)
_, instanceID, err := m.instanceResolver.ResolveInstance(ctx, rule.Target.Instance)
if err == nil {
log.InfoContext(ctx, "ingress deleted",
"ingress_id", ingress.ID,
"ingress_name", ingress.Name,
"instance_id", instanceID,
)
} else {
// Instance doesn't exist anymore, log without instance_id
log.InfoContext(ctx, "ingress deleted",
"ingress_id", ingress.ID,
"ingress_name", ingress.Name,
"instance_name", rule.Target.Instance,
)
}
}
}
// If no literal hostnames (all patterns), still log the deletion
if !hasLiteralHostname {
log.InfoContext(ctx, "ingress deleted",
"ingress_id", ingress.ID,
"ingress_name", ingress.Name,
)
}
return nil
}
// Shutdown gracefully stops the ingress subsystem.
func (m *manager) Shutdown(ctx context.Context) error {
m.mu.Lock()
defer m.mu.Unlock()
log := logger.FromContext(ctx)
// Stop log forwarder
if m.logForwarder != nil {
m.logForwarder.Stop()
}
// Stop DNS server
if m.dnsServer != nil {
log.InfoContext(ctx, "stopping DNS server")
if err := m.dnsServer.Stop(); err != nil {
log.WarnContext(ctx, "failed to stop DNS server", "error", err)
} else {
log.InfoContext(ctx, "stopped DNS server")
}
}
// Only stop Caddy if configured to do so
if m.daemon.StopOnShutdown() {
log.InfoContext(ctx, "stopping Caddy daemon")
if err := m.daemon.Stop(ctx); err != nil {
log.ErrorContext(ctx, "failed to stop Caddy daemon", "error", err)
return err
}
log.InfoContext(ctx, "stopped Caddy daemon")
return nil
}
log.InfoContext(ctx, "leaving Caddy daemon running (CADDY_STOP_ON_SHUTDOWN=false)")
return nil
}
// AdminURL returns the Caddy admin API URL.
func (m *manager) AdminURL() string {
return m.daemon.AdminURL()
}
// loadAllIngresses loads all ingresses and converts them to the Ingress type.
func (m *manager) loadAllIngresses() ([]Ingress, error) {
storedList, err := loadAllIngresses(m.paths)
if err != nil {
return nil, err
}
ingresses := make([]Ingress, 0, len(storedList))
for _, stored := range storedList {
ingresses = append(ingresses, *storedToIngress(&stored))
}
return ingresses, nil
}
// regenerateConfig regenerates the Caddy config file from the given ingresses.
func (m *manager) regenerateConfig(ctx context.Context, ingresses []Ingress) error {
return m.configGenerator.WriteConfig(ctx, ingresses)
}
// storedToIngress converts a storedIngress to an Ingress.
func storedToIngress(stored *storedIngress) *Ingress {
createdAt, _ := time.Parse(time.RFC3339, stored.CreatedAt)
return &Ingress{
ID: stored.ID,
Name: stored.Name,
Rules: stored.Rules,
CreatedAt: createdAt,
}
}
// isValidName validates that a name matches the allowed pattern.
var namePattern = regexp.MustCompile(`^[a-z0-9]([a-z0-9-]*[a-z0-9])?$`)
func isValidName(name string) bool {
if len(name) == 0 || len(name) > 63 {
return false
}
return namePattern.MatchString(name)
}