Skip to content

Commit a076e43

Browse files
authored
Operational logs over API: hypeman.log, vmm.log (#34)
* Operational logs over API: hypeman.log, vmm.log * Fix test * Review instance log handler * Use log not found error * Don't delete taps when instance state is unknown * Review fixes 1. Exec endpoint now has logger injection: 2. Simplified InstanceLogHandler - no more file caching: - Removed `sharedState` struct with mutex and fileCache - Each write now opens, writes, closes the file - No cleanup methods needed = no leak possible - Simpler code, no shared state complexity The performance impact is negligible since instance operations (start, stop, standby, etc.) are infrequent. The tradeoff of slightly more I/O vs. guaranteed no leaks and simpler code is worth it. * add logs * Fix which id * Reivew fixes * run cleanup when no vm * Move resource id, partial id, name resolution to middleware * Fix response code * Extra careful checks 1. Fixed `ErrAmbiguousName` to return 409 Conflict (instead of 404) Changed the HTTP status code from `http.StatusNotFound` to `http.StatusConflict` for ambiguous name errors, restoring the previous ingress behavior. 2. Added nil checks with 500 error responses for all `GetResolved*` calls Added defensive nil checks to all 14 handlers that use the resolved resource from middleware. If the middleware didn't set the resource (which shouldn't happen in production but could in tests), the handler now returns a 500 error with `"resource not resolved"` message instead of panicking with a nil pointer dereference. * resource_id instead of id, fix target in ingress to use instance name $ hypeman ingress create q --hostname 'nginx-test' --port 80 --host-port 8081 Creating ingress nginx-test-wmbi... e5hbxzwc6cq0tnjchw861exg $ hypeman ingress list ID NAME HOSTNAME TARGET TLS CREATED e5hbxzwc6cq0 nginx-test-wmbi nginx-test nginx-gi7w:80 no 4 seconds ago Fix is shows "nginx-gi7w:80" instead of "g:80"
1 parent 8b92fda commit a076e43

40 files changed

+1307
-718
lines changed

cmd/api/api/api_test.go

Lines changed: 49 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"github.com/onkernel/hypeman/cmd/api/config"
1212
"github.com/onkernel/hypeman/lib/images"
1313
"github.com/onkernel/hypeman/lib/instances"
14+
mw "github.com/onkernel/hypeman/lib/middleware"
1415
"github.com/onkernel/hypeman/lib/network"
1516
"github.com/onkernel/hypeman/lib/oapi"
1617
"github.com/onkernel/hypeman/lib/paths"
@@ -98,6 +99,36 @@ func ctx() context.Context {
9899
return context.Background()
99100
}
100101

102+
// ctxWithInstance creates a context with a resolved instance (simulates ResolveResource middleware)
103+
func ctxWithInstance(svc *ApiService, idOrName string) context.Context {
104+
inst, err := svc.InstanceManager.GetInstance(ctx(), idOrName)
105+
if err != nil {
106+
return ctx() // Let handler deal with the error
107+
}
108+
return mw.WithResolvedInstance(ctx(), inst.Id, inst)
109+
}
110+
111+
// ctxWithVolume creates a context with a resolved volume (simulates ResolveResource middleware)
112+
func ctxWithVolume(svc *ApiService, idOrName string) context.Context {
113+
vol, err := svc.VolumeManager.GetVolume(ctx(), idOrName)
114+
if err != nil {
115+
vol, err = svc.VolumeManager.GetVolumeByName(ctx(), idOrName)
116+
}
117+
if err != nil {
118+
return ctx()
119+
}
120+
return mw.WithResolvedVolume(ctx(), vol.Id, vol)
121+
}
122+
123+
// ctxWithImage creates a context with a resolved image (simulates ResolveResource middleware)
124+
func ctxWithImage(svc *ApiService, name string) context.Context {
125+
img, err := svc.ImageManager.GetImage(ctx(), name)
126+
if err != nil {
127+
return ctx()
128+
}
129+
return mw.WithResolvedImage(ctx(), img.Name, img)
130+
}
131+
101132
// createAndWaitForImage creates an image and waits for it to be ready.
102133
// Returns the image name on success, or fails the test on error/timeout.
103134
func createAndWaitForImage(t *testing.T, svc *ApiService, imageName string, timeout time.Duration) string {
@@ -117,24 +148,26 @@ func createAndWaitForImage(t *testing.T, svc *ApiService, imageName string, time
117148
t.Log("Waiting for image to be ready...")
118149
deadline := time.Now().Add(timeout)
119150
for time.Now().Before(deadline) {
120-
imgResp, err := svc.GetImage(ctx(), oapi.GetImageRequestObject{
121-
Name: imageName,
122-
})
123-
require.NoError(t, err)
124-
125-
img, ok := imgResp.(oapi.GetImage200JSONResponse)
126-
if ok {
127-
switch img.Status {
128-
case "ready":
129-
t.Log("Image is ready")
130-
return imgCreated.Name
131-
case "failed":
132-
t.Fatalf("Image build failed: %v", img.Error)
133-
default:
134-
t.Logf("Image status: %s", img.Status)
151+
// Get image from manager (may fail during pending/pulling, that's OK)
152+
img, err := svc.ImageManager.GetImage(ctx(), imageName)
153+
if err != nil {
154+
time.Sleep(100 * time.Millisecond)
155+
continue
156+
}
157+
158+
switch img.Status {
159+
case "ready":
160+
t.Log("Image is ready")
161+
return imgCreated.Name
162+
case "failed":
163+
errMsg := ""
164+
if img.Error != nil {
165+
errMsg = *img.Error
135166
}
167+
t.Fatalf("Image build failed: %v", errMsg)
136168
}
137-
time.Sleep(1 * time.Second)
169+
// Still pending/pulling/converting, poll again
170+
time.Sleep(100 * time.Millisecond)
138171
}
139172

140173
t.Fatalf("Timeout waiting for image %s to be ready", imageName)

cmd/api/api/exec.go

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@ import (
1010
"sync"
1111
"time"
1212

13-
"github.com/go-chi/chi/v5"
1413
"github.com/gorilla/websocket"
1514
"github.com/onkernel/hypeman/lib/exec"
1615
"github.com/onkernel/hypeman/lib/instances"
1716
"github.com/onkernel/hypeman/lib/logger"
17+
mw "github.com/onkernel/hypeman/lib/middleware"
1818
)
1919

2020
var upgrader = websocket.Upgrader{
@@ -36,22 +36,16 @@ type ExecRequest struct {
3636
}
3737

3838
// ExecHandler handles exec requests via WebSocket for bidirectional streaming
39+
// Note: Resolution is handled by ResolveResource middleware
3940
func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) {
4041
ctx := r.Context()
41-
log := logger.FromContext(ctx)
4242
startTime := time.Now()
43+
log := logger.FromContext(ctx)
4344

44-
instanceID := chi.URLParam(r, "id")
45-
46-
// Get instance
47-
inst, err := s.InstanceManager.GetInstance(ctx, instanceID)
48-
if err != nil {
49-
if err == instances.ErrNotFound {
50-
http.Error(w, `{"code":"not_found","message":"instance not found"}`, http.StatusNotFound)
51-
return
52-
}
53-
log.ErrorContext(ctx, "failed to get instance", "error", err)
54-
http.Error(w, `{"code":"internal_error","message":"failed to get instance"}`, http.StatusInternalServerError)
45+
// Get instance resolved by middleware
46+
inst := mw.GetResolvedInstance[instances.Instance](ctx)
47+
if inst == nil {
48+
http.Error(w, `{"code":"internal_error","message":"resource not resolved"}`, http.StatusInternalServerError)
5549
return
5650
}
5751

@@ -105,7 +99,7 @@ func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) {
10599

106100
// Audit log: exec session started
107101
log.InfoContext(ctx, "exec session started",
108-
"instance_id", instanceID,
102+
"instance_id", inst.Id,
109103
"subject", subject,
110104
"command", execReq.Command,
111105
"tty", execReq.TTY,
@@ -133,7 +127,7 @@ func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) {
133127
if err != nil {
134128
log.ErrorContext(ctx, "exec failed",
135129
"error", err,
136-
"instance_id", instanceID,
130+
"instance_id", inst.Id,
137131
"subject", subject,
138132
"duration_ms", duration.Milliseconds(),
139133
)
@@ -148,7 +142,7 @@ func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) {
148142

149143
// Audit log: exec session ended
150144
log.InfoContext(ctx, "exec session ended",
151-
"instance_id", instanceID,
145+
"instance_id", inst.Id,
152146
"subject", subject,
153147
"exit_code", exit.Code,
154148
"duration_ms", duration.Milliseconds(),

cmd/api/api/exec_test.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"time"
99

1010
"github.com/onkernel/hypeman/lib/exec"
11+
"github.com/onkernel/hypeman/lib/instances"
1112
"github.com/onkernel/hypeman/lib/oapi"
1213
"github.com/onkernel/hypeman/lib/paths"
1314
"github.com/onkernel/hypeman/lib/system"
@@ -91,7 +92,7 @@ func TestExecInstanceNonTTY(t *testing.T) {
9192
// Capture console log on failure with exec-agent filtering
9293
t.Cleanup(func() {
9394
if t.Failed() {
94-
consolePath := paths.New(svc.Config.DataDir).InstanceConsoleLog(inst.Id)
95+
consolePath := paths.New(svc.Config.DataDir).InstanceAppLog(inst.Id)
9596
if consoleData, err := os.ReadFile(consolePath); err == nil {
9697
lines := strings.Split(string(consoleData), "\n")
9798

@@ -152,7 +153,7 @@ func TestExecInstanceNonTTY(t *testing.T) {
152153

153154
// Cleanup
154155
t.Log("Cleaning up instance...")
155-
delResp, err := svc.DeleteInstance(ctx(), oapi.DeleteInstanceRequestObject{
156+
delResp, err := svc.DeleteInstance(ctxWithInstance(svc, inst.Id), oapi.DeleteInstanceRequestObject{
156157
Id: inst.Id,
157158
})
158159
require.NoError(t, err)
@@ -211,7 +212,7 @@ func TestExecWithDebianMinimal(t *testing.T) {
211212
// Cleanup on exit
212213
t.Cleanup(func() {
213214
t.Log("Cleaning up instance...")
214-
svc.DeleteInstance(ctx(), oapi.DeleteInstanceRequestObject{Id: inst.Id})
215+
svc.DeleteInstance(ctxWithInstance(svc, inst.Id), oapi.DeleteInstanceRequestObject{Id: inst.Id})
215216
})
216217

217218
// Get actual instance to access vsock fields
@@ -280,7 +281,7 @@ func TestExecWithDebianMinimal(t *testing.T) {
280281

281282
// collectTestLogs collects logs from an instance (non-streaming)
282283
func collectTestLogs(t *testing.T, svc *ApiService, instanceID string, n int) string {
283-
logChan, err := svc.InstanceManager.StreamInstanceLogs(ctx(), instanceID, n, false)
284+
logChan, err := svc.InstanceManager.StreamInstanceLogs(ctx(), instanceID, n, false, instances.LogSourceApp)
284285
if err != nil {
285286
return ""
286287
}

cmd/api/api/images.go

Lines changed: 24 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66

77
"github.com/onkernel/hypeman/lib/images"
88
"github.com/onkernel/hypeman/lib/logger"
9+
mw "github.com/onkernel/hypeman/lib/middleware"
910
"github.com/onkernel/hypeman/lib/oapi"
1011
)
1112

@@ -60,46 +61,38 @@ func (s *ApiService) CreateImage(ctx context.Context, request oapi.CreateImageRe
6061
return oapi.CreateImage202JSONResponse(imageToOAPI(*img)), nil
6162
}
6263

64+
// GetImage gets image details by name
65+
// Note: Resolution is handled by ResolveResource middleware
6366
func (s *ApiService) GetImage(ctx context.Context, request oapi.GetImageRequestObject) (oapi.GetImageResponseObject, error) {
64-
log := logger.FromContext(ctx)
65-
66-
img, err := s.ImageManager.GetImage(ctx, request.Name)
67-
if err != nil {
68-
switch {
69-
case errors.Is(err, images.ErrInvalidName), errors.Is(err, images.ErrNotFound):
70-
return oapi.GetImage404JSONResponse{
71-
Code: "not_found",
72-
Message: "image not found",
73-
}, nil
74-
default:
75-
log.ErrorContext(ctx, "failed to get image", "error", err, "name", request.Name)
76-
return oapi.GetImage500JSONResponse{
77-
Code: "internal_error",
78-
Message: "failed to get image",
79-
}, nil
80-
}
67+
img := mw.GetResolvedImage[images.Image](ctx)
68+
if img == nil {
69+
return oapi.GetImage500JSONResponse{
70+
Code: "internal_error",
71+
Message: "resource not resolved",
72+
}, nil
8173
}
8274
return oapi.GetImage200JSONResponse(imageToOAPI(*img)), nil
8375
}
8476

77+
// DeleteImage deletes an image by name
78+
// Note: Resolution is handled by ResolveResource middleware
8579
func (s *ApiService) DeleteImage(ctx context.Context, request oapi.DeleteImageRequestObject) (oapi.DeleteImageResponseObject, error) {
80+
img := mw.GetResolvedImage[images.Image](ctx)
81+
if img == nil {
82+
return oapi.DeleteImage500JSONResponse{
83+
Code: "internal_error",
84+
Message: "resource not resolved",
85+
}, nil
86+
}
8687
log := logger.FromContext(ctx)
8788

88-
err := s.ImageManager.DeleteImage(ctx, request.Name)
89+
err := s.ImageManager.DeleteImage(ctx, img.Name)
8990
if err != nil {
90-
switch {
91-
case errors.Is(err, images.ErrInvalidName), errors.Is(err, images.ErrNotFound):
92-
return oapi.DeleteImage404JSONResponse{
93-
Code: "not_found",
94-
Message: "image not found",
95-
}, nil
96-
default:
97-
log.ErrorContext(ctx, "failed to delete image", "error", err, "name", request.Name)
98-
return oapi.DeleteImage500JSONResponse{
99-
Code: "internal_error",
100-
Message: "failed to delete image",
101-
}, nil
102-
}
91+
log.ErrorContext(ctx, "failed to delete image", "error", err)
92+
return oapi.DeleteImage500JSONResponse{
93+
Code: "internal_error",
94+
Message: "failed to delete image",
95+
}, nil
10396
}
10497
return oapi.DeleteImage204Response{}, nil
10598
}

0 commit comments

Comments
 (0)