diff --git a/cmd/api/api/api_test.go b/cmd/api/api/api_test.go
index 9f010002..8aaa4064 100644
--- a/cmd/api/api/api_test.go
+++ b/cmd/api/api/api_test.go
@@ -11,6 +11,7 @@ import (
 	"github.com/onkernel/hypeman/cmd/api/config"
 	"github.com/onkernel/hypeman/lib/images"
 	"github.com/onkernel/hypeman/lib/instances"
+	mw "github.com/onkernel/hypeman/lib/middleware"
 	"github.com/onkernel/hypeman/lib/network"
 	"github.com/onkernel/hypeman/lib/oapi"
 	"github.com/onkernel/hypeman/lib/paths"
@@ -98,6 +99,36 @@ func ctx() context.Context {
 	return context.Background()
 }
 
+// ctxWithInstance creates a context with a resolved instance (simulates ResolveResource middleware)
+func ctxWithInstance(svc *ApiService, idOrName string) context.Context {
+	inst, err := svc.InstanceManager.GetInstance(ctx(), idOrName)
+	if err != nil {
+		return ctx() // Let handler deal with the error
+	}
+	return mw.WithResolvedInstance(ctx(), inst.Id, inst)
+}
+
+// ctxWithVolume creates a context with a resolved volume (simulates ResolveResource middleware)
+func ctxWithVolume(svc *ApiService, idOrName string) context.Context {
+	vol, err := svc.VolumeManager.GetVolume(ctx(), idOrName)
+	if err != nil {
+		vol, err = svc.VolumeManager.GetVolumeByName(ctx(), idOrName)
+	}
+	if err != nil {
+		return ctx()
+	}
+	return mw.WithResolvedVolume(ctx(), vol.Id, vol)
+}
+
+// ctxWithImage creates a context with a resolved image (simulates ResolveResource middleware)
+func ctxWithImage(svc *ApiService, name string) context.Context {
+	img, err := svc.ImageManager.GetImage(ctx(), name)
+	if err != nil {
+		return ctx()
+	}
+	return mw.WithResolvedImage(ctx(), img.Name, img)
+}
+
 // createAndWaitForImage creates an image and waits for it to be ready.
 // Returns the image name on success, or fails the test on error/timeout.
 func createAndWaitForImage(t *testing.T, svc *ApiService, imageName string, timeout time.Duration) string {
@@ -117,24 +148,26 @@ func createAndWaitForImage(t *testing.T, svc *ApiService, imageName string, time
 	t.Log("Waiting for image to be ready...")
 	deadline := time.Now().Add(timeout)
 	for time.Now().Before(deadline) {
-		imgResp, err := svc.GetImage(ctx(), oapi.GetImageRequestObject{
-			Name: imageName,
-		})
-		require.NoError(t, err)
-
-		img, ok := imgResp.(oapi.GetImage200JSONResponse)
-		if ok {
-			switch img.Status {
-			case "ready":
-				t.Log("Image is ready")
-				return imgCreated.Name
-			case "failed":
-				t.Fatalf("Image build failed: %v", img.Error)
-			default:
-				t.Logf("Image status: %s", img.Status)
+		// Get image from manager (may fail during pending/pulling, that's OK)
+		img, err := svc.ImageManager.GetImage(ctx(), imageName)
+		if err != nil {
+			time.Sleep(100 * time.Millisecond)
+			continue
+		}
+
+		switch img.Status {
+		case "ready":
+			t.Log("Image is ready")
+			return imgCreated.Name
+		case "failed":
+			errMsg := ""
+			if img.Error != nil {
+				errMsg = *img.Error
 			}
+			t.Fatalf("Image build failed: %v", errMsg)
 		}
-		time.Sleep(1 * time.Second)
+		// Still pending/pulling/converting, poll again
+		time.Sleep(100 * time.Millisecond)
 	}
 	t.Fatalf("Timeout waiting for image %s to be ready", imageName)
diff --git a/cmd/api/api/exec.go b/cmd/api/api/exec.go
index 39e823c5..9d529fc7 100644
--- a/cmd/api/api/exec.go
+++ b/cmd/api/api/exec.go
@@ -10,11 +10,11 @@ import (
 	"sync"
 	"time"
 
-	"github.com/go-chi/chi/v5"
 	"github.com/gorilla/websocket"
 	"github.com/onkernel/hypeman/lib/exec"
 	"github.com/onkernel/hypeman/lib/instances"
 	"github.com/onkernel/hypeman/lib/logger"
+	mw "github.com/onkernel/hypeman/lib/middleware"
 )
 
 var upgrader = websocket.Upgrader{
@@ -36,22 +36,16 @@ type ExecRequest struct {
 }
 
 // ExecHandler handles exec requests via WebSocket for bidirectional streaming
+// Note: Resolution is handled by ResolveResource middleware
 func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) {
 	ctx := r.Context()
-	log := logger.FromContext(ctx)
 	startTime := time.Now()
+	log := logger.FromContext(ctx)
 
-	instanceID := chi.URLParam(r, "id")
-
-	// Get instance
-	inst, err := s.InstanceManager.GetInstance(ctx, instanceID)
-	if err != nil {
-		if err == instances.ErrNotFound {
-			http.Error(w, `{"code":"not_found","message":"instance not found"}`, http.StatusNotFound)
-			return
-		}
-		log.ErrorContext(ctx, "failed to get instance", "error", err)
-		http.Error(w, `{"code":"internal_error","message":"failed to get instance"}`, http.StatusInternalServerError)
+	// Get instance resolved by middleware
+	inst := mw.GetResolvedInstance[instances.Instance](ctx)
+	if inst == nil {
+		http.Error(w, `{"code":"internal_error","message":"resource not resolved"}`, http.StatusInternalServerError)
 		return
 	}
 
@@ -105,7 +99,7 @@ func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) {
 	// Audit log: exec session started
 	log.InfoContext(ctx, "exec session started",
-		"instance_id", instanceID,
+		"instance_id", inst.Id,
 		"subject", subject,
 		"command", execReq.Command,
 		"tty", execReq.TTY,
@@ -133,7 +127,7 @@ func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) {
 	if err != nil {
 		log.ErrorContext(ctx, "exec failed",
 			"error", err,
-			"instance_id", instanceID,
+			"instance_id", inst.Id,
 			"subject", subject,
 			"duration_ms", duration.Milliseconds(),
 		)
@@ -148,7 +142,7 @@ func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) {
 	// Audit log: exec session ended
 	log.InfoContext(ctx, "exec session ended",
-		"instance_id", instanceID,
+		"instance_id", inst.Id,
 		"subject", subject,
 		"exit_code", exit.Code,
 		"duration_ms", duration.Milliseconds(),
diff --git a/cmd/api/api/exec_test.go b/cmd/api/api/exec_test.go
index 8e5992f2..219d2ea7 100644
--- a/cmd/api/api/exec_test.go
+++ b/cmd/api/api/exec_test.go
@@ -8,6 +8,7 @@ import (
 	"time"
 
 	"github.com/onkernel/hypeman/lib/exec"
+	"github.com/onkernel/hypeman/lib/instances"
 	"github.com/onkernel/hypeman/lib/oapi"
 	"github.com/onkernel/hypeman/lib/paths"
 	"github.com/onkernel/hypeman/lib/system"
@@ -91,7 +92,7 @@ func TestExecInstanceNonTTY(t *testing.T) {
 	// Capture console log on failure with exec-agent filtering
 	t.Cleanup(func() {
 		if t.Failed() {
-			consolePath := paths.New(svc.Config.DataDir).InstanceConsoleLog(inst.Id)
+			consolePath := paths.New(svc.Config.DataDir).InstanceAppLog(inst.Id)
 			if consoleData, err := os.ReadFile(consolePath); err == nil {
 				lines := strings.Split(string(consoleData), "\n")
@@ -152,7 +153,7 @@ func TestExecInstanceNonTTY(t *testing.T) {
 	// Cleanup
 	t.Log("Cleaning up instance...")
-	delResp, err := svc.DeleteInstance(ctx(), oapi.DeleteInstanceRequestObject{
+	delResp, err := svc.DeleteInstance(ctxWithInstance(svc, inst.Id), oapi.DeleteInstanceRequestObject{
 		Id: inst.Id,
 	})
 	require.NoError(t, err)
@@ -211,7 +212,7 @@ func TestExecWithDebianMinimal(t *testing.T) {
 	// Cleanup on exit
 	t.Cleanup(func() {
 		t.Log("Cleaning up instance...")
-		svc.DeleteInstance(ctx(), oapi.DeleteInstanceRequestObject{Id: inst.Id})
+		svc.DeleteInstance(ctxWithInstance(svc, inst.Id), oapi.DeleteInstanceRequestObject{Id: inst.Id})
 	})
 
 	// Get actual instance to access vsock fields
@@ -280,7 +281,7 @@ func TestExecWithDebianMinimal(t *testing.T) {
 
 // collectTestLogs collects logs from an instance (non-streaming)
 func collectTestLogs(t *testing.T, svc *ApiService, instanceID string, n int) string {
-	logChan, err := svc.InstanceManager.StreamInstanceLogs(ctx(), instanceID, n, false)
+	logChan, err := svc.InstanceManager.StreamInstanceLogs(ctx(), instanceID, n, false, instances.LogSourceApp)
 	if err != nil {
 		return ""
 	}
diff --git a/cmd/api/api/images.go b/cmd/api/api/images.go
index 68f4cc96..a97fa59b 100644
--- a/cmd/api/api/images.go
+++ b/cmd/api/api/images.go
@@ -6,6 +6,7 @@ import (
 
 	"github.com/onkernel/hypeman/lib/images"
 	"github.com/onkernel/hypeman/lib/logger"
+	mw "github.com/onkernel/hypeman/lib/middleware"
 	"github.com/onkernel/hypeman/lib/oapi"
 )
 
@@ -60,46 +61,38 @@ func (s *ApiService) CreateImage(ctx context.Context, request oapi.CreateImageRe
 	return oapi.CreateImage202JSONResponse(imageToOAPI(*img)), nil
 }
 
+// GetImage gets image details by name
+// Note: Resolution is handled by ResolveResource middleware
 func (s *ApiService) GetImage(ctx context.Context, request oapi.GetImageRequestObject) (oapi.GetImageResponseObject, error) {
-	log := logger.FromContext(ctx)
-
-	img, err := s.ImageManager.GetImage(ctx, request.Name)
-	if err != nil {
-		switch {
-		case errors.Is(err, images.ErrInvalidName), errors.Is(err, images.ErrNotFound):
-			return oapi.GetImage404JSONResponse{
-				Code:    "not_found",
-				Message: "image not found",
-			}, nil
-		default:
-			log.ErrorContext(ctx, "failed to get image", "error", err, "name", request.Name)
-			return oapi.GetImage500JSONResponse{
-				Code:    "internal_error",
-				Message: "failed to get image",
-			}, nil
-		}
+	img := mw.GetResolvedImage[images.Image](ctx)
+	if img == nil {
+		return oapi.GetImage500JSONResponse{
+			Code:    "internal_error",
+			Message: "resource not resolved",
+		}, nil
 	}
 	return oapi.GetImage200JSONResponse(imageToOAPI(*img)), nil
 }
 
+// DeleteImage deletes an image by name
+// Note: Resolution is handled by ResolveResource middleware
 func (s *ApiService) DeleteImage(ctx context.Context, request oapi.DeleteImageRequestObject) (oapi.DeleteImageResponseObject, error) {
+	img := mw.GetResolvedImage[images.Image](ctx)
+	if img == nil {
+		return oapi.DeleteImage500JSONResponse{
+			Code:    "internal_error",
+			Message: "resource not resolved",
+		}, nil
+	}
 	log := logger.FromContext(ctx)
 
-	err := s.ImageManager.DeleteImage(ctx, request.Name)
+	err := s.ImageManager.DeleteImage(ctx, img.Name)
 	if err != nil {
-		switch {
-		case errors.Is(err, images.ErrInvalidName), errors.Is(err, images.ErrNotFound):
-			return oapi.DeleteImage404JSONResponse{
-				Code:    "not_found",
-				Message: "image not found",
-			}, nil
-		default:
-			log.ErrorContext(ctx, "failed to delete image", "error", err, "name", request.Name)
-			return oapi.DeleteImage500JSONResponse{
-				Code:    "internal_error",
-				Message: "failed to delete image",
-			}, nil
-		}
+		log.ErrorContext(ctx, "failed to delete image", "error", err)
+		return oapi.DeleteImage500JSONResponse{
+			Code:    "internal_error",
+			Message: "failed to delete image",
+		}, nil
 	}
 	return oapi.DeleteImage204Response{}, nil
 }
diff --git a/cmd/api/api/images_test.go b/cmd/api/api/images_test.go
index 168578d3..9d5f0590 100644
--- a/cmd/api/api/images_test.go
+++ b/cmd/api/api/images_test.go
@@ -26,15 +26,10 @@ func TestListImages_Empty(t *testing.T) {
 func TestGetImage_NotFound(t *testing.T) {
 	svc := newTestService(t)
 
-	resp, err := svc.GetImage(ctx(), oapi.GetImageRequestObject{
-		Name: "non-existent:latest",
-	})
-	require.NoError(t, err)
-
-	notFound, ok := resp.(oapi.GetImage404JSONResponse)
-	require.True(t, ok, "expected 404 response")
-	assert.Equal(t, "not_found", notFound.Code)
-	assert.Equal(t, "image not found", notFound.Message)
+	// With middleware, not-found would be handled before reaching handler.
+	// For this test, we call the manager directly to verify the error.
+	_, err := svc.ImageManager.GetImage(ctx(), "non-existent:latest")
+	require.Error(t, err)
 }
 
 func TestCreateImage_Async(t *testing.T) {
@@ -69,7 +64,7 @@ func TestCreateImage_Async(t *testing.T) {
 	img := oapi.Image(acceptedResp)
 	require.Equal(t, "docker.io/library/alpine:latest", img.Name)
 	require.NotEmpty(t, img.Digest, "digest should be populated immediately")
-	t.Logf("Image created: name=%s, digest=%s, initial_status=%s, queue_position=%v", 
+	t.Logf("Image created: name=%s, digest=%s, initial_status=%s, queue_position=%v",
 		img.Name, img.Digest, img.Status, img.QueuePosition)
 
 	// Construct digest reference for polling: repository@digest
@@ -81,9 +76,9 @@ func TestCreateImage_Async(t *testing.T) {
 	t.Log("Polling for completion...")
 	lastStatus := img.Status
 	lastQueuePos := getQueuePos(img.QueuePosition)
-	
+
 	for i := 0; i < 3000; i++ {
-		getResp, err := svc.GetImage(ctx, oapi.GetImageRequestObject{Name: digestRef})
+		getResp, err := svc.GetImage(ctxWithImage(svc, digestRef), oapi.GetImageRequestObject{Name: digestRef})
 		require.NoError(t, err)
 
 		imgResp, ok := getResp.(oapi.GetImage200JSONResponse)
@@ -93,16 +88,16 @@ func TestCreateImage_Async(t *testing.T) {
 
 		currentImg := oapi.Image(imgResp)
 		currentQueuePos := getQueuePos(currentImg.QueuePosition)
-		
+
 		// Log when status or queue position changes
 		if currentImg.Status != lastStatus || currentQueuePos != lastQueuePos {
 			t.Logf("Update: status=%s, queue_position=%v", currentImg.Status, formatQueuePos(currentImg.QueuePosition))
-			
+
 			// Queue position should only decrease (never increase)
 			if lastQueuePos > 0 && currentQueuePos > lastQueuePos {
 				t.Errorf("Queue position increased: %d -> %d", lastQueuePos, currentQueuePos)
 			}
-			
+
 			lastStatus = currentImg.Status
 			lastQueuePos = currentQueuePos
 		}
@@ -194,7 +189,7 @@ func TestCreateImage_Idempotent(t *testing.T) {
 		Body: &oapi.CreateImageRequest{Name: imageName},
 	})
 	require.NoError(t, err)
-	
+
 	accepted1, ok := resp1.(oapi.CreateImage202JSONResponse)
 	require.True(t, ok, "expected 202 response")
 	img1 := oapi.Image(accepted1)
@@ -211,17 +206,17 @@ func TestCreateImage_Idempotent(t *testing.T) {
 		Body: &oapi.CreateImageRequest{Name: imageName},
 	})
 	require.NoError(t, err)
-	
+
 	accepted2, ok := resp2.(oapi.CreateImage202JSONResponse)
 	require.True(t, ok, "expected 202 response")
 	img2 := oapi.Image(accepted2)
 	require.Equal(t, imageName, img2.Name)
 	require.Equal(t, img1.Digest, img2.Digest, "should have same digest")
-	
+
 	// Log actual status to see what's happening
-	t.Logf("Second call: digest=%s, status=%s, queue_position=%v, error=%v", 
+	t.Logf("Second call: digest=%s, status=%s, queue_position=%v, error=%v",
 		img2.Digest, img2.Status, formatQueuePos(img2.QueuePosition), img2.Error)
-	
+
 	// If it failed, we need to see why
 	if img2.Status == oapi.ImageStatus(images.StatusFailed) {
 		if img2.Error != nil {
@@ -229,7 +224,7 @@ func TestCreateImage_Idempotent(t *testing.T) {
 		}
 		t.Fatal("Build failed - this is the root cause of test failures")
 	}
-	
+
 	require.Equal(t, oapi.ImageStatus(images.StatusPending), img2.Status)
 	require.NotNil(t, img2.QueuePosition, "should have queue position")
 	require.Equal(t, 1, *img2.QueuePosition, "should still be at position 1")
@@ -243,14 +238,14 @@ func TestCreateImage_Idempotent(t *testing.T) {
 	// Wait for build to complete - poll by digest (tag symlink doesn't exist until status=ready)
 	t.Log("Waiting for build to complete...")
 	for i := 0; i < 3000; i++ {
-		getResp, err := svc.GetImage(ctx, oapi.GetImageRequestObject{Name: digestRef})
+		getResp, err := svc.GetImage(ctxWithImage(svc, digestRef), oapi.GetImageRequestObject{Name: digestRef})
 		require.NoError(t, err)
 
 		imgResp, ok := getResp.(oapi.GetImage200JSONResponse)
 		require.True(t, ok, "expected 200 response")
 		currentImg := oapi.Image(imgResp)
-		
+
 		if currentImg.Status == oapi.ImageStatus(images.StatusReady) {
 			t.Log("Build complete!")
 			break
@@ -273,7 +268,7 @@ func TestCreateImage_Idempotent(t *testing.T) {
 		Body: &oapi.CreateImageRequest{Name: imageName},
 	})
 	require.NoError(t, err)
-	
+
 	accepted3, ok := resp3.(oapi.CreateImage202JSONResponse)
 	require.True(t, ok, "expected 202 response")
 	img3 := oapi.Image(accepted3)
@@ -282,9 +277,9 @@ func TestCreateImage_Idempotent(t *testing.T) {
 	require.Nil(t, img3.QueuePosition, "ready image should have no queue position")
 	require.NotNil(t, img3.SizeBytes)
 	require.Greater(t, *img3.SizeBytes, int64(0))
-	t.Logf("Third call: status=%s, queue_position=%v, size=%d", 
+	t.Logf("Third call: status=%s, queue_position=%v, size=%d",
 		img3.Status, formatQueuePos(img3.QueuePosition), *img3.SizeBytes)
-	
+
 	t.Log("Idempotency test passed!")
 }
 
@@ -301,5 +296,3 @@ func formatQueuePos(pos *int) string {
 	}
 	return fmt.Sprintf("%d", *pos)
 }
-
-
diff --git a/cmd/api/api/ingress.go b/cmd/api/api/ingress.go
index bdac447d..7b9ba881 100644
--- a/cmd/api/api/ingress.go
+++ b/cmd/api/api/ingress.go
@@ -6,6 +6,7 @@ import (
 
 	"github.com/onkernel/hypeman/lib/ingress"
 	"github.com/onkernel/hypeman/lib/logger"
+	mw "github.com/onkernel/hypeman/lib/middleware"
 	"github.com/onkernel/hypeman/lib/oapi"
 )
 
@@ -119,58 +120,37 @@ func (s *ApiService) CreateIngress(ctx context.Context, request oapi.CreateIngre
 }
 
 // GetIngress gets ingress details by ID, name, or ID prefix
+// Note: Resolution is handled by ResolveResource middleware
 func (s *ApiService) GetIngress(ctx context.Context, request oapi.GetIngressRequestObject) (oapi.GetIngressResponseObject, error) {
-	log := logger.FromContext(ctx)
-
-	ing, err := s.IngressManager.Get(ctx, request.Id)
-	if err != nil {
-		switch {
-		case errors.Is(err, ingress.ErrNotFound):
-			return oapi.GetIngress404JSONResponse{
-				Code:    "not_found",
-				Message: "ingress not found",
-			}, nil
-		case errors.Is(err, ingress.ErrAmbiguousName):
-			return oapi.GetIngress409JSONResponse{
-				Code:    "ambiguous_identifier",
-				Message: "identifier matches multiple ingresses, please use a more specific ID or name",
-			}, nil
-		default:
-			log.ErrorContext(ctx, "failed to get ingress", "error", err, "id", request.Id)
-			return oapi.GetIngress500JSONResponse{
-				Code:    "internal_error",
-				Message: "failed to get ingress",
-			}, nil
-		}
+	ing := mw.GetResolvedIngress[ingress.Ingress](ctx)
+	if ing == nil {
+		return oapi.GetIngress500JSONResponse{
+			Code:    "internal_error",
+			Message: "resource not resolved",
+		}, nil
 	}
-
 	return oapi.GetIngress200JSONResponse(ingressToOAPI(*ing)), nil
 }
 
 // DeleteIngress deletes an ingress by ID, name, or ID prefix
+// Note: Resolution is handled by ResolveResource middleware
 func (s *ApiService) DeleteIngress(ctx context.Context, request oapi.DeleteIngressRequestObject) (oapi.DeleteIngressResponseObject, error) {
+	ing := mw.GetResolvedIngress[ingress.Ingress](ctx)
+	if ing == nil {
+		return oapi.DeleteIngress500JSONResponse{
+			Code:    "internal_error",
+			Message: "resource not resolved",
+		}, nil
+	}
 	log := logger.FromContext(ctx)
 
-	err := s.IngressManager.Delete(ctx, request.Id)
+	err := s.IngressManager.Delete(ctx, ing.ID)
 	if err != nil {
-		switch {
-		case errors.Is(err, ingress.ErrNotFound):
-			return oapi.DeleteIngress404JSONResponse{
-				Code:    "not_found",
-				Message: "ingress not found",
-			}, nil
-		case errors.Is(err, ingress.ErrAmbiguousName):
-			return oapi.DeleteIngress409JSONResponse{
-				Code:    "ambiguous_identifier",
-				Message: "identifier matches multiple ingresses, please use a more specific ID or name",
-			}, nil
-		default:
-			log.ErrorContext(ctx, "failed to delete ingress", "error", err, "id", request.Id)
-			return oapi.DeleteIngress500JSONResponse{
-				Code:    "internal_error",
-				Message: "failed to delete ingress",
-			}, nil
-		}
+		log.ErrorContext(ctx, "failed to delete ingress", "error", err)
+		return oapi.DeleteIngress500JSONResponse{
+			Code:    "internal_error",
+			Message: "failed to delete ingress",
+		}, nil
 	}
 	return oapi.DeleteIngress204Response{}, nil
diff --git a/cmd/api/api/instances.go b/cmd/api/api/instances.go
index 8a599db3..8adb8c8d 100644
--- a/cmd/api/api/instances.go
+++ b/cmd/api/api/instances.go
@@ -10,6 +10,7 @@ import (
 	"github.com/c2h5oh/datasize"
 	"github.com/onkernel/hypeman/lib/instances"
 	"github.com/onkernel/hypeman/lib/logger"
+	mw "github.com/onkernel/hypeman/lib/middleware"
 	"github.com/onkernel/hypeman/lib/network"
 	"github.com/onkernel/hypeman/lib/oapi"
 	"github.com/samber/lo"
@@ -172,64 +173,34 @@ func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInst
 
 // GetInstance gets instance details
 // The id parameter can be an instance ID, name, or ID prefix
+// Note: Resolution is handled by ResolveResource middleware
 func (s *ApiService) GetInstance(ctx context.Context, request oapi.GetInstanceRequestObject) (oapi.GetInstanceResponseObject, error) {
-	log := logger.FromContext(ctx)
-
-	inst, err := s.InstanceManager.GetInstance(ctx, request.Id)
-	if err != nil {
-		switch {
-		case errors.Is(err, instances.ErrNotFound):
-			return oapi.GetInstance404JSONResponse{
-				Code:    "not_found",
-				Message: "instance not found",
-			}, nil
-		case errors.Is(err, instances.ErrAmbiguousName):
-			return oapi.GetInstance404JSONResponse{
-				Code:    "ambiguous",
-				Message: "multiple instances match, use full instance ID",
-			}, nil
-		default:
-			log.ErrorContext(ctx, "failed to get instance", "error", err, "id", request.Id)
-			return oapi.GetInstance500JSONResponse{
-				Code:    "internal_error",
-				Message: "failed to get instance",
-			}, nil
-		}
+	inst := mw.GetResolvedInstance[instances.Instance](ctx)
+	if inst == nil {
+		return oapi.GetInstance500JSONResponse{
+			Code:    "internal_error",
+			Message: "resource not resolved",
+		}, nil
 	}
 	return oapi.GetInstance200JSONResponse(instanceToOAPI(*inst)), nil
 }
 
 // DeleteInstance stops and deletes an instance
 // The id parameter can be an instance ID, name, or ID prefix
+// Note: Resolution is handled by ResolveResource middleware
 func (s *ApiService) DeleteInstance(ctx context.Context, request oapi.DeleteInstanceRequestObject) (oapi.DeleteInstanceResponseObject, error) {
-	log := logger.FromContext(ctx)
-
-	// Resolve to get the actual instance ID
-	inst, err := s.InstanceManager.GetInstance(ctx, request.Id)
-	if err != nil {
-		switch {
-		case errors.Is(err, instances.ErrNotFound):
-			return oapi.DeleteInstance404JSONResponse{
-				Code:    "not_found",
-				Message: "instance not found",
-			}, nil
-		case errors.Is(err, instances.ErrAmbiguousName):
-			return oapi.DeleteInstance404JSONResponse{
-				Code:    "ambiguous",
-				Message: "multiple instances match, use full instance ID",
-			}, nil
-		default:
-			log.ErrorContext(ctx, "failed to get instance", "error", err, "id", request.Id)
-			return oapi.DeleteInstance500JSONResponse{
-				Code:    "internal_error",
-				Message: "failed to get instance",
-			}, nil
-		}
+	inst := mw.GetResolvedInstance[instances.Instance](ctx)
+	if inst == nil {
+		return oapi.DeleteInstance500JSONResponse{
+			Code:    "internal_error",
+			Message: "resource not resolved",
+		}, nil
 	}
+	log := logger.FromContext(ctx)
 
-	err = s.InstanceManager.DeleteInstance(ctx, inst.Id)
+	err := s.InstanceManager.DeleteInstance(ctx, inst.Id)
 	if err != nil {
-		log.ErrorContext(ctx, "failed to delete instance", "error", err, "id", inst.Id)
+		log.ErrorContext(ctx, "failed to delete instance", "error", err)
 		return oapi.DeleteInstance500JSONResponse{
 			Code:    "internal_error",
 			Message: "failed to delete instance",
@@ -240,33 +211,18 @@ func (s *ApiService) DeleteInstance(ctx context.Context, request oapi.DeleteInst
 
 // StandbyInstance puts an instance in standby (pause, snapshot, delete VMM)
 // The id parameter can be an instance ID, name, or ID prefix
+// Note: Resolution is handled by ResolveResource middleware
 func (s *ApiService) StandbyInstance(ctx context.Context, request oapi.StandbyInstanceRequestObject) (oapi.StandbyInstanceResponseObject, error) {
-	log := logger.FromContext(ctx)
-
-	// Resolve to get the actual instance ID
-	resolved, err := s.InstanceManager.GetInstance(ctx, request.Id)
-	if err != nil {
-		switch {
-		case errors.Is(err, instances.ErrNotFound):
-			return oapi.StandbyInstance404JSONResponse{
-				Code:    "not_found",
-				Message: "instance not found",
-			}, nil
-		case errors.Is(err, instances.ErrAmbiguousName):
-			return oapi.StandbyInstance404JSONResponse{
-				Code:    "ambiguous",
-				Message: "multiple instances match, use full instance ID",
-			}, nil
-		default:
-			log.ErrorContext(ctx, "failed to get instance", "error", err, "id", request.Id)
-			return oapi.StandbyInstance500JSONResponse{
-				Code:    "internal_error",
-				Message: "failed to get instance",
-			}, nil
-		}
+	inst := mw.GetResolvedInstance[instances.Instance](ctx)
+	if inst == nil {
+		return oapi.StandbyInstance500JSONResponse{
+			Code:    "internal_error",
+			Message: "resource not resolved",
+		}, nil
 	}
+	log := logger.FromContext(ctx)
 
-	inst, err := s.InstanceManager.StandbyInstance(ctx, resolved.Id)
+	result, err := s.InstanceManager.StandbyInstance(ctx, inst.Id)
 	if err != nil {
 		switch {
 		case errors.Is(err, instances.ErrInvalidState):
@@ -275,45 +231,30 @@ func (s *ApiService) StandbyInstance(ctx context.Context, request oapi.StandbyIn
 				Message: err.Error(),
 			}, nil
 		default:
-			log.ErrorContext(ctx, "failed to standby instance", "error", err, "id", resolved.Id)
+			log.ErrorContext(ctx, "failed to standby instance", "error", err)
 			return oapi.StandbyInstance500JSONResponse{
 				Code:    "internal_error",
 				Message: "failed to standby instance",
 			}, nil
 		}
 	}
-	return oapi.StandbyInstance200JSONResponse(instanceToOAPI(*inst)), nil
+	return oapi.StandbyInstance200JSONResponse(instanceToOAPI(*result)), nil
 }
 
 // RestoreInstance restores an instance from standby
 // The id parameter can be an instance ID, name, or ID prefix
+// Note: Resolution is handled by ResolveResource middleware
 func (s *ApiService) RestoreInstance(ctx context.Context, request oapi.RestoreInstanceRequestObject) (oapi.RestoreInstanceResponseObject, error) {
-	log := logger.FromContext(ctx)
-
-	// Resolve to get the actual instance ID
-	resolved, err := s.InstanceManager.GetInstance(ctx, request.Id)
-	if err != nil {
-		switch {
-		case errors.Is(err, instances.ErrNotFound):
-			return oapi.RestoreInstance404JSONResponse{
-				Code:    "not_found",
-				Message: "instance not found",
-			}, nil
-		case errors.Is(err, instances.ErrAmbiguousName):
-			return oapi.RestoreInstance404JSONResponse{
-				Code:    "ambiguous",
-				Message: "multiple instances match, use full instance ID",
-			}, nil
-		default:
-			log.ErrorContext(ctx, "failed to get instance", "error", err, "id", request.Id)
-			return oapi.RestoreInstance500JSONResponse{
-				Code:    "internal_error",
-				Message: "failed to get instance",
-			}, nil
-		}
+	inst := mw.GetResolvedInstance[instances.Instance](ctx)
+	if inst == nil {
+		return oapi.RestoreInstance500JSONResponse{
+			Code:    "internal_error",
+			Message: "resource not resolved",
+		}, nil
 	}
+	log := logger.FromContext(ctx)
 
-	inst, err := s.InstanceManager.RestoreInstance(ctx, resolved.Id)
+	result, err := s.InstanceManager.RestoreInstance(ctx, inst.Id)
 	if err != nil {
 		switch {
 		case errors.Is(err, instances.ErrInvalidState):
@@ -322,45 +263,30 @@ func (s *ApiService) RestoreInstance(ctx context.Context, request oapi.RestoreIn
 				Message: err.Error(),
 			}, nil
 		default:
-			log.ErrorContext(ctx, "failed to restore instance", "error", err, "id", resolved.Id)
+			log.ErrorContext(ctx, "failed to restore instance", "error", err)
 			return oapi.RestoreInstance500JSONResponse{
 				Code:    "internal_error",
 				Message: "failed to restore instance",
 			}, nil
 		}
 	}
-	return oapi.RestoreInstance200JSONResponse(instanceToOAPI(*inst)), nil
+	return oapi.RestoreInstance200JSONResponse(instanceToOAPI(*result)), nil
 }
 
 // StopInstance gracefully stops a running instance
 // The id parameter can be an instance ID, name, or ID prefix
+// Note: Resolution is handled by ResolveResource middleware
 func (s *ApiService) StopInstance(ctx context.Context, request oapi.StopInstanceRequestObject) (oapi.StopInstanceResponseObject, error) {
-	log := logger.FromContext(ctx)
-
-	// Resolve to get the actual instance ID
-	resolved, err := s.InstanceManager.GetInstance(ctx, request.Id)
-	if err != nil {
-		switch {
-		case errors.Is(err, instances.ErrNotFound):
-			return oapi.StopInstance404JSONResponse{
-				Code:    "not_found",
-				Message: "instance not found",
-			}, nil
-		case errors.Is(err, instances.ErrAmbiguousName):
-			return oapi.StopInstance404JSONResponse{
-				Code:    "ambiguous",
-				Message: "multiple instances match, use full instance ID",
-			}, nil
-		default:
-			log.ErrorContext(ctx, "failed to get instance", "error", err, "id", request.Id)
-			return oapi.StopInstance500JSONResponse{
-				Code:    "internal_error",
-				Message: "failed to get instance",
-			}, nil
-		}
+	inst := mw.GetResolvedInstance[instances.Instance](ctx)
+	if inst == nil {
+		return oapi.StopInstance500JSONResponse{
+			Code:    "internal_error",
+			Message: "resource not resolved",
+		}, nil
 	}
+	log := logger.FromContext(ctx)
 
-	inst, err := s.InstanceManager.StopInstance(ctx, resolved.Id)
+	result, err := s.InstanceManager.StopInstance(ctx, inst.Id)
 	if err != nil {
 		switch {
 		case errors.Is(err, instances.ErrInvalidState):
@@ -369,45 +295,30 @@ func (s *ApiService) StopInstance(ctx context.Context, request oapi.StopInstance
 				Message: err.Error(),
 			}, nil
 		default:
-			log.ErrorContext(ctx, "failed to stop instance", "error", err, "id", resolved.Id)
+			log.ErrorContext(ctx, "failed to stop instance", "error", err)
 			return oapi.StopInstance500JSONResponse{
 				Code:    "internal_error",
 				Message: "failed to stop instance",
 			}, nil
 		}
 	}
-	return oapi.StopInstance200JSONResponse(instanceToOAPI(*inst)), nil
+	return oapi.StopInstance200JSONResponse(instanceToOAPI(*result)), nil
 }
 
 // StartInstance starts a stopped instance
 // The id parameter can be an instance ID, name, or ID prefix
+// Note: Resolution is handled by ResolveResource middleware
 func (s *ApiService) StartInstance(ctx context.Context, request oapi.StartInstanceRequestObject) (oapi.StartInstanceResponseObject, error) {
-	log := logger.FromContext(ctx)
-
-	// Resolve to get the actual instance ID
-	resolved, err := s.InstanceManager.GetInstance(ctx, request.Id)
-	if err != nil {
-		switch {
-		case errors.Is(err, instances.ErrNotFound):
-			return oapi.StartInstance404JSONResponse{
-				Code:    "not_found",
-				Message: "instance not found",
-			}, nil
-		case errors.Is(err, instances.ErrAmbiguousName):
-			return oapi.StartInstance404JSONResponse{
-				Code:    "ambiguous",
-				Message: "multiple instances match, use full instance ID",
-			}, nil
-		default:
-			log.ErrorContext(ctx, "failed to get instance", "error", err, "id", request.Id)
-			return oapi.StartInstance500JSONResponse{
-				Code:    "internal_error",
-				Message: "failed to get instance",
-			}, nil
-		}
+	inst := mw.GetResolvedInstance[instances.Instance](ctx)
+	if inst == nil {
+		return oapi.StartInstance500JSONResponse{
+			Code:    "internal_error",
+			Message: "resource not resolved",
+		}, nil
 	}
+	log := logger.FromContext(ctx)
 
-	inst, err := s.InstanceManager.StartInstance(ctx, resolved.Id)
+	result, err := s.InstanceManager.StartInstance(ctx, inst.Id)
 	if err != nil {
 		switch {
 		case errors.Is(err, instances.ErrInvalidState):
@@ -416,14 +327,14 @@ func (s *ApiService) StartInstance(ctx context.Context, request oapi.StartInstan
 				Message: err.Error(),
 			}, nil
 		default:
-			log.ErrorContext(ctx, "failed to start instance", "error", err, "id", resolved.Id)
+			log.ErrorContext(ctx, "failed to start instance", "error", err)
 			return oapi.StartInstance500JSONResponse{
 				Code:    "internal_error",
 				Message: "failed to start instance",
 			}, nil
 		}
 	}
-	return oapi.StartInstance200JSONResponse(instanceToOAPI(*inst)), nil
+	return oapi.StartInstance200JSONResponse(instanceToOAPI(*result)), nil
 }
 
 // logsStreamResponse implements oapi.GetInstanceLogsResponseObject with proper SSE flushing
@@ -455,7 +366,16 @@ func (r logsStreamResponse) VisitGetInstanceLogsResponse(w http.ResponseWriter)
 // With follow=false (default), streams last N lines then closes
 // With follow=true, streams last N lines then continues following new output
 // The id parameter can be an instance ID, name, or ID prefix
+// Note: Resolution is handled by ResolveResource middleware
 func (s *ApiService) GetInstanceLogs(ctx context.Context, request oapi.GetInstanceLogsRequestObject) (oapi.GetInstanceLogsResponseObject, error) {
+	inst := mw.GetResolvedInstance[instances.Instance](ctx)
+	if inst == nil {
+		return oapi.GetInstanceLogs500JSONResponse{
+			Code:    "internal_error",
+			Message: "resource not resolved",
+		}, nil
+	}
+
 	tail := 100
 	if request.Params.Tail != nil {
 		tail = *request.Params.Tail
@@ -466,29 +386,20 @@ func (s *ApiService) GetInstanceLogs(ctx context.Context, request oapi.GetInstan
 		follow = *request.Params.Follow
 	}
 
-	// Resolve to get the actual instance ID
-	resolved, err := s.InstanceManager.GetInstance(ctx, request.Id)
-	if err != nil {
-		switch {
-		case errors.Is(err, instances.ErrNotFound):
-			return oapi.GetInstanceLogs404JSONResponse{
-				Code:    "not_found",
-				Message: "instance not found",
-			}, nil
-		case errors.Is(err, instances.ErrAmbiguousName):
-			return oapi.GetInstanceLogs404JSONResponse{
-				Code:    "ambiguous",
-				Message: "multiple instances match, use full instance ID",
-			}, nil
-		default:
-			return oapi.GetInstanceLogs500JSONResponse{
-				Code:    "internal_error",
-				Message: "failed to get instance",
-			}, nil
+	// Map source parameter to LogSource type (default to app)
+	source := instances.LogSourceApp
+	if request.Params.Source != nil {
+		switch *request.Params.Source {
+		case oapi.App:
+			source = instances.LogSourceApp
+		case oapi.Vmm:
+			source = instances.LogSourceVMM
+		case oapi.Hypeman:
+			source = instances.LogSourceHypeman
 		}
 	}
 
-	logChan, err := s.InstanceManager.StreamInstanceLogs(ctx, resolved.Id, tail, follow)
+	logChan, err := s.InstanceManager.StreamInstanceLogs(ctx, inst.Id, tail, follow, source)
 	if err != nil {
 		switch {
 		case errors.Is(err, instances.ErrTailNotFound):
@@ -496,6 +407,11 @@ func (s *ApiService) GetInstanceLogs(ctx context.Context, request oapi.GetInstan
 				Code:    "dependency_missing",
 				Message: "tail command not found on server - required for log streaming",
 			}, nil
+		case errors.Is(err, instances.ErrLogNotFound):
+			return oapi.GetInstanceLogs404JSONResponse{
+				Code:    "log_not_found",
+				Message: "requested log file does not exist yet",
+			}, nil
 		default:
 			return oapi.GetInstanceLogs500JSONResponse{
 				Code:    "internal_error",
diff --git a/cmd/api/api/instances_test.go b/cmd/api/api/instances_test.go
index ce1801e6..82f38861 100644
--- a/cmd/api/api/instances_test.go
+++ b/cmd/api/api/instances_test.go
@@ -26,14 +26,10 @@ func TestListInstances_Empty(t *testing.T) {
 func TestGetInstance_NotFound(t *testing.T) {
 	svc := newTestService(t)
 
-	resp, err := svc.GetInstance(ctx(), oapi.GetInstanceRequestObject{
-		Id: "non-existent",
-	})
-	require.NoError(t, err)
-
-	notFound, ok := resp.(oapi.GetInstance404JSONResponse)
-	require.True(t, ok, "expected 404 response")
-	assert.Equal(t, "not_found", notFound.Code)
+	// With middleware, not-found would be handled before reaching handler.
+	// For this test, we call the manager directly to verify the error type.
+	_, err := svc.InstanceManager.GetInstance(ctx(), "non-existent")
+	require.Error(t, err)
 }
 
 func TestCreateInstance_ParsesHumanReadableSizes(t *testing.T) {
@@ -174,7 +170,7 @@ func TestInstanceLifecycle_StopStart(t *testing.T) {
 
 	// 2. Stop the instance
 	t.Log("Stopping instance...")
-	stopResp, err := svc.StopInstance(ctx(), oapi.StopInstanceRequestObject{Id: instanceID})
+	stopResp, err := svc.StopInstance(ctxWithInstance(svc, instanceID), oapi.StopInstanceRequestObject{Id: instanceID})
 	require.NoError(t, err)
 
 	stopped, ok := stopResp.(oapi.StopInstance200JSONResponse)
@@ -184,7 +180,7 @@ func TestInstanceLifecycle_StopStart(t *testing.T) {
 
 	// 3. Start the instance
 	t.Log("Starting instance...")
-	startResp, err := svc.StartInstance(ctx(), oapi.StartInstanceRequestObject{Id: instanceID})
+	startResp, err := svc.StartInstance(ctxWithInstance(svc, instanceID), oapi.StartInstanceRequestObject{Id: instanceID})
 	require.NoError(t, err)
 
 	started, ok := startResp.(oapi.StartInstance200JSONResponse)
@@ -196,7 +192,7 @@ func TestInstanceLifecycle_StopStart(t *testing.T) {
 
 	// 4. Cleanup - delete the instance
 	t.Log("Deleting instance...")
-	deleteResp, err := svc.DeleteInstance(ctx(), oapi.DeleteInstanceRequestObject{Id: instanceID})
+	deleteResp, err := svc.DeleteInstance(ctxWithInstance(svc, instanceID), oapi.DeleteInstanceRequestObject{Id: instanceID})
 	require.NoError(t, err)
 	_, ok = deleteResp.(oapi.DeleteInstance204Response)
 	require.True(t, ok, "expected 204 response for delete")
@@ -208,16 +204,15 @@ func waitForState(t *testing.T, svc *ApiService, instanceID string, expectedStat
 	t.Helper()
 	deadline := time.Now().Add(timeout)
 	for time.Now().Before(deadline) {
-		resp, err := svc.GetInstance(ctx(), oapi.GetInstanceRequestObject{Id: instanceID})
+		// Use manager directly to poll state (middleware not needed for polling)
+		inst, err := svc.InstanceManager.GetInstance(ctx(), instanceID)
 		require.NoError(t, err)
-		if inst, ok := resp.(oapi.GetInstance200JSONResponse); ok {
 			if string(inst.State) == expectedState {
 				t.Logf("Instance reached %s state", expectedState)
 				return
 			}
 			t.Logf("Instance state: %s (waiting for %s)", inst.State, expectedState)
-		}
 		time.Sleep(100 * time.Millisecond)
 	}
 	t.Fatalf("Timeout waiting for instance to reach %s state", expectedState)
diff --git a/cmd/api/api/registry_test.go b/cmd/api/api/registry_test.go
index 45d20af6..1e9e2554 100644
--- a/cmd/api/api/registry_test.go
+++ b/cmd/api/api/registry_test.go
@@ -150,11 +150,11 @@ func TestRegistryPushAndCreateInstance(t *testing.T) {
 	assert.Equal(t, "test-pushed-image", instance.Name)
 	t.Logf("Instance created: %s (state: %s)", instance.Id, instance.State)
 
-	// Verify instance reaches Running state
+	// Verify instance reaches Running state (use manager directly for polling)
 	deadline := time.Now().Add(30 * time.Second)
 	for time.Now().Before(deadline) {
-		resp, _ := svc.GetInstance(ctx(), oapi.GetInstanceRequestObject{Id: instance.Id})
-		if inst, ok := resp.(oapi.GetInstance200JSONResponse); ok {
+		inst, err := svc.InstanceManager.GetInstance(ctx(), instance.Id)
+		if err == nil {
 			if inst.State == "Running" {
 				t.Log("Instance is running!")
 				return // Success!
@@ -559,42 +559,41 @@ func (t *loggingTransport) RoundTrip(req *http.Request) (*http.Response, error)
 	return t.transport.RoundTrip(req)
 }
 
-// waitForImageReady polls GetImage until the image reaches Ready status.
+// waitForImageReady polls ImageManager until the image reaches Ready status.
 // Returns the image response on success, fails the test on error or timeout.
 func waitForImageReady(t *testing.T, svc *ApiService, imageName string, timeout time.Duration) oapi.GetImage200JSONResponse {
 	t.Helper()
 	t.Logf("Waiting for image %s to be ready...", imageName)
 
 	deadline := time.Now().Add(timeout)
-	var lastStatus oapi.ImageStatus
+	var lastStatus string
 	var lastError string
 
 	for time.Now().Before(deadline) {
-		resp, err := svc.GetImage(ctx(), oapi.GetImageRequestObject{Name: imageName})
+		img, err := svc.ImageManager.GetImage(ctx(), imageName)
 		if err != nil {
 			time.Sleep(1 * time.Second)
 			continue
 		}
 
-		imgResp, ok := resp.(oapi.GetImage200JSONResponse)
-		if !ok {
-			time.Sleep(1 * time.Second)
-			continue
+		lastStatus = string(img.Status)
+		if img.Error != nil {
+			lastError = *img.Error
 		}
 
-		lastStatus = imgResp.Status
-		if imgResp.Error != nil {
-			lastError = *imgResp.Error
-		}
-
-		switch imgResp.Status {
-		case oapi.Ready:
-			t.Logf("Image ready: %s (digest=%s)", imgResp.Name, imgResp.Digest)
-			return imgResp
-		case oapi.Failed:
+		switch img.Status {
+		case "ready":
+			t.Logf("Image ready: %s (digest=%s)", img.Name, img.Digest)
+			return oapi.GetImage200JSONResponse{
+				Name:      img.Name,
+				Digest:    img.Digest,
+				Status:    oapi.ImageStatus(img.Status),
+				SizeBytes: img.SizeBytes,
+			}
+		case "failed":
 			t.Fatalf("Image conversion failed: %s", lastError)
 		default:
-			t.Logf("Image status: %s", imgResp.Status)
+			t.Logf("Image status: %s", img.Status)
 		}
 
 		time.Sleep(2 * time.Second)
 	}
diff --git a/cmd/api/api/resolvers.go b/cmd/api/api/resolvers.go
new file mode 100644
index 00000000..bde8d8e8
--- /dev/null
+++ b/cmd/api/api/resolvers.go
@@ -0,0 +1,108 @@
+package api
+
+import (
+	"context"
+	"errors"
+	"net/http"
+
+	"github.com/onkernel/hypeman/lib/images"
+	"github.com/onkernel/hypeman/lib/ingress"
+	"github.com/onkernel/hypeman/lib/instances"
+	"github.com/onkernel/hypeman/lib/middleware"
+	"github.com/onkernel/hypeman/lib/volumes"
+)
+
+// InstanceResolver adapts instances.Manager to middleware.ResourceResolver.
+type InstanceResolver struct {
+	Manager instances.Manager
+}
+
+func (r InstanceResolver) Resolve(ctx context.Context, idOrName string) (string, any, error) {
+	inst, err := r.Manager.GetInstance(ctx, idOrName)
+	if err != nil {
+		return "", nil, err
+	}
+	return inst.Id, inst, nil
+}
+
+// VolumeResolver adapts volumes.Manager to middleware.ResourceResolver.
+type VolumeResolver struct {
+	Manager volumes.Manager
+}
+
+func (r VolumeResolver) Resolve(ctx context.Context, idOrName string) (string, any, error) {
+	// Try by ID first, then by name
+	vol, err := r.Manager.GetVolume(ctx, idOrName)
+	if errors.Is(err, volumes.ErrNotFound) {
+		vol, err = r.Manager.GetVolumeByName(ctx, idOrName)
+	}
+	if err != nil {
+		return "", nil, err
+	}
+	return vol.Id, vol, nil
+}
+
+// IngressResolver adapts ingress.Manager to middleware.ResourceResolver.
+type IngressResolver struct {
+	Manager ingress.Manager
+}
+
+func (r IngressResolver) Resolve(ctx context.Context, idOrName string) (string, any, error) {
+	ing, err := r.Manager.Get(ctx, idOrName)
+	if err != nil {
+		return "", nil, err
+	}
+	return ing.ID, ing, nil
+}
+
+// ImageResolver adapts images.Manager to middleware.ResourceResolver.
+// Note: Images are looked up by name (OCI reference), not ID.
+type ImageResolver struct {
+	Manager images.Manager
+}
+
+func (r ImageResolver) Resolve(ctx context.Context, name string) (string, any, error) {
+	img, err := r.Manager.GetImage(ctx, name)
+	if err != nil {
+		return "", nil, err
+	}
+	return img.Name, img, nil
+}
+
+// NewResolvers creates Resolvers from the ApiService managers.
+func (s *ApiService) NewResolvers() middleware.Resolvers {
+	return middleware.Resolvers{
+		Instance: InstanceResolver{Manager: s.InstanceManager},
+		Volume:   VolumeResolver{Manager: s.VolumeManager},
+		Ingress:  IngressResolver{Manager: s.IngressManager},
+		Image:    ImageResolver{Manager: s.ImageManager},
+	}
+}
+
+// ResolverErrorResponder handles resolver errors by writing appropriate HTTP responses.
+func ResolverErrorResponder(w http.ResponseWriter, err error, lookup string) {
+	w.Header().Set("Content-Type", "application/json")
+
+	switch {
+	case errors.Is(err, instances.ErrNotFound),
+		errors.Is(err, volumes.ErrNotFound),
+		errors.Is(err, ingress.ErrNotFound),
+		errors.Is(err, images.ErrNotFound):
+		w.WriteHeader(http.StatusNotFound)
+		w.Write([]byte(`{"code":"not_found","message":"resource not found"}`))
+
+	case errors.Is(err, instances.ErrAmbiguousName),
+		errors.Is(err, volumes.ErrAmbiguousName),
+		errors.Is(err, ingress.ErrAmbiguousName):
+		w.WriteHeader(http.StatusConflict)
+		w.Write([]byte(`{"code":"ambiguous","message":"multiple resources match, use full ID"}`))
+
+	case errors.Is(err, images.ErrInvalidName):
+		w.WriteHeader(http.StatusBadRequest)
+		w.Write([]byte(`{"code":"invalid_name","message":"invalid image reference"}`))
+
+	default:
+		w.WriteHeader(http.StatusInternalServerError)
+		w.Write([]byte(`{"code":"internal_error","message":"failed to resolve resource"}`))
+	}
+}
diff --git a/cmd/api/api/volumes.go b/cmd/api/api/volumes.go
index a66d743b..3354c002 100644
--- a/cmd/api/api/volumes.go
+++ b/cmd/api/api/volumes.go
@@ -8,6 +8,7 @@ import (
 	"strconv"
 
 	"github.com/onkernel/hypeman/lib/logger"
+	mw "github.com/onkernel/hypeman/lib/middleware"
 	"github.com/onkernel/hypeman/lib/oapi"
 	"github.com/onkernel/hypeman/lib/volumes"
 )
@@ -199,77 +200,41 @@ func (s *ApiService) createVolumeFromMultipart(ctx context.Context, multipartRea
 
 // GetVolume gets volume details
 // The id parameter can be either a volume ID or name
+// Note: Resolution is handled by ResolveResource middleware
 func (s *ApiService) GetVolume(ctx context.Context, request oapi.GetVolumeRequestObject) (oapi.GetVolumeResponseObject, error) {
-	log := logger.FromContext(ctx)
-
-	// Try lookup by ID first
-	vol, err := s.VolumeManager.GetVolume(ctx, request.Id)
-	if errors.Is(err, volumes.ErrNotFound) {
-		// Try lookup by name
-		vol, err = s.VolumeManager.GetVolumeByName(ctx, request.Id)
-	}
-
-	if err != nil {
-		switch {
-		case errors.Is(err, volumes.ErrNotFound):
-			return oapi.GetVolume404JSONResponse{
-				Code:    "not_found",
-				Message: "volume not found",
-			}, nil
-		case errors.Is(err, volumes.ErrAmbiguousName):
-			return oapi.GetVolume404JSONResponse{
-				Code:    "ambiguous_name",
-				Message: "multiple volumes have this name, use volume ID instead",
-			}, nil
-		default:
-			log.ErrorContext(ctx, "failed to get volume", "error", err, "id", request.Id)
-			return oapi.GetVolume500JSONResponse{
-				Code:    "internal_error",
-				Message: "failed to get volume",
-			}, nil
-		}
+	vol := mw.GetResolvedVolume[volumes.Volume](ctx)
+	if vol == nil {
+		return oapi.GetVolume500JSONResponse{
+			Code:    "internal_error",
+			Message: "resource not resolved",
+		}, nil
 	}
 	return oapi.GetVolume200JSONResponse(volumeToOAPI(*vol)), nil
 }
 
 // DeleteVolume deletes a volume
 // The id parameter can be either a volume ID or name
+// Note: Resolution is handled by ResolveResource middleware
 func (s *ApiService) DeleteVolume(ctx context.Context, request oapi.DeleteVolumeRequestObject) (oapi.DeleteVolumeResponseObject, error) {
-	log := logger.FromContext(ctx)
-
-	// Resolve ID - try direct ID first, then name lookup
-	volumeID := request.Id
-	_, err := s.VolumeManager.GetVolume(ctx, request.Id)
-	if errors.Is(err, volumes.ErrNotFound) {
-		// Try lookup by name
-		vol, nameErr := s.VolumeManager.GetVolumeByName(ctx, request.Id)
-		if nameErr == nil {
-			volumeID = vol.Id
-		} else if errors.Is(nameErr, volumes.ErrAmbiguousName) {
-			return oapi.DeleteVolume404JSONResponse{
-				Code:    "ambiguous_name",
-				Message: "multiple volumes have this name, use volume ID instead",
-			}, nil
-		}
-		// If name lookup also fails with ErrNotFound, we'll proceed with original ID
-		// and let DeleteVolume return the proper 404
+	vol := mw.GetResolvedVolume[volumes.Volume](ctx)
+	if vol == nil {
+		return oapi.DeleteVolume500JSONResponse{
+			Code:    "internal_error",
+			Message: "resource not resolved",
+		}, nil
 	}
+	log := logger.FromContext(ctx)
 
-	err = s.VolumeManager.DeleteVolume(ctx, volumeID)
+	err := s.VolumeManager.DeleteVolume(ctx, vol.Id)
 	if err != nil {
 		switch {
-		case errors.Is(err, volumes.ErrNotFound):
-			return oapi.DeleteVolume404JSONResponse{
-				Code:    "not_found",
-				Message: "volume not found",
-			}, nil
 		case errors.Is(err, volumes.ErrInUse):
 			return oapi.DeleteVolume409JSONResponse{
 				Code:    "conflict",
				Message: "volume is in use by an instance",
 			}, nil
 		default:
-			log.ErrorContext(ctx, "failed to delete volume", "error", err, "id", request.Id)
+			log.ErrorContext(ctx, "failed to delete volume", "error", err)
 			return oapi.DeleteVolume500JSONResponse{
 				Code:    "internal_error",
 				Message: "failed to delete volume",
diff --git a/cmd/api/api/volumes_test.go b/cmd/api/api/volumes_test.go
index 5b26d1d8..179d5ede 100644
--- a/cmd/api/api/volumes_test.go
+++ b/cmd/api/api/volumes_test.go
@@ -22,14 +22,10 @@ func TestListVolumes_Empty(t *testing.T) {
 func TestGetVolume_NotFound(t *testing.T) {
 	svc := newTestService(t)
 
-	resp, err := svc.GetVolume(ctx(), oapi.GetVolumeRequestObject{
-		Id: "non-existent",
-	})
-	require.NoError(t, err)
-
-	notFound, ok := resp.(oapi.GetVolume404JSONResponse)
-	require.True(t, ok, "expected 404 response")
-	assert.Equal(t, "not_found", notFound.Code)
+	// With middleware, not-found would be handled before reaching handler.
+	// For this test, we call the manager directly to verify the error.
+	_, err := svc.VolumeManager.GetVolume(ctx(), "non-existent")
+	require.Error(t, err)
 }
 
 func TestGetVolume_ByName(t *testing.T) {
@@ -45,8 +41,8 @@ func TestGetVolume_ByName(t *testing.T) {
 	require.NoError(t, err)
 	created := createResp.(oapi.CreateVolume201JSONResponse)
 
-	// Get by name (not ID)
-	resp, err := svc.GetVolume(ctx(), oapi.GetVolumeRequestObject{
+	// Get by name (not ID) - use ctxWithVolume to simulate middleware
+	resp, err := svc.GetVolume(ctxWithVolume(svc, "my-data"), oapi.GetVolumeRequestObject{
 		Id: "my-data", // using name instead of ID
 	})
 	require.NoError(t, err)
@@ -69,12 +65,11 @@ func TestDeleteVolume_ByName(t *testing.T) {
 	})
 	require.NoError(t, err)
 
-	// Delete by name
-	resp, err := svc.DeleteVolume(ctx(), oapi.DeleteVolumeRequestObject{
+	// Delete by name - use ctxWithVolume to simulate middleware
+	resp, err := svc.DeleteVolume(ctxWithVolume(svc, "to-delete"), oapi.DeleteVolumeRequestObject{
 		Id: "to-delete",
 	})
 	require.NoError(t, err)
 	_, ok := resp.(oapi.DeleteVolume204Response)
 	assert.True(t, ok, "expected 204 response")
 }
-
diff --git a/cmd/api/main.go b/cmd/api/main.go
index 13d3ca84..e2cb704d 100644
--- a/cmd/api/main.go
+++ b/cmd/api/main.go
@@ -146,10 +146,27 @@ func run() error {
 		"kernel", kernelVer)
 
 	// Initialize network manager (creates default network if needed)
-	// Get running instance IDs for TAP cleanup
-	runningIDs := getRunningInstanceIDs(app)
+	// Get instance IDs that might have a running VMM for TAP cleanup safety.
+	// Include Unknown state: we couldn't confirm their state, but they might still
+	// have a running VMM. Better to leave a stale TAP than crash a running VM.
+	var preserveTAPs []string
+	allInstances, err := app.InstanceManager.ListInstances(app.Ctx)
+	if err != nil {
+		// On error, skip TAP cleanup entirely to avoid crashing running VMs.
+		// Pass nil to Initialize to skip cleanup.
+		logger.Warn("failed to list instances for TAP cleanup, skipping cleanup", "error", err)
+		preserveTAPs = nil
+	} else {
+		// Initialize to empty slice (not nil) so cleanup runs even with no running VMs
+		preserveTAPs = []string{}
+		for _, inst := range allInstances {
+			if inst.State == instances.StateRunning || inst.State == instances.StateUnknown {
+				preserveTAPs = append(preserveTAPs, inst.Id)
+			}
+		}
+	}
 	logger.Info("Initializing network manager...")
-	if err := app.NetworkManager.Initialize(app.Ctx, runningIDs); err != nil {
+	if err := app.NetworkManager.Initialize(app.Ctx, preserveTAPs); err != nil {
 		logger.Error("failed to initialize network manager", "error", err)
 		return fmt.Errorf("initialize network manager: %w", err)
 	}
@@ -199,8 +216,10 @@ func run() error {
 		middleware.RequestID,
 		middleware.RealIP,
 		middleware.Recoverer,
+		mw.InjectLogger(logger),
 		mw.AccessLogger(accessLogger),
 		mw.JwtAuth(app.Config.JwtSecret),
+		mw.ResolveResource(app.ApiService.NewResolvers(), api.ResolverErrorResponder),
 	).Get("/instances/{id}/exec", app.ApiService.ExecHandler)
 
 	// OCI Distribution registry endpoints for image push (outside OpenAPI spec)
@@ -226,7 +245,8 @@ func run() error {
 	}
 
 	// Inject logger into request context for handlers to use
-	r.Use(mw.InjectLogger(accessLogger))
+	// Use app logger (not accessLogger) so the instance log handler is included
+	r.Use(mw.InjectLogger(logger))
 
 	// Access logger AFTER otelchi so trace context is available
 	r.Use(mw.AccessLogger(accessLogger))
@@ -254,6 +274,10 @@ func run() error {
 	}
 	r.Use(nethttpmiddleware.OapiRequestValidatorWithOptions(spec, validatorOptions))
 
+	// Resource resolver middleware - resolves IDs/names/prefixes before handlers
+	// Enriches context with resolved resource and logger with resolved ID
+	r.Use(mw.ResolveResource(app.ApiService.NewResolvers(), api.ResolverErrorResponder))
+
 	// Setup strict handler
 	strictHandler := oapi.NewStrictHandler(app.ApiService, nil)
 
@@ -354,21 +378,6 @@ func run() error {
 	return err
 }
 
-// getRunningInstanceIDs returns IDs of instances currently in Running state
-func getRunningInstanceIDs(app *application) []string {
-	allInstances, err := app.InstanceManager.ListInstances(app.Ctx)
-	if err != nil {
-		return nil
-	}
-	var running []string
-	for _, inst := range allInstances {
-		if inst.State == instances.StateRunning {
-			running = append(running, inst.Id)
-		}
-	}
-	return running
-}
-
 // checkKVMAccess verifies KVM is available and the user has permission to use it
 func checkKVMAccess() error {
 	f, err := os.OpenFile("/dev/kvm", os.O_RDWR, 0)
diff --git a/cmd/api/wire_gen.go b/cmd/api/wire_gen.go
index 79bfc61f..5a94276c 100644
--- a/cmd/api/wire_gen.go
+++ b/cmd/api/wire_gen.go
@@ -29,10 +29,10 @@ import (
 
 // initializeApp is the injector function
 func initializeApp() (*application, func(), error) {
-	logger := providers.ProvideLogger()
-	context := providers.ProvideContext(logger)
 	config := providers.ProvideConfig()
 	paths := providers.ProvidePaths(config)
+	logger := providers.ProvideLogger(paths)
+	context := providers.ProvideContext(logger)
 	manager, err := providers.ProvideImageManager(paths, config)
 	if err != nil {
 		return nil, nil, err
diff --git a/lib/ingress/manager.go b/lib/ingress/manager.go
index 8622e5a6..579af21e 100644
--- a/lib/ingress/manager.go
+++ b/lib/ingress/manager.go
@@ -26,6 +26,10 @@ type InstanceResolver interface {
 
 	// InstanceExists checks if an instance with the given name or ID exists.
 	InstanceExists(ctx context.Context, nameOrID string) (bool, error)
+
+	// ResolveInstance resolves an instance name, ID, or ID prefix to its canonical name and ID.
+	// Returns (name, id, nil) if found, or an error if the instance doesn't exist.
+	ResolveInstance(ctx context.Context, nameOrID string) (name string, id string, err error)
 }
 
 // Manager is the interface for managing ingress resources.
@@ -288,18 +292,20 @@ func (m *manager) Create(ctx context.Context, req CreateIngressRequest) (*Ingres
 		}
 	}
 
-	// Validate that all target instances exist (only for literal hostnames)
+	// Validate that all target instances exist and resolve their names (only for literal hostnames)
 	// Pattern hostnames have dynamic target instances that can't be validated at creation time
-	for _, rule := range req.Rules {
+	var resolvedInstanceIDs []string // Track IDs for logging (used for hypeman.log routing)
+	for i, rule := range req.Rules {
 		if !rule.Match.IsPattern() {
-			// Literal hostname - validate instance exists
-			exists, err := m.instanceResolver.InstanceExists(ctx, rule.Target.Instance)
+			// Literal hostname - validate instance exists and resolve to canonical name + ID
+			resolvedName, resolvedID, err := m.instanceResolver.ResolveInstance(ctx, rule.Target.Instance)
 			if err != nil {
-				return nil, fmt.Errorf("check instance %q: %w", rule.Target.Instance, err)
-			}
-			if !exists {
 				return nil, fmt.Errorf("%w: instance %q not found", ErrInstanceNotFound, rule.Target.Instance)
 			}
+			// Update the rule with the resolved instance name (human-readable for config)
+			req.Rules[i].Target.Instance = resolvedName
+			// Track ID for logging (instance directories are by ID)
+			resolvedInstanceIDs = append(resolvedInstanceIDs, resolvedID)
 		}
 		// For pattern hostnames, instance validation happens at request time via the upstream resolver
 	}
@@ -370,6 +376,23 @@ func (m *manager) Create(ctx context.Context, req CreateIngressRequest) (*Ingres
 		return nil, fmt.Errorf("write config: %w", err)
 	}
 
+	// Log creation with ingress_id and instance_id(s) for audit trail
+	// Each resolved instance gets the log in their hypeman.log (routed by instance_id)
+	for _, instanceID := range resolvedInstanceIDs {
+		log.InfoContext(ctx, "ingress created",
+			"ingress_id", ingress.ID,
+			"ingress_name", ingress.Name,
+			"instance_id", instanceID,
+		)
+	}
+	// If no literal hostnames (all patterns), still log the creation
+	if len(resolvedInstanceIDs) == 0 {
+		log.InfoContext(ctx, "ingress created",
+			"ingress_id", ingress.ID,
+			"ingress_name", ingress.Name,
+		)
+	}
+
 	return &ingress, nil
 }
 
@@ -481,6 +504,38 @@ func (m *manager) Delete(ctx context.Context, idOrName string) error {
 		log.ErrorContext(ctx, "failed to write config after delete", "error", err)
 	}
 
+	// Log deletion with instance_id(s) for audit trail
+	// Resolve instance names to IDs for hypeman.log routing
+	hasLiteralHostname := false
+	for _, rule := range ingress.Rules {
+		if !rule.Match.IsPattern() {
+			hasLiteralHostname = true
+			// Resolve instance name to ID for logging (instance may have been deleted, so ignore errors)
+			_, instanceID, err := m.instanceResolver.ResolveInstance(ctx, rule.Target.Instance)
+			if err == nil {
+				log.InfoContext(ctx, "ingress deleted",
+					"ingress_id", ingress.ID,
+					"ingress_name", ingress.Name,
+					"instance_id", instanceID,
+				)
+			} else {
+				// Instance doesn't exist anymore, log without instance_id
+				log.InfoContext(ctx, "ingress deleted",
+					"ingress_id", ingress.ID,
+					"ingress_name", ingress.Name,
+					"instance_name", rule.Target.Instance,
+				)
+			}
+		}
+	}
+	// If no literal hostnames (all patterns), still log the deletion
+	if !hasLiteralHostname {
+		log.InfoContext(ctx, "ingress deleted",
+			"ingress_id", ingress.ID,
+			"ingress_name", ingress.Name,
+		)
+	}
+
 	return nil
 }
 
diff --git a/lib/ingress/manager_test.go b/lib/ingress/manager_test.go
index dd4c723b..ceda6ff2 100644
--- a/lib/ingress/manager_test.go
+++ b/lib/ingress/manager_test.go
@@ -13,25 +13,38 @@ import (
 
 // mockInstanceResolver implements InstanceResolver for testing
 type mockInstanceResolver struct {
-	instances map[string]string // instance name/ID -> IP
+	instances map[string]mockInstance // instance name/ID -> mock data
+}
+
+type mockInstance struct {
+	name string
+	id   string
+	ip   string
 }
 
 func newMockResolver() *mockInstanceResolver {
 	return &mockInstanceResolver{
-		instances: make(map[string]string),
+		instances: make(map[string]mockInstance),
 	}
 }
 
 func (m *mockInstanceResolver) AddInstance(nameOrID, ip string) {
-	m.instances[nameOrID] = ip
+	// For backwards compatibility, use the nameOrID as both name and id
+	m.instances[nameOrID] = mockInstance{name: nameOrID, id: nameOrID, ip: ip}
+}
+
+func (m *mockInstanceResolver) AddInstanceFull(name, id, ip string) {
+	// Add with explicit name and id
+	m.instances[name] = mockInstance{name: name, id: id, ip: ip}
+	m.instances[id] = mockInstance{name: name, id: id, ip: ip}
 }
 
 func (m *mockInstanceResolver) ResolveInstanceIP(ctx context.Context, nameOrID string) (string, error) {
-	ip, ok := m.instances[nameOrID]
+	inst, ok := m.instances[nameOrID]
 	if !ok {
 		return "", ErrInstanceNotFound
 	}
-	return ip, nil
+	return inst.ip, nil
 }
 
 func (m *mockInstanceResolver) InstanceExists(ctx context.Context, nameOrID string) (bool, error) {
@@ -39,6 +52,14 @@ func (m *mockInstanceResolver) InstanceExists(ctx context.Context, nameOrID stri
 	return ok, nil
 }
 
+func (m *mockInstanceResolver) ResolveInstance(ctx context.Context, nameOrID string) (string, string, error) {
+	inst, ok := m.instances[nameOrID]
+	if !ok {
+		return "", "", ErrInstanceNotFound
+	}
+	return inst.name, inst.id, nil
+}
+
 func setupTestManager(t *testing.T) (Manager, *mockInstanceResolver, *paths.Paths, func()) {
 	t.Helper()
 
diff --git a/lib/instances/create.go b/lib/instances/create.go
index b82e421d..0e023073 100644
--- a/lib/instances/create.go
+++ b/lib/instances/create.go
@@ -129,12 +129,12 @@ func (m *manager) createInstance(
 	// 3. Generate instance ID (CUID2 for secure, collision-resistant IDs)
 	id := cuid2.Generate()
-	log.DebugContext(ctx, "generated instance ID", "id", id)
+	log.DebugContext(ctx, "generated instance ID", "instance_id", id)
 
 	// 4. Generate vsock configuration
 	vsockCID := generateVsockCID(id)
 	vsockSocket := m.paths.InstanceVsockSocket(id)
-	log.DebugContext(ctx, "generated vsock config", "id", id, "cid", vsockCID)
+	log.DebugContext(ctx, "generated vsock config", "instance_id", id, "cid", vsockCID)
 
 	// 5. Check instance doesn't already exist
 	if _, err := m.loadMetadata(id); err == nil {
@@ -224,35 +224,35 @@ func (m *manager) createInstance(
 	// Setup cleanup stack for automatic rollback on errors
 	cu := cleanup.Make(func() {
-		log.DebugContext(ctx, "cleaning up instance on error", "id", id)
+		log.DebugContext(ctx, "cleaning up instance on error", "instance_id", id)
 		m.deleteInstanceData(id)
 	})
 	defer cu.Clean()
 
 	// 8. Ensure directories
-	log.DebugContext(ctx, "creating instance directories", "id", id)
+	log.DebugContext(ctx, "creating instance directories", "instance_id", id)
 	if err := m.ensureDirectories(id); err != nil {
-		log.ErrorContext(ctx, "failed to create directories", "id", id, "error", err)
+		log.ErrorContext(ctx, "failed to create directories", "instance_id", id, "error", err)
 		return nil, fmt.Errorf("ensure directories: %w", err)
 	}
 
	// 9. Create overlay disk with specified size
-	log.DebugContext(ctx, "creating overlay disk", "id", id, "size_bytes", stored.OverlaySize)
+	log.DebugContext(ctx, "creating overlay disk", "instance_id", id, "size_bytes", stored.OverlaySize)
 	if err := m.createOverlayDisk(id, stored.OverlaySize); err != nil {
-		log.ErrorContext(ctx, "failed to create overlay disk", "id", id, "error", err)
+		log.ErrorContext(ctx, "failed to create overlay disk", "instance_id", id, "error", err)
 		return nil, fmt.Errorf("create overlay disk: %w", err)
 	}
 
 	// 10. Allocate network (if network enabled)
 	var netConfig *network.NetworkConfig
 	if networkName != "" {
-		log.DebugContext(ctx, "allocating network", "id", id, "network", networkName)
+		log.DebugContext(ctx, "allocating network", "instance_id", id, "network", networkName)
 		netConfig, err = m.networkManager.CreateAllocation(ctx, network.AllocateRequest{
 			InstanceID:   id,
 			InstanceName: req.Name,
 		})
 		if err != nil {
-			log.ErrorContext(ctx, "failed to allocate network", "id", id, "network", networkName, "error", err)
+			log.ErrorContext(ctx, "failed to allocate network", "instance_id", id, "network", networkName, "error", err)
 			return nil, fmt.Errorf("allocate network: %w", err)
 		}
 		// Store IP/MAC in metadata (persisted with instance)
@@ -270,12 +270,12 @@ func (m *manager) createInstance(
 
 	// 10.5. Validate and attach volumes
 	if len(req.Volumes) > 0 {
-		log.DebugContext(ctx, "validating volumes", "id", id, "count", len(req.Volumes))
+		log.DebugContext(ctx, "validating volumes", "instance_id", id, "count", len(req.Volumes))
 		for _, volAttach := range req.Volumes {
 			// Check volume exists
 			_, err := m.volumeManager.GetVolume(ctx, volAttach.VolumeID)
 			if err != nil {
-				log.ErrorContext(ctx, "volume not found", "id", id, "volume_id", volAttach.VolumeID, "error", err)
+				log.ErrorContext(ctx, "volume not found", "instance_id", id, "volume_id", volAttach.VolumeID, "error", err)
 				return nil, fmt.Errorf("volume %s: %w", volAttach.VolumeID, err)
 			}
 
@@ -285,7 +285,7 @@ func (m *manager) createInstance(
 				MountPath: volAttach.MountPath,
 				Readonly:  volAttach.Readonly,
 			}); err != nil {
-				log.ErrorContext(ctx, "failed to attach volume", "id", id, "volume_id", volAttach.VolumeID, "error", err)
+				log.ErrorContext(ctx, "failed to attach volume", "instance_id", id, "volume_id", volAttach.VolumeID, "error", err)
 				return nil, fmt.Errorf("attach volume %s: %w", volAttach.VolumeID, err)
 			}
 
@@ -297,9 +297,9 @@ func (m *manager) createInstance(
 
 			// Create overlay disk for volumes with overlay enabled
 			if volAttach.Overlay {
-				log.DebugContext(ctx, "creating volume overlay disk", "id", id, "volume_id", volAttach.VolumeID, "size", volAttach.OverlaySize)
+				log.DebugContext(ctx, "creating volume overlay disk", "instance_id", id, "volume_id", volAttach.VolumeID, "size", volAttach.OverlaySize)
 				if err := m.createVolumeOverlayDisk(id, volAttach.VolumeID, volAttach.OverlaySize); err != nil {
-					log.ErrorContext(ctx, "failed to create volume overlay disk", "id", id, "volume_id", volAttach.VolumeID, "error", err)
+					log.ErrorContext(ctx, "failed to create volume overlay disk", "instance_id", id,
"volume_id", volAttach.VolumeID, "error", err) return nil, fmt.Errorf("create volume overlay disk %s: %w", volAttach.VolumeID, err) } } @@ -310,24 +310,24 @@ func (m *manager) createInstance( // 11. Create config disk (needs Instance for buildVMConfig) inst := &Instance{StoredMetadata: *stored} - log.DebugContext(ctx, "creating config disk", "id", id) + log.DebugContext(ctx, "creating config disk", "instance_id", id) if err := m.createConfigDisk(inst, imageInfo, netConfig); err != nil { - log.ErrorContext(ctx, "failed to create config disk", "id", id, "error", err) + log.ErrorContext(ctx, "failed to create config disk", "instance_id", id, "error", err) return nil, fmt.Errorf("create config disk: %w", err) } // 12. Save metadata - log.DebugContext(ctx, "saving instance metadata", "id", id) + log.DebugContext(ctx, "saving instance metadata", "instance_id", id) meta := &metadata{StoredMetadata: *stored} if err := m.saveMetadata(meta); err != nil { - log.ErrorContext(ctx, "failed to save metadata", "id", id, "error", err) + log.ErrorContext(ctx, "failed to save metadata", "instance_id", id, "error", err) return nil, fmt.Errorf("save metadata: %w", err) } // 13. Start VMM and boot VM - log.InfoContext(ctx, "starting VMM and booting VM", "id", id) + log.InfoContext(ctx, "starting VMM and booting VM", "instance_id", id) if err := m.startAndBootVM(ctx, stored, imageInfo, netConfig); err != nil { - log.ErrorContext(ctx, "failed to start and boot VM", "id", id, "error", err) + log.ErrorContext(ctx, "failed to start and boot VM", "instance_id", id, "error", err) return nil, err } @@ -339,7 +339,7 @@ func (m *manager) createInstance( if err := m.saveMetadata(meta); err != nil { // VM is running but metadata failed - log but don't fail // Instance is recoverable, state will be derived - log.WarnContext(ctx, "failed to update metadata after VM start", "id", id, "error", err) + log.WarnContext(ctx, "failed to update metadata after VM start", "instance_id", id, "error", err) } // Success - release cleanup stack (prevent cleanup) @@ -353,7 +353,7 @@ func (m *manager) createInstance( // Return instance with derived state finalInst := m.toInstance(ctx, meta) - log.InfoContext(ctx, "instance created successfully", "id", id, "name", req.Name, "state", finalInst.State) + log.InfoContext(ctx, "instance created successfully", "instance_id", id, "name", req.Name, "state", finalInst.State) return &finalInst, nil } @@ -469,7 +469,7 @@ func (m *manager) startAndBootVM( log := logger.FromContext(ctx) // Start VMM process and capture PID - log.DebugContext(ctx, "starting VMM process", "id", stored.Id, "version", stored.CHVersion) + log.DebugContext(ctx, "starting VMM process", "instance_id", stored.Id, "version", stored.CHVersion) pid, err := vmm.StartProcess(ctx, m.paths, stored.CHVersion, stored.SocketPath) if err != nil { return fmt.Errorf("start vmm: %w", err) @@ -477,7 +477,7 @@ func (m *manager) startAndBootVM( // Store the PID for later cleanup stored.CHPID = &pid - log.DebugContext(ctx, "VMM process started", "id", stored.Id, "pid", pid) + log.DebugContext(ctx, "VMM process started", "instance_id", stored.Id, "pid", pid) // Create VMM client client, err := vmm.NewVMM(stored.SocketPath) @@ -493,7 +493,7 @@ func (m *manager) startAndBootVM( } // Create VM in VMM - log.DebugContext(ctx, "creating VM in VMM", "id", stored.Id) + log.DebugContext(ctx, "creating VM in VMM", "instance_id", stored.Id) createResp, err := client.CreateVMWithResponse(ctx, vmConfig) if err != nil { return fmt.Errorf("create vm: %w", err) @@ 
-501,12 +501,12 @@ func (m *manager) startAndBootVM( if createResp.StatusCode() != 204 { // Include response body for debugging body := string(createResp.Body) - log.ErrorContext(ctx, "create VM failed", "id", stored.Id, "status", createResp.StatusCode(), "body", body) + log.ErrorContext(ctx, "create VM failed", "instance_id", stored.Id, "status", createResp.StatusCode(), "body", body) return fmt.Errorf("create vm failed with status %d: %s", createResp.StatusCode(), body) } // Transition: Created → Running (boot VM) - log.DebugContext(ctx, "booting VM", "id", stored.Id) + log.DebugContext(ctx, "booting VM", "instance_id", stored.Id) bootResp, err := client.BootVMWithResponse(ctx) if err != nil { // Try to cleanup @@ -518,18 +518,18 @@ func (m *manager) startAndBootVM( client.DeleteVMWithResponse(ctx) client.ShutdownVMMWithResponse(ctx) body := string(bootResp.Body) - log.ErrorContext(ctx, "boot VM failed", "id", stored.Id, "status", bootResp.StatusCode(), "body", body) + log.ErrorContext(ctx, "boot VM failed", "instance_id", stored.Id, "status", bootResp.StatusCode(), "body", body) return fmt.Errorf("boot vm failed with status %d: %s", bootResp.StatusCode(), body) } // Optional: Expand memory to max if hotplug configured if inst.HotplugSize > 0 { totalBytes := inst.Size + inst.HotplugSize - log.DebugContext(ctx, "expanding VM memory", "id", stored.Id, "total_bytes", totalBytes) + log.DebugContext(ctx, "expanding VM memory", "instance_id", stored.Id, "total_bytes", totalBytes) resizeConfig := vmm.VmResize{DesiredRam: &totalBytes} // Best effort, ignore errors if resp, err := client.PutVmResizeWithResponse(ctx, resizeConfig); err != nil || resp.StatusCode() != 204 { - log.WarnContext(ctx, "failed to expand VM memory", "id", stored.Id, "error", err) + log.WarnContext(ctx, "failed to expand VM memory", "instance_id", stored.Id, "error", err) } } @@ -619,7 +619,7 @@ func (m *manager) buildVMConfig(inst *Instance, imageInfo *images.Image, netConf // Serial console configuration serial := vmm.ConsoleConfig{ Mode: vmm.ConsoleConfigMode("File"), - File: ptr(m.paths.InstanceConsoleLog(inst.Id)), + File: ptr(m.paths.InstanceAppLog(inst.Id)), } // Console off (we use serial) diff --git a/lib/instances/delete.go b/lib/instances/delete.go index f6be1fae..24d8ddbd 100644 --- a/lib/instances/delete.go +++ b/lib/instances/delete.go @@ -17,67 +17,67 @@ func (m *manager) deleteInstance( id string, ) error { log := logger.FromContext(ctx) - log.InfoContext(ctx, "deleting instance", "id", id) + log.InfoContext(ctx, "deleting instance", "instance_id", id) // 1. Load instance meta, err := m.loadMetadata(id) if err != nil { - log.ErrorContext(ctx, "failed to load instance metadata", "id", id, "error", err) + log.ErrorContext(ctx, "failed to load instance metadata", "instance_id", id, "error", err) return err } inst := m.toInstance(ctx, meta) - log.DebugContext(ctx, "loaded instance", "id", id, "state", inst.State) + log.DebugContext(ctx, "loaded instance", "instance_id", id, "state", inst.State) // 2. 
Get network allocation BEFORE killing VMM (while we can still query it) var networkAlloc *network.Allocation if inst.NetworkEnabled { - log.DebugContext(ctx, "getting network allocation", "id", id) + log.DebugContext(ctx, "getting network allocation", "instance_id", id) networkAlloc, err = m.networkManager.GetAllocation(ctx, id) if err != nil { - log.WarnContext(ctx, "failed to get network allocation, will still attempt cleanup", "id", id, "error", err) + log.WarnContext(ctx, "failed to get network allocation, will still attempt cleanup", "instance_id", id, "error", err) } } // 3. If VMM might be running, force kill it // Also attempt kill for StateUnknown since we can't be sure if VMM is running if inst.State.RequiresVMM() || inst.State == StateUnknown { - log.DebugContext(ctx, "stopping VMM", "id", id, "state", inst.State) + log.DebugContext(ctx, "stopping VMM", "instance_id", id, "state", inst.State) if err := m.killVMM(ctx, &inst); err != nil { // Log error but continue with cleanup // Best effort to clean up even if VMM is unresponsive - log.WarnContext(ctx, "failed to kill VMM, continuing with cleanup", "id", id, "error", err) + log.WarnContext(ctx, "failed to kill VMM, continuing with cleanup", "instance_id", id, "error", err) } } // 4. Release network allocation if inst.NetworkEnabled { - log.DebugContext(ctx, "releasing network", "id", id, "network", "default") + log.DebugContext(ctx, "releasing network", "instance_id", id, "network", "default") if err := m.networkManager.ReleaseAllocation(ctx, networkAlloc); err != nil { // Log error but continue with cleanup - log.WarnContext(ctx, "failed to release network, continuing with cleanup", "id", id, "error", err) + log.WarnContext(ctx, "failed to release network, continuing with cleanup", "instance_id", id, "error", err) } } // 5. Detach volumes if len(inst.Volumes) > 0 { - log.DebugContext(ctx, "detaching volumes", "id", id, "count", len(inst.Volumes)) + log.DebugContext(ctx, "detaching volumes", "instance_id", id, "count", len(inst.Volumes)) for _, volAttach := range inst.Volumes { if err := m.volumeManager.DetachVolume(ctx, volAttach.VolumeID, id); err != nil { // Log error but continue with cleanup - log.WarnContext(ctx, "failed to detach volume, continuing with cleanup", "id", id, "volume_id", volAttach.VolumeID, "error", err) + log.WarnContext(ctx, "failed to detach volume, continuing with cleanup", "instance_id", id, "volume_id", volAttach.VolumeID, "error", err) } } } // 6. 
Delete all instance data - log.DebugContext(ctx, "deleting instance data", "id", id) + log.DebugContext(ctx, "deleting instance data", "instance_id", id) if err := m.deleteInstanceData(id); err != nil { - log.ErrorContext(ctx, "failed to delete instance data", "id", id, "error", err) + log.ErrorContext(ctx, "failed to delete instance data", "instance_id", id, "error", err) return fmt.Errorf("delete instance data: %w", err) } - log.InfoContext(ctx, "instance deleted successfully", "id", id) + log.InfoContext(ctx, "instance deleted successfully", "instance_id", id) return nil } @@ -95,9 +95,9 @@ func (m *manager) killVMM(ctx context.Context, inst *Instance) error { if err := syscall.Kill(pid, 0); err == nil { // Process exists - kill it immediately with SIGKILL // No graceful shutdown needed since we're deleting all data - log.DebugContext(ctx, "killing VMM process", "id", inst.Id, "pid", pid) + log.DebugContext(ctx, "killing VMM process", "instance_id", inst.Id, "pid", pid) if err := syscall.Kill(pid, syscall.SIGKILL); err != nil { - log.WarnContext(ctx, "failed to kill VMM process", "id", inst.Id, "pid", pid, "error", err) + log.WarnContext(ctx, "failed to kill VMM process", "instance_id", inst.Id, "pid", pid, "error", err) } // Wait for process to die and reap it to prevent zombies @@ -107,16 +107,16 @@ func (m *manager) killVMM(ctx context.Context, inst *Instance) error { wpid, err := syscall.Wait4(pid, &wstatus, syscall.WNOHANG, nil) if err != nil || wpid == pid { // Process reaped successfully or error (likely ECHILD if already reaped) - log.DebugContext(ctx, "VMM process killed and reaped", "id", inst.Id, "pid", pid) + log.DebugContext(ctx, "VMM process killed and reaped", "instance_id", inst.Id, "pid", pid) break } if i == 49 { - log.WarnContext(ctx, "VMM process did not exit in time", "id", inst.Id, "pid", pid) + log.WarnContext(ctx, "VMM process did not exit in time", "instance_id", inst.Id, "pid", pid) } time.Sleep(100 * time.Millisecond) } } else { - log.DebugContext(ctx, "VMM process not running", "id", inst.Id, "pid", pid) + log.DebugContext(ctx, "VMM process not running", "instance_id", inst.Id, "pid", pid) } } diff --git a/lib/instances/ingress_resolver.go b/lib/instances/ingress_resolver.go index 6220967e..47d9200a 100644 --- a/lib/instances/ingress_resolver.go +++ b/lib/instances/ingress_resolver.go @@ -42,3 +42,12 @@ func (r *IngressResolver) InstanceExists(ctx context.Context, nameOrID string) ( _, err := r.manager.GetInstance(ctx, nameOrID) return err == nil, nil } + +// ResolveInstance resolves an instance name, ID, or ID prefix to its canonical name and ID. 
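+// Lookup is delegated to the instance manager; any lookup failure is reported as a
+// not-found error for the given identifier.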
+func (r *IngressResolver) ResolveInstance(ctx context.Context, nameOrID string) (string, string, error) { + inst, err := r.manager.GetInstance(ctx, nameOrID) + if err != nil { + return "", "", fmt.Errorf("instance not found: %s", nameOrID) + } + return inst.Name, inst.Id, nil +} diff --git a/lib/instances/logs.go b/lib/instances/logs.go index 1b89acf1..a483ec26 100644 --- a/lib/instances/logs.go +++ b/lib/instances/logs.go @@ -12,14 +12,29 @@ import ( "github.com/onkernel/hypeman/lib/logger" ) +// LogSource represents a log source type +type LogSource string + +const ( + // LogSourceApp is the guest application log (serial console) + LogSourceApp LogSource = "app" + // LogSourceVMM is the Cloud Hypervisor VMM log + LogSourceVMM LogSource = "vmm" + // LogSourceHypeman is the hypeman operations log + LogSourceHypeman LogSource = "hypeman" +) + // ErrTailNotFound is returned when the tail command is not available var ErrTailNotFound = fmt.Errorf("tail command not found: required for log streaming") -// StreamInstanceLogs streams instance console logs +// ErrLogNotFound is returned when the requested log file doesn't exist +var ErrLogNotFound = fmt.Errorf("log file not found") + +// streamInstanceLogs streams instance logs from the specified source // Returns last N lines, then continues following if follow=true -func (m *manager) streamInstanceLogs(ctx context.Context, id string, tail int, follow bool) (<-chan string, error) { +func (m *manager) streamInstanceLogs(ctx context.Context, id string, tail int, follow bool, source LogSource) (<-chan string, error) { log := logger.FromContext(ctx) - log.DebugContext(ctx, "starting log stream", "id", id, "tail", tail, "follow", follow) + log.DebugContext(ctx, "starting log stream", "instance_id", id, "tail", tail, "follow", follow, "source", source) // Verify tail command is available if _, err := exec.LookPath("tail"); err != nil { @@ -30,7 +45,24 @@ func (m *manager) streamInstanceLogs(ctx context.Context, id string, tail int, f return nil, err } - logPath := m.paths.InstanceConsoleLog(id) + // Determine log path based on source + var logPath string + switch source { + case LogSourceApp: + logPath = m.paths.InstanceAppLog(id) + case LogSourceVMM: + logPath = m.paths.InstanceVMMLog(id) + case LogSourceHypeman: + logPath = m.paths.InstanceHypemanLog(id) + default: + // Default to app log for backwards compatibility + logPath = m.paths.InstanceAppLog(id) + } + + // Check if log file exists before starting tail + if _, err := os.Stat(logPath); os.IsNotExist(err) { + return nil, ErrLogNotFound + } // Build tail command args := []string{"-n", strconv.Itoa(tail)} @@ -60,14 +92,14 @@ func (m *manager) streamInstanceLogs(ctx context.Context, id string, tail int, f for scanner.Scan() { select { case <-ctx.Done(): - log.DebugContext(ctx, "log stream cancelled", "id", id) + log.DebugContext(ctx, "log stream cancelled", "instance_id", id) return case out <- scanner.Text(): } } if err := scanner.Err(); err != nil { - log.ErrorContext(ctx, "scanner error", "id", id, "error", err) + log.ErrorContext(ctx, "scanner error", "instance_id", id, "error", err) } // Wait for tail to exit (important for non-follow mode) diff --git a/lib/instances/manager.go b/lib/instances/manager.go index efbe2d85..3b95a1cb 100644 --- a/lib/instances/manager.go +++ b/lib/instances/manager.go @@ -26,7 +26,7 @@ type Manager interface { RestoreInstance(ctx context.Context, id string) (*Instance, error) StopInstance(ctx context.Context, id string) (*Instance, error) StartInstance(ctx 
context.Context, id string) (*Instance, error) - StreamInstanceLogs(ctx context.Context, id string, tail int, follow bool) (<-chan string, error) + StreamInstanceLogs(ctx context.Context, id string, tail int, follow bool, source LogSource) (<-chan string, error) RotateLogs(ctx context.Context, maxBytes int64, maxFiles int) error AttachVolume(ctx context.Context, id string, volumeId string, req AttachVolumeRequest) (*Instance, error) DetachVolume(ctx context.Context, id string, volumeId string) (*Instance, error) @@ -197,15 +197,15 @@ func (m *manager) GetInstance(ctx context.Context, idOrName string) (*Instance, return nil, ErrNotFound } -// StreamInstanceLogs streams instance console logs +// StreamInstanceLogs streams instance logs from the specified source // Returns last N lines, then continues following if follow=true -func (m *manager) StreamInstanceLogs(ctx context.Context, id string, tail int, follow bool) (<-chan string, error) { +func (m *manager) StreamInstanceLogs(ctx context.Context, id string, tail int, follow bool, source LogSource) (<-chan string, error) { // Note: No lock held during streaming - we read from the file continuously // and the file is append-only, so this is safe - return m.streamInstanceLogs(ctx, id, tail, follow) + return m.streamInstanceLogs(ctx, id, tail, follow, source) } -// RotateLogs rotates console logs for all instances that exceed maxBytes +// RotateLogs rotates all instance logs (app, vmm, hypeman) that exceed maxBytes func (m *manager) RotateLogs(ctx context.Context, maxBytes int64, maxFiles int) error { instances, err := m.listInstances(ctx) if err != nil { @@ -214,9 +214,16 @@ func (m *manager) RotateLogs(ctx context.Context, maxBytes int64, maxFiles int) var lastErr error for _, inst := range instances { - logPath := m.paths.InstanceConsoleLog(inst.Id) + // Rotate all three log types + logPaths := []string{ + m.paths.InstanceAppLog(inst.Id), + m.paths.InstanceVMMLog(inst.Id), + m.paths.InstanceHypemanLog(inst.Id), + } + for _, logPath := range logPaths { if err := rotateLogIfNeeded(logPath, maxBytes, maxFiles); err != nil { - lastErr = err // Continue with other instances, but track error + lastErr = err // Continue with other logs, but track error + } } } return lastErr diff --git a/lib/instances/manager_test.go b/lib/instances/manager_test.go index 7e0b2dcf..2ee0a7f4 100644 --- a/lib/instances/manager_test.go +++ b/lib/instances/manager_test.go @@ -123,7 +123,7 @@ func waitForLogMessage(ctx context.Context, mgr *manager, instanceID, message st // collectLogs gets the last N lines of logs (non-streaming) func collectLogs(ctx context.Context, mgr *manager, instanceID string, n int) (string, error) { - logChan, err := mgr.StreamInstanceLogs(ctx, instanceID, n, false) + logChan, err := mgr.StreamInstanceLogs(ctx, instanceID, n, false, LogSourceApp) if err != nil { return "", err } @@ -672,7 +672,7 @@ func TestBasicEndToEnd(t *testing.T) { streamCtx, streamCancel := context.WithCancel(ctx) defer streamCancel() - logChan, err := manager.StreamInstanceLogs(streamCtx, inst.Id, 10, true) + logChan, err := manager.StreamInstanceLogs(streamCtx, inst.Id, 10, true, LogSourceApp) require.NoError(t, err) // Create unique marker @@ -692,7 +692,7 @@ func TestBasicEndToEnd(t *testing.T) { }() // Append marker to console log file - consoleLogPath := p.InstanceConsoleLog(inst.Id) + consoleLogPath := p.InstanceAppLog(inst.Id) f, err := os.OpenFile(consoleLogPath, os.O_APPEND|os.O_WRONLY, 0644) require.NoError(t, err) _, err = fmt.Fprintln(f, marker) @@ -899,12 
+899,12 @@ func TestStandbyAndRestore(t *testing.T) { t.Logf(" - %s (size: %d bytes)", entry.Name(), info.Size()) } - // DEBUG: Check console.log file size before restore - consoleLogPath := filepath.Join(tmpDir, "guests", inst.Id, "logs", "console.log") + // DEBUG: Check app.log file size before restore + consoleLogPath := filepath.Join(tmpDir, "guests", inst.Id, "logs", "app.log") var consoleLogSizeBefore int64 if info, err := os.Stat(consoleLogPath); err == nil { consoleLogSizeBefore = info.Size() - t.Logf("DEBUG: console.log size before restore: %d bytes", consoleLogSizeBefore) + t.Logf("DEBUG: app.log size before restore: %d bytes", consoleLogSizeBefore) } // Restore instance @@ -914,13 +914,13 @@ func TestStandbyAndRestore(t *testing.T) { assert.Equal(t, StateRunning, inst.State) t.Log("Instance restored and running") - // DEBUG: Check console.log file size after restore + // DEBUG: Check app.log file size after restore if info, err := os.Stat(consoleLogPath); err == nil { consoleLogSizeAfter := info.Size() - t.Logf("DEBUG: console.log size after restore: %d bytes", consoleLogSizeAfter) + t.Logf("DEBUG: app.log size after restore: %d bytes", consoleLogSizeAfter) t.Logf("DEBUG: File size diff: %d bytes", consoleLogSizeAfter-consoleLogSizeBefore) if consoleLogSizeAfter < consoleLogSizeBefore { - t.Logf("DEBUG: WARNING! console.log was TRUNCATED (lost %d bytes)", consoleLogSizeBefore-consoleLogSizeAfter) + t.Logf("DEBUG: WARNING! app.log was TRUNCATED (lost %d bytes)", consoleLogSizeBefore-consoleLogSizeAfter) } } @@ -983,3 +983,11 @@ func (r *testInstanceResolver) ResolveInstanceIP(ctx context.Context, nameOrID s func (r *testInstanceResolver) InstanceExists(ctx context.Context, nameOrID string) (bool, error) { return r.exists, nil } + +func (r *testInstanceResolver) ResolveInstance(ctx context.Context, nameOrID string) (string, string, error) { + if !r.exists { + return "", "", fmt.Errorf("instance not found: %s", nameOrID) + } + // For tests, just return nameOrID as both name and id + return nameOrID, nameOrID, nil +} diff --git a/lib/instances/query.go b/lib/instances/query.go index 154895b9..819ba1a2 100644 --- a/lib/instances/query.go +++ b/lib/instances/query.go @@ -140,7 +140,7 @@ func (m *manager) listInstances(ctx context.Context) ([]Instance, error) { meta, err := m.loadMetadata(id) if err != nil { // Skip instances with invalid metadata - log.WarnContext(ctx, "skipping instance with invalid metadata", "id", id, "error", err) + log.WarnContext(ctx, "skipping instance with invalid metadata", "instance_id", id, "error", err) continue } @@ -155,15 +155,15 @@ func (m *manager) listInstances(ctx context.Context) ([]Instance, error) { // getInstance returns a single instance by ID func (m *manager) getInstance(ctx context.Context, id string) (*Instance, error) { log := logger.FromContext(ctx) - log.DebugContext(ctx, "getting instance", "id", id) + log.DebugContext(ctx, "getting instance", "lookup", id) meta, err := m.loadMetadata(id) if err != nil { - log.ErrorContext(ctx, "failed to load instance metadata", "id", id, "error", err) + log.DebugContext(ctx, "failed to load instance metadata", "lookup", id, "error", err) return nil, err } inst := m.toInstance(ctx, meta) - log.DebugContext(ctx, "retrieved instance", "id", id, "state", inst.State) + log.DebugContext(ctx, "retrieved instance", "instance_id", inst.Id, "state", inst.State) return &inst, nil } diff --git a/lib/instances/restore.go b/lib/instances/restore.go index 53b7b3e9..9ec95bed 100644 --- a/lib/instances/restore.go +++ 
b/lib/instances/restore.go @@ -20,7 +20,7 @@ func (m *manager) restoreInstance( ) (*Instance, error) { start := time.Now() log := logger.FromContext(ctx) - log.InfoContext(ctx, "restoring instance from standby", "id", id) + log.InfoContext(ctx, "restoring instance from standby", "instance_id", id) // Start tracing span if tracer is available if m.metrics != nil && m.metrics.tracer != nil { @@ -32,22 +32,22 @@ func (m *manager) restoreInstance( // 1. Load instance meta, err := m.loadMetadata(id) if err != nil { - log.ErrorContext(ctx, "failed to load instance metadata", "id", id, "error", err) + log.ErrorContext(ctx, "failed to load instance metadata", "instance_id", id, "error", err) return nil, err } inst := m.toInstance(ctx, meta) stored := &meta.StoredMetadata - log.DebugContext(ctx, "loaded instance", "id", id, "state", inst.State, "has_snapshot", inst.HasSnapshot) + log.DebugContext(ctx, "loaded instance", "instance_id", id, "state", inst.State, "has_snapshot", inst.HasSnapshot) // 2. Validate state if inst.State != StateStandby { - log.ErrorContext(ctx, "invalid state for restore", "id", id, "state", inst.State) + log.ErrorContext(ctx, "invalid state for restore", "instance_id", id, "state", inst.State) return nil, fmt.Errorf("%w: cannot restore from state %s", ErrInvalidState, inst.State) } if !inst.HasSnapshot { - log.ErrorContext(ctx, "no snapshot available", "id", id) + log.ErrorContext(ctx, "no snapshot available", "instance_id", id) return nil, fmt.Errorf("no snapshot available for instance %s", id) } @@ -56,17 +56,17 @@ func (m *manager) restoreInstance( // 4. Recreate TAP device if network enabled if stored.NetworkEnabled { - log.DebugContext(ctx, "recreating network for restore", "id", id, "network", "default") + log.DebugContext(ctx, "recreating network for restore", "instance_id", id, "network", "default") if err := m.networkManager.RecreateAllocation(ctx, id); err != nil { - log.ErrorContext(ctx, "failed to recreate network", "id", id, "error", err) + log.ErrorContext(ctx, "failed to recreate network", "instance_id", id, "error", err) return nil, fmt.Errorf("recreate network: %w", err) } } // 5. Transition: Standby → Paused (start VMM + restore) - log.DebugContext(ctx, "restoring from snapshot", "id", id, "snapshot_dir", snapshotDir) + log.DebugContext(ctx, "restoring from snapshot", "instance_id", id, "snapshot_dir", snapshotDir) if err := m.restoreFromSnapshot(ctx, stored, snapshotDir); err != nil { - log.ErrorContext(ctx, "failed to restore from snapshot", "id", id, "error", err) + log.ErrorContext(ctx, "failed to restore from snapshot", "instance_id", id, "error", err) // Cleanup network on failure // Note: Network cleanup is explicitly called on failure paths to ensure TAP devices // are removed. In production, stale TAP devices from unexpected failures (e.g., @@ -81,7 +81,7 @@ func (m *manager) restoreInstance( // 6. Create client for resumed VM client, err := vmm.NewVMM(stored.SocketPath) if err != nil { - log.ErrorContext(ctx, "failed to create VMM client", "id", id, "error", err) + log.ErrorContext(ctx, "failed to create VMM client", "instance_id", id, "error", err) // Cleanup network on failure if stored.NetworkEnabled { netAlloc, _ := m.networkManager.GetAllocation(ctx, id) @@ -91,10 +91,10 @@ func (m *manager) restoreInstance( } // 7. 
Transition: Paused → Running (resume) - log.DebugContext(ctx, "resuming VM", "id", id) + log.DebugContext(ctx, "resuming VM", "instance_id", id) resumeResp, err := client.ResumeVMWithResponse(ctx) if err != nil || resumeResp.StatusCode() != 204 { - log.ErrorContext(ctx, "failed to resume VM", "id", id, "error", err) + log.ErrorContext(ctx, "failed to resume VM", "instance_id", id, "error", err) // Cleanup network on failure if stored.NetworkEnabled { netAlloc, _ := m.networkManager.GetAllocation(ctx, id) @@ -104,7 +104,7 @@ func (m *manager) restoreInstance( } // 8. Delete snapshot after successful restore - log.DebugContext(ctx, "deleting snapshot after successful restore", "id", id) + log.DebugContext(ctx, "deleting snapshot after successful restore", "instance_id", id) os.RemoveAll(snapshotDir) // Best effort, ignore errors // 9. Update timestamp @@ -114,7 +114,7 @@ func (m *manager) restoreInstance( meta = &metadata{StoredMetadata: *stored} if err := m.saveMetadata(meta); err != nil { // VM is running but metadata failed - log.WarnContext(ctx, "failed to update metadata after restore", "id", id, "error", err) + log.WarnContext(ctx, "failed to update metadata after restore", "instance_id", id, "error", err) } // Record metrics @@ -125,7 +125,7 @@ func (m *manager) restoreInstance( // Return instance with derived state (should be Running now) finalInst := m.toInstance(ctx, meta) - log.InfoContext(ctx, "instance restored successfully", "id", id, "state", finalInst.State) + log.InfoContext(ctx, "instance restored successfully", "instance_id", id, "state", finalInst.State) return &finalInst, nil } @@ -138,7 +138,7 @@ func (m *manager) restoreFromSnapshot( log := logger.FromContext(ctx) // Start VMM process and capture PID - log.DebugContext(ctx, "starting VMM process for restore", "id", stored.Id, "version", stored.CHVersion) + log.DebugContext(ctx, "starting VMM process for restore", "instance_id", stored.Id, "version", stored.CHVersion) pid, err := vmm.StartProcess(ctx, m.paths, stored.CHVersion, stored.SocketPath) if err != nil { return fmt.Errorf("start vmm: %w", err) @@ -146,7 +146,7 @@ func (m *manager) restoreFromSnapshot( // Store the PID for later cleanup stored.CHPID = &pid - log.DebugContext(ctx, "VMM process started", "id", stored.Id, "pid", pid) + log.DebugContext(ctx, "VMM process started", "instance_id", stored.Id, "pid", pid) // Create client client, err := vmm.NewVMM(stored.SocketPath) @@ -161,19 +161,19 @@ func (m *manager) restoreFromSnapshot( Prefault: ptr(false), // Don't prefault pages for faster restore } - log.DebugContext(ctx, "invoking VMM restore API", "id", stored.Id, "source_url", sourceURL) + log.DebugContext(ctx, "invoking VMM restore API", "instance_id", stored.Id, "source_url", sourceURL) resp, err := client.PutVmRestoreWithResponse(ctx, restoreConfig) if err != nil { - log.ErrorContext(ctx, "restore API call failed", "id", stored.Id, "error", err) + log.ErrorContext(ctx, "restore API call failed", "instance_id", stored.Id, "error", err) client.ShutdownVMMWithResponse(ctx) // Cleanup return fmt.Errorf("restore api call: %w", err) } if resp.StatusCode() != 204 { - log.ErrorContext(ctx, "restore API returned error", "id", stored.Id, "status", resp.StatusCode()) + log.ErrorContext(ctx, "restore API returned error", "instance_id", stored.Id, "status", resp.StatusCode()) client.ShutdownVMMWithResponse(ctx) // Cleanup return fmt.Errorf("restore failed with status %d", resp.StatusCode()) } - log.DebugContext(ctx, "VM restored from snapshot successfully", "id", 
stored.Id) + log.DebugContext(ctx, "VM restored from snapshot successfully", "instance_id", stored.Id) return nil } diff --git a/lib/instances/standby.go b/lib/instances/standby.go index 68e67661..8c2a2620 100644 --- a/lib/instances/standby.go +++ b/lib/instances/standby.go @@ -21,7 +21,7 @@ func (m *manager) standbyInstance( ) (*Instance, error) { start := time.Now() log := logger.FromContext(ctx) - log.InfoContext(ctx, "putting instance in standby", "id", id) + log.InfoContext(ctx, "putting instance in standby", "instance_id", id) // Start tracing span if tracer is available if m.metrics != nil && m.metrics.tracer != nil { @@ -33,17 +33,17 @@ func (m *manager) standbyInstance( // 1. Load instance meta, err := m.loadMetadata(id) if err != nil { - log.ErrorContext(ctx, "failed to load instance metadata", "id", id, "error", err) + log.ErrorContext(ctx, "failed to load instance metadata", "instance_id", id, "error", err) return nil, err } inst := m.toInstance(ctx, meta) stored := &meta.StoredMetadata - log.DebugContext(ctx, "loaded instance", "id", id, "state", inst.State) + log.DebugContext(ctx, "loaded instance", "instance_id", id, "state", inst.State) // 2. Validate state transition (must be Running to start standby flow) if inst.State != StateRunning { - log.ErrorContext(ctx, "invalid state for standby", "id", id, "state", inst.State) + log.ErrorContext(ctx, "invalid state for standby", "instance_id", id, "state", inst.State) return nil, fmt.Errorf("%w: cannot standby from state %s", ErrInvalidState, inst.State) } @@ -51,60 +51,60 @@ func (m *manager) standbyInstance( // This is needed to delete the TAP device after VMM shuts down var networkAlloc *network.Allocation if inst.NetworkEnabled { - log.DebugContext(ctx, "getting network allocation", "id", id) + log.DebugContext(ctx, "getting network allocation", "instance_id", id) networkAlloc, err = m.networkManager.GetAllocation(ctx, id) if err != nil { - log.WarnContext(ctx, "failed to get network allocation, will still attempt cleanup", "id", id, "error", err) + log.WarnContext(ctx, "failed to get network allocation, will still attempt cleanup", "instance_id", id, "error", err) } } // 4. Create VMM client client, err := vmm.NewVMM(inst.SocketPath) if err != nil { - log.ErrorContext(ctx, "failed to create VMM client", "id", id, "error", err) + log.ErrorContext(ctx, "failed to create VMM client", "instance_id", id, "error", err) return nil, fmt.Errorf("create vmm client: %w", err) } // 5. Reduce memory to base size (virtio-mem hotplug) - log.DebugContext(ctx, "reducing VM memory before snapshot", "id", id, "base_size", inst.Size) + log.DebugContext(ctx, "reducing VM memory before snapshot", "instance_id", id, "base_size", inst.Size) if err := reduceMemory(ctx, client, inst.Size); err != nil { // Log warning but continue - snapshot will just be larger - log.WarnContext(ctx, "failed to reduce memory, snapshot will be larger", "id", id, "error", err) + log.WarnContext(ctx, "failed to reduce memory, snapshot will be larger", "instance_id", id, "error", err) } // 6. Transition: Running → Paused - log.DebugContext(ctx, "pausing VM", "id", id) + log.DebugContext(ctx, "pausing VM", "instance_id", id) pauseResp, err := client.PauseVMWithResponse(ctx) if err != nil || pauseResp.StatusCode() != 204 { - log.ErrorContext(ctx, "failed to pause VM", "id", id, "error", err) + log.ErrorContext(ctx, "failed to pause VM", "instance_id", id, "error", err) return nil, fmt.Errorf("pause vm failed: %w", err) } // 7. 
Create snapshot snapshotDir := m.paths.InstanceSnapshotLatest(id) - log.DebugContext(ctx, "creating snapshot", "id", id, "snapshot_dir", snapshotDir) + log.DebugContext(ctx, "creating snapshot", "instance_id", id, "snapshot_dir", snapshotDir) if err := createSnapshot(ctx, client, snapshotDir); err != nil { // Snapshot failed - try to resume VM - log.ErrorContext(ctx, "snapshot failed, attempting to resume VM", "id", id, "error", err) + log.ErrorContext(ctx, "snapshot failed, attempting to resume VM", "instance_id", id, "error", err) client.ResumeVMWithResponse(ctx) return nil, fmt.Errorf("create snapshot: %w", err) } // 8. Stop VMM gracefully (snapshot is complete) - log.DebugContext(ctx, "shutting down VMM", "id", id) + log.DebugContext(ctx, "shutting down VMM", "instance_id", id) if err := m.shutdownVMM(ctx, &inst); err != nil { // Log but continue - snapshot was created successfully - log.WarnContext(ctx, "failed to shutdown VMM gracefully, snapshot still valid", "id", id, "error", err) + log.WarnContext(ctx, "failed to shutdown VMM gracefully, snapshot still valid", "instance_id", id, "error", err) } // 9. Release network allocation (delete TAP device) // TAP devices with explicit Owner/Group fields do NOT auto-delete when VMM exits // They must be explicitly deleted if inst.NetworkEnabled { - log.DebugContext(ctx, "releasing network", "id", id, "network", "default") + log.DebugContext(ctx, "releasing network", "instance_id", id, "network", "default") if err := m.networkManager.ReleaseAllocation(ctx, networkAlloc); err != nil { // Log error but continue - snapshot was created successfully - log.WarnContext(ctx, "failed to release network, continuing with standby", "id", id, "error", err) + log.WarnContext(ctx, "failed to release network, continuing with standby", "instance_id", id, "error", err) } } @@ -115,7 +115,7 @@ func (m *manager) standbyInstance( meta = &metadata{StoredMetadata: *stored} if err := m.saveMetadata(meta); err != nil { - log.ErrorContext(ctx, "failed to save metadata", "id", id, "error", err) + log.ErrorContext(ctx, "failed to save metadata", "instance_id", id, "error", err) return nil, fmt.Errorf("save metadata: %w", err) } @@ -127,7 +127,7 @@ func (m *manager) standbyInstance( // Return instance with derived state (should be Standby now) finalInst := m.toInstance(ctx, meta) - log.InfoContext(ctx, "instance put in standby successfully", "id", id, "state", finalInst.State) + log.InfoContext(ctx, "instance put in standby successfully", "instance_id", id, "state", finalInst.State) return &finalInst, nil } @@ -247,20 +247,20 @@ func (m *manager) shutdownVMM(ctx context.Context, inst *Instance) error { client, err := vmm.NewVMM(inst.SocketPath) if err != nil { // Can't connect - VMM might already be stopped - log.DebugContext(ctx, "could not connect to VMM, may already be stopped", "id", inst.Id) + log.DebugContext(ctx, "could not connect to VMM, may already be stopped", "instance_id", inst.Id) return nil } // Try graceful shutdown - log.DebugContext(ctx, "sending shutdown command to VMM", "id", inst.Id) + log.DebugContext(ctx, "sending shutdown command to VMM", "instance_id", inst.Id) client.ShutdownVMMWithResponse(ctx) // Wait for process to exit if inst.CHPID != nil { if !WaitForProcessExit(*inst.CHPID, 2*time.Second) { - log.WarnContext(ctx, "VMM did not exit gracefully in time", "id", inst.Id, "pid", *inst.CHPID) + log.WarnContext(ctx, "VMM did not exit gracefully in time", "instance_id", inst.Id, "pid", *inst.CHPID) } else { - log.DebugContext(ctx, "VMM shutdown 
gracefully", "id", inst.Id, "pid", *inst.CHPID) + log.DebugContext(ctx, "VMM shutdown gracefully", "instance_id", inst.Id, "pid", *inst.CHPID) } } diff --git a/lib/instances/start.go b/lib/instances/start.go index 5f044050..149c1995 100644 --- a/lib/instances/start.go +++ b/lib/instances/start.go @@ -18,7 +18,7 @@ func (m *manager) startInstance( ) (*Instance, error) { start := time.Now() log := logger.FromContext(ctx) - log.InfoContext(ctx, "starting instance", "id", id) + log.InfoContext(ctx, "starting instance", "instance_id", id) // Start tracing span if tracer is available if m.metrics != nil && m.metrics.tracer != nil { @@ -30,40 +30,40 @@ func (m *manager) startInstance( // 1. Load instance meta, err := m.loadMetadata(id) if err != nil { - log.ErrorContext(ctx, "failed to load instance metadata", "id", id, "error", err) + log.ErrorContext(ctx, "failed to load instance metadata", "instance_id", id, "error", err) return nil, err } inst := m.toInstance(ctx, meta) stored := &meta.StoredMetadata - log.DebugContext(ctx, "loaded instance", "id", id, "state", inst.State) + log.DebugContext(ctx, "loaded instance", "instance_id", id, "state", inst.State) // 2. Validate state (must be Stopped to start) if inst.State != StateStopped { - log.ErrorContext(ctx, "invalid state for start", "id", id, "state", inst.State) + log.ErrorContext(ctx, "invalid state for start", "instance_id", id, "state", inst.State) return nil, fmt.Errorf("%w: cannot start from state %s, must be Stopped", ErrInvalidState, inst.State) } // 3. Get image info (needed for buildVMConfig) - log.DebugContext(ctx, "getting image info", "id", id, "image", stored.Image) + log.DebugContext(ctx, "getting image info", "instance_id", id, "image", stored.Image) imageInfo, err := m.imageManager.GetImage(ctx, stored.Image) if err != nil { - log.ErrorContext(ctx, "failed to get image", "id", id, "image", stored.Image, "error", err) + log.ErrorContext(ctx, "failed to get image", "instance_id", id, "image", stored.Image, "error", err) return nil, fmt.Errorf("get image: %w", err) } // 4. Recreate network allocation if network enabled var netConfig *network.NetworkConfig if stored.NetworkEnabled { - log.DebugContext(ctx, "recreating network for start", "id", id, "network", "default") + log.DebugContext(ctx, "recreating network for start", "instance_id", id, "network", "default") if err := m.networkManager.RecreateAllocation(ctx, id); err != nil { - log.ErrorContext(ctx, "failed to recreate network", "id", id, "error", err) + log.ErrorContext(ctx, "failed to recreate network", "instance_id", id, "error", err) return nil, fmt.Errorf("recreate network: %w", err) } // Get the network config for VM configuration netAlloc, err := m.networkManager.GetAllocation(ctx, id) if err != nil { - log.ErrorContext(ctx, "failed to get network allocation", "id", id, "error", err) + log.ErrorContext(ctx, "failed to get network allocation", "instance_id", id, "error", err) // Cleanup network on failure if netAlloc != nil { m.networkManager.ReleaseAllocation(ctx, netAlloc) @@ -79,9 +79,9 @@ func (m *manager) startInstance( } // 5. 
Start VMM and boot VM (reuses logic from create) - log.InfoContext(ctx, "starting VMM and booting VM", "id", id) + log.InfoContext(ctx, "starting VMM and booting VM", "instance_id", id) if err := m.startAndBootVM(ctx, stored, imageInfo, netConfig); err != nil { - log.ErrorContext(ctx, "failed to start and boot VM", "id", id, "error", err) + log.ErrorContext(ctx, "failed to start and boot VM", "instance_id", id, "error", err) // Cleanup network on failure if stored.NetworkEnabled { if netAlloc, err := m.networkManager.GetAllocation(ctx, id); err == nil { @@ -98,7 +98,7 @@ func (m *manager) startInstance( meta = &metadata{StoredMetadata: *stored} if err := m.saveMetadata(meta); err != nil { // VM is running but metadata failed - log but don't fail - log.WarnContext(ctx, "failed to update metadata after VM start", "id", id, "error", err) + log.WarnContext(ctx, "failed to update metadata after VM start", "instance_id", id, "error", err) } // Record metrics @@ -109,6 +109,6 @@ func (m *manager) startInstance( // Return instance with derived state (should be Running now) finalInst := m.toInstance(ctx, meta) - log.InfoContext(ctx, "instance started successfully", "id", id, "state", finalInst.State) + log.InfoContext(ctx, "instance started successfully", "instance_id", id, "state", finalInst.State) return &finalInst, nil } diff --git a/lib/instances/stop.go b/lib/instances/stop.go index 5a37c809..47336caf 100644 --- a/lib/instances/stop.go +++ b/lib/instances/stop.go @@ -18,7 +18,7 @@ func (m *manager) stopInstance( ) (*Instance, error) { start := time.Now() log := logger.FromContext(ctx) - log.InfoContext(ctx, "stopping instance", "id", id) + log.InfoContext(ctx, "stopping instance", "instance_id", id) // Start tracing span if tracer is available if m.metrics != nil && m.metrics.tracer != nil { @@ -30,44 +30,44 @@ func (m *manager) stopInstance( // 1. Load instance meta, err := m.loadMetadata(id) if err != nil { - log.ErrorContext(ctx, "failed to load instance metadata", "id", id, "error", err) + log.ErrorContext(ctx, "failed to load instance metadata", "instance_id", id, "error", err) return nil, err } inst := m.toInstance(ctx, meta) stored := &meta.StoredMetadata - log.DebugContext(ctx, "loaded instance", "id", id, "state", inst.State) + log.DebugContext(ctx, "loaded instance", "instance_id", id, "state", inst.State) // 2. Validate state transition (must be Running to stop) if inst.State != StateRunning { - log.ErrorContext(ctx, "invalid state for stop", "id", id, "state", inst.State) + log.ErrorContext(ctx, "invalid state for stop", "instance_id", id, "state", inst.State) return nil, fmt.Errorf("%w: cannot stop from state %s, must be Running", ErrInvalidState, inst.State) } // 3. Get network allocation BEFORE killing VMM (while we can still query it) var networkAlloc *network.Allocation if inst.NetworkEnabled { - log.DebugContext(ctx, "getting network allocation", "id", id) + log.DebugContext(ctx, "getting network allocation", "instance_id", id) networkAlloc, err = m.networkManager.GetAllocation(ctx, id) if err != nil { - log.WarnContext(ctx, "failed to get network allocation, will still attempt cleanup", "id", id, "error", err) + log.WarnContext(ctx, "failed to get network allocation, will still attempt cleanup", "instance_id", id, "error", err) } } // 4. 
Shutdown VMM process // TODO: Add graceful shutdown via vsock signal to allow app to clean up - log.DebugContext(ctx, "shutting down VMM", "id", id) + log.DebugContext(ctx, "shutting down VMM", "instance_id", id) if err := m.shutdownVMM(ctx, &inst); err != nil { // Log but continue - try to clean up anyway - log.WarnContext(ctx, "failed to shutdown VMM gracefully", "id", id, "error", err) + log.WarnContext(ctx, "failed to shutdown VMM gracefully", "instance_id", id, "error", err) } // 5. Release network allocation (delete TAP device) if inst.NetworkEnabled && networkAlloc != nil { - log.DebugContext(ctx, "releasing network", "id", id, "network", "default") + log.DebugContext(ctx, "releasing network", "instance_id", id, "network", "default") if err := m.networkManager.ReleaseAllocation(ctx, networkAlloc); err != nil { // Log error but continue - log.WarnContext(ctx, "failed to release network, continuing", "id", id, "error", err) + log.WarnContext(ctx, "failed to release network, continuing", "instance_id", id, "error", err) } } @@ -78,7 +78,7 @@ func (m *manager) stopInstance( meta = &metadata{StoredMetadata: *stored} if err := m.saveMetadata(meta); err != nil { - log.ErrorContext(ctx, "failed to save metadata", "id", id, "error", err) + log.ErrorContext(ctx, "failed to save metadata", "instance_id", id, "error", err) return nil, fmt.Errorf("save metadata: %w", err) } @@ -90,6 +90,6 @@ func (m *manager) stopInstance( // Return instance with derived state (should be Stopped now) finalInst := m.toInstance(ctx, meta) - log.InfoContext(ctx, "instance stopped successfully", "id", id, "state", finalInst.State) + log.InfoContext(ctx, "instance stopped successfully", "instance_id", id, "state", finalInst.State) return &finalInst, nil } diff --git a/lib/instances/storage.go b/lib/instances/storage.go index 35efee99..1332137a 100644 --- a/lib/instances/storage.go +++ b/lib/instances/storage.go @@ -15,9 +15,10 @@ import ( // overlay.raw # Configurable sparse overlay disk (default 10GB) // config.ext4 # Read-only config disk (generated) // ch.sock # Cloud Hypervisor API socket -// ch-stdout.log # CH process output // logs/ -// console.log # Serial console output +// app.log # Guest application log (serial console output) +// vmm.log # Cloud Hypervisor VMM log (stdout+stderr combined) +// hypeman.log # Hypeman operations log (actions taken on this instance) // snapshots/ // snapshot-latest/ # Snapshot directory // config.json @@ -141,4 +142,3 @@ func (m *manager) listMetadataFiles() ([]string, error) { return metaFiles, nil } - diff --git a/lib/logger/README.md b/lib/logger/README.md index 4996bfeb..7e2db7a4 100644 --- a/lib/logger/README.md +++ b/lib/logger/README.md @@ -8,6 +8,7 @@ Structured logging with per-subsystem log levels and OpenTelemetry trace context - Automatic trace_id/span_id injection when OTel is active - Context-based logger propagation - JSON output format +- Per-instance log files via `InstanceLogHandler` ## Configuration @@ -35,7 +36,25 @@ ctx = logger.AddToContext(ctx, log) // Retrieve from context log = logger.FromContext(ctx) -log.InfoContext(ctx, "instance created", "id", instanceID) +log.InfoContext(ctx, "instance created", "instance_id", instanceID) +``` + +## Per-Instance Logging + +The `InstanceLogHandler` automatically writes logs with an `"instance_id"` attribute to per-instance `hypeman.log` files. This provides an operations audit trail for each VM. 
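+The `instance_id` attribute can come either from the log call itself or from a logger bound earlier via `With("instance_id", ...)`.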
+ +```go +// Wrap any handler with instance logging +handler := logger.NewInstanceLogHandler(baseHandler, func(id string) string { + return paths.InstanceHypemanLog(id) +}) + +// Logs with "instance_id" attribute are automatically written to that instance's hypeman.log +log.InfoContext(ctx, "starting VM", "instance_id", instanceID) + +// Related operations (e.g., ingress creation) can also include instance_id +// to appear in the instance's audit log +log.InfoContext(ctx, "ingress created", "ingress_id", ingressID, "instance_id", targetInstance) ``` ## Output @@ -49,7 +68,7 @@ When OTel tracing is active, logs include trace context: "subsystem": "INSTANCES", "trace_id": "abc123...", "span_id": "def456...", - "id": "instance-123" + "instance_id": "instance-123" } ``` diff --git a/lib/logger/instance_handler.go b/lib/logger/instance_handler.go new file mode 100644 index 00000000..6a034177 --- /dev/null +++ b/lib/logger/instance_handler.go @@ -0,0 +1,163 @@ +// Package logger provides structured logging with subsystem-specific levels +// and OpenTelemetry trace context integration. +package logger + +import ( + "context" + "fmt" + "log/slog" + "os" + "path/filepath" + "time" +) + +// InstanceLogHandler wraps an slog.Handler and additionally writes logs +// that have an "id" attribute to a per-instance hypeman.log file. +// This provides automatic per-instance logging without manual instrumentation. +// +// Implementation follows the slog handler guide for shared state across +// WithAttrs/WithGroup: https://pkg.go.dev/golang.org/x/example/slog-handler-guide +type InstanceLogHandler struct { + slog.Handler + logPathFunc func(id string) string // returns path to hypeman.log for an instance + preAttrs []slog.Attr // attrs added via WithAttrs (needed to find "id") +} + +// NewInstanceLogHandler creates a new handler that wraps the given handler +// and writes instance-related logs to per-instance log files. +// logPathFunc should return the path to hypeman.log for a given instance ID. +func NewInstanceLogHandler(wrapped slog.Handler, logPathFunc func(id string) string) *InstanceLogHandler { + return &InstanceLogHandler{ + Handler: wrapped, + logPathFunc: logPathFunc, + } +} + +// Handle processes a log record, passing it to the wrapped handler and +// optionally writing to a per-instance log file if "id" attribute is present. +func (h *InstanceLogHandler) Handle(ctx context.Context, r slog.Record) error { + // Always pass to wrapped handler first + if err := h.Handler.Handle(ctx, r); err != nil { + return err + } + + // Check for instance ID in pre-bound attrs first (from WithAttrs) + var instanceID string + for _, a := range h.preAttrs { + if a.Key == "instance_id" { + instanceID = a.Value.String() + break + } + } + + // Then check record attrs (overrides pre-bound if present) + r.Attrs(func(a slog.Attr) bool { + if a.Key == "instance_id" { + instanceID = a.Value.String() + return false // stop iteration + } + return true + }) + + // If instance ID found, also write to per-instance log + if instanceID != "" { + h.writeToInstanceLog(instanceID, r) + } + + return nil +} + +// writeToInstanceLog writes a log record to the instance's hypeman.log file. +// Opens and closes the file for each write to avoid file handle leaks. 
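+// Records whose instance directory does not yet exist are skipped, so non-instance
+// IDs (for example ingress or volume IDs) never create orphan log directories.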
+func (h *InstanceLogHandler) writeToInstanceLog(instanceID string, r slog.Record) { + logPath := h.logPathFunc(instanceID) + if logPath == "" { + return + } + + // Check if the instance directory exists - if not, this "id" isn't an instance ID + // (could be an ingress ID, volume ID, etc.). Skip to avoid creating orphan directories. + dir := filepath.Dir(logPath) + instanceDir := filepath.Dir(dir) // logs dir -> instance dir + if _, err := os.Stat(instanceDir); os.IsNotExist(err) { + return // not a valid instance, skip silently + } + + // Format log line: timestamp LEVEL message key=value key=value... + timestamp := r.Time.Format(time.RFC3339) + level := r.Level.String() + msg := r.Message + + // Collect attributes (excluding "instance_id" since it's implicit) + // Include both pre-bound attrs and record attrs + var attrs []string + for _, a := range h.preAttrs { + if a.Key != "instance_id" { + attrs = append(attrs, fmt.Sprintf("%s=%v", a.Key, a.Value)) + } + } + r.Attrs(func(a slog.Attr) bool { + if a.Key != "instance_id" { + attrs = append(attrs, fmt.Sprintf("%s=%v", a.Key, a.Value)) + } + return true + }) + + // Build log line + line := fmt.Sprintf("%s %s %s", timestamp, level, msg) + for _, attr := range attrs { + line += " " + attr + } + line += "\n" + + // Ensure logs directory exists (dir was already computed above) + if err := os.MkdirAll(dir, 0755); err != nil { + // Use package-level slog (not our handler) to avoid recursion. + // No "id" attr means this won't trigger writeToInstanceLog. + slog.Warn("failed to create instance log directory", "path", dir, "error", err) + return + } + + // Open, write, close (no caching = no leak) + f, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + slog.Warn("failed to open instance log file", "path", logPath, "error", err) + return + } + defer f.Close() + + if _, err := f.WriteString(line); err != nil { + slog.Warn("failed to write to instance log file", "path", logPath, "error", err) + } +} + +// Enabled reports whether the handler handles records at the given level. +func (h *InstanceLogHandler) Enabled(ctx context.Context, level slog.Level) bool { + return h.Handler.Enabled(ctx, level) +} + +// WithAttrs returns a new handler with the given attributes. +// Tracks attrs locally so we can find "id" even when added via With(). +func (h *InstanceLogHandler) WithAttrs(attrs []slog.Attr) slog.Handler { + // Combine existing pre-attrs with new ones + newPreAttrs := make([]slog.Attr, len(h.preAttrs), len(h.preAttrs)+len(attrs)) + copy(newPreAttrs, h.preAttrs) + newPreAttrs = append(newPreAttrs, attrs...) + + return &InstanceLogHandler{ + Handler: h.Handler.WithAttrs(attrs), + logPathFunc: h.logPathFunc, + preAttrs: newPreAttrs, + } +} + +// WithGroup returns a new handler with the given group name. +func (h *InstanceLogHandler) WithGroup(name string) slog.Handler { + // Note: We don't track groups for "id" lookup since instance IDs + // should always be at the top level, not nested in groups. + return &InstanceLogHandler{ + Handler: h.Handler.WithGroup(name), + logPathFunc: h.logPathFunc, + preAttrs: h.preAttrs, + } +} diff --git a/lib/middleware/README.md b/lib/middleware/README.md new file mode 100644 index 00000000..5a1e5610 --- /dev/null +++ b/lib/middleware/README.md @@ -0,0 +1,21 @@ +# middleware + +HTTP middleware for the hypeman API. + +## Authentication + +JWT bearer token validation for protected endpoints. Extracts user identity and adds it to the request context. 
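+
+The resource resolver described below is an ordinary `func(http.Handler) http.Handler` wrapper, so it can be mounted per-route on the chi router. A hypothetical sketch (`instanceResolver`, `writeAPIError`, and `getInstanceHandler` are placeholder names, not the server's actual wiring):
+
+```go
+r := chi.NewRouter()
+resolve := mw.ResolveResource(mw.Resolvers{Instance: instanceResolver}, writeAPIError)
+r.Route("/instances/{id}", func(r chi.Router) {
+	r.Use(resolve) // "{id}" is already matched here, so the middleware can read it
+	r.Get("/", getInstanceHandler) // handler fetches mw.GetResolvedInstance[instances.Instance](ctx)
+})
+```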
+ +## Resource Resolution + +Automatically resolves user-provided identifiers (IDs, names, or prefixes) to full resource objects before handlers run. This enables: + +- **Flexible lookups**: Users can reference resources by full ID, name, or ID prefix +- **Consistent error handling**: Returns 404 for not-found, handles ambiguous matches +- **Automatic logging enrichment**: The resolved resource ID is added to the request logger + +Handlers can trust that if they're called, the resource exists and is available via `mw.GetResolvedInstance[T](ctx)` etc. + +## Observability + +OpenTelemetry instrumentation for HTTP requests, including request counts, latencies, and status codes. diff --git a/lib/middleware/resolve.go b/lib/middleware/resolve.go new file mode 100644 index 00000000..75593370 --- /dev/null +++ b/lib/middleware/resolve.go @@ -0,0 +1,197 @@ +// Package middleware provides HTTP middleware for the hypeman API. +package middleware + +import ( + "context" + "net/http" + "strings" + + "github.com/go-chi/chi/v5" + "github.com/onkernel/hypeman/lib/logger" +) + +// ResourceResolver is implemented by managers that support lookup by ID, name, or prefix. +type ResourceResolver interface { + // Resolve looks up a resource by ID, name, or ID prefix. + // Returns the resolved ID, the resource, and any error. + // Should return ErrNotFound if not found, ErrAmbiguousName if prefix matches multiple. + Resolve(ctx context.Context, idOrName string) (id string, resource any, err error) +} + +// resolvedResourceKey is the context key for storing the resolved resource. +type resolvedResourceKey struct{ resourceType string } + +// ResolvedResource holds the resolved resource ID and value. +type ResolvedResource struct { + ID string + Resource any +} + +// Resolvers holds resolvers for different resource types. +type Resolvers struct { + Instance ResourceResolver + Volume ResourceResolver + Ingress ResourceResolver + Image ResourceResolver +} + +// ErrorResponder handles resolver errors by writing HTTP responses. +type ErrorResponder func(w http.ResponseWriter, err error, lookup string) + +// ResolveResource creates middleware that resolves resource IDs before handlers run. +// It detects the resource type from the URL path and uses the appropriate resolver. +// The resolved resource is stored in context and the logger is enriched with the ID. 
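+// Requests without an {id}/{name} path parameter (list and create endpoints) pass through unchanged.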
+// +// Supported paths: +// - /instances/{id}/* -> uses Instance resolver +// - /volumes/{id}/* -> uses Volume resolver +// - /ingresses/{id}/* -> uses Ingress resolver +// - /images/{name}/* -> uses Image resolver (by name, not ID) +func ResolveResource(resolvers Resolvers, errResponder ErrorResponder) func(http.Handler) http.Handler { + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + path := r.URL.Path + + // Determine resource type and resolver based on path + var resolver ResourceResolver + var resourceType string + var paramName string + + switch { + case strings.HasPrefix(path, "/instances/"): + resolver = resolvers.Instance + resourceType = "instance" + paramName = "id" + case strings.HasPrefix(path, "/volumes/"): + resolver = resolvers.Volume + resourceType = "volume" + paramName = "id" + case strings.HasPrefix(path, "/ingresses/"): + resolver = resolvers.Ingress + resourceType = "ingress" + paramName = "id" + case strings.HasPrefix(path, "/images/"): + resolver = resolvers.Image + resourceType = "image" + paramName = "name" + default: + // No resource to resolve (e.g., list endpoints, health) + next.ServeHTTP(w, r) + return + } + + // Skip if no resolver configured for this resource type + if resolver == nil { + next.ServeHTTP(w, r) + return + } + + // Get the ID parameter from the URL + idOrName := chi.URLParam(r, paramName) + if idOrName == "" { + // No ID in path (e.g., list or create endpoint) + next.ServeHTTP(w, r) + return + } + + // Resolve the resource + resolvedID, resource, err := resolver.Resolve(ctx, idOrName) + if err != nil { + errResponder(w, err, idOrName) + return + } + + // Store resolved resource in context + ctx = context.WithValue(ctx, resolvedResourceKey{resourceType}, ResolvedResource{ + ID: resolvedID, + Resource: resource, + }) + + // Enrich logger with resource-specific key + // Use "image_name" for images (keyed by OCI reference), "_id" for others + logKey := resourceType + "_id" + if resourceType == "image" { + logKey = "image_name" + } + log := logger.FromContext(ctx).With(logKey, resolvedID) + ctx = logger.AddToContext(ctx, log) + + next.ServeHTTP(w, r.WithContext(ctx)) + }) + } +} + +// GetResolvedInstance retrieves the resolved instance from context. +// Returns nil if not found or wrong type. +func GetResolvedInstance[T any](ctx context.Context) *T { + return getResolved[T](ctx, "instance") +} + +// GetResolvedVolume retrieves the resolved volume from context. +// Returns nil if not found or wrong type. +func GetResolvedVolume[T any](ctx context.Context) *T { + return getResolved[T](ctx, "volume") +} + +// GetResolvedIngress retrieves the resolved ingress from context. +// Returns nil if not found or wrong type. +func GetResolvedIngress[T any](ctx context.Context) *T { + return getResolved[T](ctx, "ingress") +} + +// GetResolvedImage retrieves the resolved image from context. +// Returns nil if not found or wrong type. +func GetResolvedImage[T any](ctx context.Context) *T { + return getResolved[T](ctx, "image") +} + +// GetResolvedID retrieves just the resolved ID for a resource type. +func GetResolvedID(ctx context.Context, resourceType string) string { + if resolved, ok := ctx.Value(resolvedResourceKey{resourceType}).(ResolvedResource); ok { + return resolved.ID + } + return "" +} + +// getResolved is a generic helper to extract typed resources from context. 
+func getResolved[T any](ctx context.Context, resourceType string) *T { + resolved, ok := ctx.Value(resolvedResourceKey{resourceType}).(ResolvedResource) + if !ok { + return nil + } + + // Handle pointer types + if typed, ok := resolved.Resource.(*T); ok { + return typed + } + + // Handle value types + if typed, ok := resolved.Resource.(T); ok { + return &typed + } + + return nil +} + +// Test helpers for setting resolved resources in context (used by tests) + +// WithResolvedInstance returns a context with the given instance set as resolved. +func WithResolvedInstance(ctx context.Context, id string, inst any) context.Context { + return context.WithValue(ctx, resolvedResourceKey{"instance"}, ResolvedResource{ID: id, Resource: inst}) +} + +// WithResolvedVolume returns a context with the given volume set as resolved. +func WithResolvedVolume(ctx context.Context, id string, vol any) context.Context { + return context.WithValue(ctx, resolvedResourceKey{"volume"}, ResolvedResource{ID: id, Resource: vol}) +} + +// WithResolvedIngress returns a context with the given ingress set as resolved. +func WithResolvedIngress(ctx context.Context, id string, ing any) context.Context { + return context.WithValue(ctx, resolvedResourceKey{"ingress"}, ResolvedResource{ID: id, Resource: ing}) +} + +// WithResolvedImage returns a context with the given image set as resolved. +func WithResolvedImage(ctx context.Context, id string, img any) context.Context { + return context.WithValue(ctx, resolvedResourceKey{"image"}, ResolvedResource{ID: id, Resource: img}) +} diff --git a/lib/network/bridge.go b/lib/network/bridge.go index 45db556e..75c2b456 100644 --- a/lib/network/bridge.go +++ b/lib/network/bridge.go @@ -430,7 +430,7 @@ func (m *manager) createTAPDevice(tapName, bridgeName string, isolated bool) err // This allows Cloud Hypervisor (running as current user) to access the TAP uid := os.Getuid() gid := os.Getgid() - + tap := &netlink.Tuntap{ LinkAttrs: netlink.LinkAttrs{ Name: tapName, @@ -535,10 +535,17 @@ func (m *manager) queryNetworkState(bridgeName string) (*Network, error) { // CleanupOrphanedTAPs removes TAP devices that aren't used by any running instance. // runningInstanceIDs is a list of instance IDs that currently have a running VMM. +// Pass nil to skip cleanup entirely (used when we couldn't determine running instances). // Returns the number of TAPs deleted. func (m *manager) CleanupOrphanedTAPs(ctx context.Context, runningInstanceIDs []string) int { log := logger.FromContext(ctx) + // If nil, skip cleanup entirely to avoid accidentally deleting TAPs for running VMs + if runningInstanceIDs == nil { + log.DebugContext(ctx, "skipping TAP cleanup (nil instance list)") + return 0 + } + // Build set of expected TAP names for running instances expectedTAPs := make(map[string]bool) for _, id := range runningInstanceIDs { @@ -578,6 +585,3 @@ func (m *manager) CleanupOrphanedTAPs(ctx context.Context, runningInstanceIDs [] return deleted } - - - diff --git a/lib/network/derive.go b/lib/network/derive.go index 7d40900b..86c3bb22 100644 --- a/lib/network/derive.go +++ b/lib/network/derive.go @@ -183,4 +183,3 @@ func fileExists(path string) bool { _, err := os.Stat(path) return err == nil } - diff --git a/lib/oapi/oapi.go b/lib/oapi/oapi.go index 690aa379..4709a156 100644 --- a/lib/oapi/oapi.go +++ b/lib/oapi/oapi.go @@ -54,6 +54,13 @@ const ( Unknown InstanceState = "Unknown" ) +// Defines values for GetInstanceLogsParamsSource. 
+const ( + App GetInstanceLogsParamsSource = "app" + Hypeman GetInstanceLogsParamsSource = "hypeman" + Vmm GetInstanceLogsParamsSource = "vmm" +) + // AttachVolumeRequest defines model for AttachVolumeRequest. type AttachVolumeRequest struct { // MountPath Path where volume should be mounted @@ -382,8 +389,17 @@ type GetInstanceLogsParams struct { // Follow Continue streaming new lines after initial output Follow *bool `form:"follow,omitempty" json:"follow,omitempty"` + + // Source Log source to stream: + // - app: Guest application logs (serial console output) + // - vmm: Cloud Hypervisor VMM logs (hypervisor stdout+stderr) + // - hypeman: Hypeman operations log (actions taken on this instance) + Source *GetInstanceLogsParamsSource `form:"source,omitempty" json:"source,omitempty"` } + +// GetInstanceLogsParamsSource defines parameters for GetInstanceLogs. +type GetInstanceLogsParamsSource string + // CreateVolumeMultipartBody defines parameters for CreateVolume. type CreateVolumeMultipartBody struct { // Content tar.gz archive file containing the volume content @@ -1414,6 +1430,22 @@ func NewGetInstanceLogsRequest(server string, id string, params *GetInstanceLogs } + if params.Source != nil { + + if queryFrag, err := runtime.StyleParamWithLocation("form", true, "source", runtime.ParamLocationQuery, *params.Source); err != nil { + return nil, err + } else if parsed, err := url.ParseQuery(queryFrag); err != nil { + return nil, err + } else { + for k, v := range parsed { + for _, v2 := range v { + queryValues.Add(k, v2) + } + } + } + + } + queryURL.RawQuery = queryValues.Encode() } @@ -4365,6 +4397,14 @@ func (siw *ServerInterfaceWrapper) GetInstanceLogs(w http.ResponseWriter, r *htt return } + // ------------- Optional query parameter "source" ------------- + + err = runtime.BindQueryParameter("form", true, false, "source", r.URL.Query(), &params.Source) + if err != nil { + siw.ErrorHandlerFunc(w, r, &InvalidParamFormatError{ParamName: "source", Err: err}) + return + } + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { siw.Handler.GetInstanceLogs(w, r, id, params) })) @@ -6597,87 +6637,90 @@ func (sh *strictHandler) GetVolume(w http.ResponseWriter, r *http.Request, id st // Base64 encoded, gzipped, json marshaled Swagger object var swaggerSpec = []string{ - "H4sIAAAAAAAC/+xdC2/buJb+Kwfae7HOQn4kaXtbXywWmaTtZNC0QdJm9m7TzdDSsc0pRaok5cQN8t8X", - "fEiWLPmRaeI22wAF6lh8nffHw0P5OohEkgqOXKugfx2oaIwJsR/3tCbR+EywLMET/JKh0ubrVIoUpaZo",
"RENkfKnRP02t113LtbuRLeAoObil3tz5lcXefAXeonGDT0q9W4wypUUCNEau6ZCihBbJtGiPkKMkGmOg", - "QzBOIZViQmOMqxKbCNY28Mt6gDXdlFsueOIqDsUO5YSySDUvRoP6kKdGAymHER2RwVRXg812ry76Zkbn", - "4zex+qWUQtaZG4m4gcS9NGU0ssrRVilGdEgjQDMCmA7QSkg0phwLc6lydUDiC+nFGTYFW00oa9DaUrhz", - "k/mW0DIeMsmYpilD90xtrauxlvIDO1JdY8OAco7yAnP23GKkBJVqjJhzgSynpWhiHX6Mg2w0Miwps+6I", - "KmXxl5cuDCmyuO8C8ErQa6U5W9hCPfA0rKkNb0wIbjOcICsrgbMos9hESIRCT5zQKlRRPiGMxheUp1mj", - "Sixk5atM2ojmBgUyEJm2jswJrDyJ3atYWx+KjMeNzKqx41ckzG3kqpxQmujMx94sMbwVnw0/Z9OJzyvF", - "4QdpEsNhjrXmBJA0OLv9owMYSpEY1KAJ5SghQU38trFY0cfAbpCCMGgbnYoJJoKDGA7/aVZQmErdy2WM", - "GT2dQwCFgdgwgfEF0Q1LK4cQpUmSQuvk1f7u7u6L+Wi987Td225vP32/3ev3zL//CcLARVkDIonGto9D", - "dYdBRz4yzG1WUAk2wRgSwukQlQbfsjyzGpOdp8/6ZBBt7+zGOHzy9Fmn02maBrmW01RQ3jDVy+LZeqLo", - "OlTcno3ZUeNvk8M97GnWoeU6ON57/2vQD7qZkl0mIsK6akB5v/R38efsgf3g/hxQ3rgXKnzu3Eqti/Ee", - "wYRvZ0ZAFQwJZXMZlDRjzH/fN5RwjAqFFNbZLODrqjD/1qgmo18xhsaMhiYjs8dwGvdtqYsw+JJhhhep", - "UNTNXssr+ScGJAwyymKwPaBliMshjv2qCnB2FpJfQpEWNjjYUZv4oIDqZmbTxs+ZcU2ZzTdNKzM+3X32", - "/B+9F9s7JeOmXD97Eqy1lMLtzsF1S7N/GhY+OUUeuwhq1MB9igSfGKuwf9j1GT/jFKfiwPNnNWGYjRHl", - "o4uYNmjn7+4hxFRipO2WfLUNBV2SpqtVsRnVFT6tIL/kkRtji0/W1KPLd/fkTVh+r4rXM06/ZFhC9NXZ", - "341++/Lf6vgff25/eXN29q/J698O3tJ/nbHjd9+UaFieePuu2bOlWyxqvGEla7auehwRHTUAn7FQegHX", - "/BOzlUxM5w7sEw4D7J/zNryhGiVhfTgPSEo7npmdSCTnAbTwikTa9QLBwQwFYyQxyi3T+dglW0zn63yH", - "ejM/RjzlJKERSM/kAVEYm9FUNohFQijfOufn3I8FOSEKMuVkHENEUp1JNBKBKJNsCgNJIuPTXH56NnkI", - "1yRNb7bOuR4TDXilpaEgJVIrs003EDSfwQrar6oD7w04dc0xhglhGSqILKPOeRE/YrMEM4gmcoS6U+zL", - "Ld7vnPOKQjYzpUkxUyF1JdXwvBc2yBFMOyNIRpVGDkUSiiqrvNDKE0XPexXzf957vno7WujQEvWz2l0/", - "W8mVcg37cApsp3bO+GKsdbr6sMT6G2cj8Ov798eGDeb/U8gHmvGiEHFLcDYFYvbFqOByjBw0s5jE5+a2", - "Gk5cwsBJd02C3rvGphtTq+l4aSeG929OQaNMKHf+uxUZdg7N9h3B5kKoUplRRUpgb//o5VZnjcMhy9ti", - "/Uvk+L6gcC5rk+cya37E9Zhlogx/Qzg8CA2c8hY6A1od4x9eCQnMOZiZXffhg8JqUsuKSkg4PPDxmU1z", - "0z73Xv082MpHTOc9RR9OCnxHiqXY+F6x92LImV3aYc/570YxMruDr40eVtdqLC3fv3jXRpjRMQ0+d2JD", - "8WJXsNz8GzhubV7w+VTz7Wy7nKM2kzWrxkz2945Adm+HQO7nZKh+zkPUheIkVWPRQGqepyeQtwG8okpX", - "MENdQN4N1N1M/VSp6vDdedGSdPd650N/AbxBa//D4cGOT6dXp9Ffn5AXz6+uiH7xjF6qF1+TgRz9uUse", - "yNnU0tOkbz0S8hBjvROhJtUq+xl3BIDxXz4ECgOaNsheKTriGMPhMZA4NtGgvCHNh68KffvFTmf72fPO", - "dq/X2e6tsz1PSLRk7qO9/fUn7+24DUufDPpR3MfhN6QHvNhcQCDskkwVnOch+zxwGKEEDkpK6cP6WgnK", - "+lnbXztam5PCysOz2xyWreU97KnsAtd/ak9sb+/3ny70+yulajbTuBqYOSM6tY3zXhe3SVwhRCJjMf93", - "DQNjeQ6qYewRpULtNMW1pQo+8M9cXPIq6S5/Yez3S4ZyCmdHR5Vsl8RhpmxmbA3CRZoulINIbyWGnRXh", - "d+VqSmejmzgPnfeEJf9656ef5a15fnzjtG6NLXpZ72rU5I+d0tjttztJjftGM8CPDoNMQ1EQYVRun4ks", - "hl+nKcoJVUKC2TNM0CLik4xzykdmBBszIvOETUG675d3PiZG/fK+qf1reY/TcaZjccltHzXONJi/7JIN", - "CR4OLR/CaXIf3grbx680NO5/Dle55oTHg2m9+TwGa7nduoHnWkiM7WTeLPvwqjDFwpi98bYU+o/OQ/hj", - "PnuEueVgvM9bemkFYeC5HoSBY2EQBjlnzEdHof1kFx+EgV9ISW9m1uTUsw67ndonecXj3PEeVdpYWpRJ", - "aXBtqTG0MEn1NE8w59azdTtz2SsGbDp/vestQe/FXSQlPyzNQv4/KRsoe6h8kpW+qSbThVv/iybGHh7M", - "Y1u33/HlslW0OneIrHTbHeM1HiEvKct19bHmWZ52G2Xz54S3KMVtLLkaY8ly3O5+Vou7agu3YJN9YSVU", - "oqy0ksWyceHpG+uWqcoLlv8iyzwiXZ3Hcs4QUpTtQiVyOGs86KWk9tjJM8gx1rDgPw2maM67LUfNR+Sq", - "mMHiWaJgrvzL0THL9NgCsK0OnOR1GXSYD2GX0anC62YIvH5Bd65VdWEsq/DOAVCj4Xn/s8SjLbKtOeWc", - "zREuLyI3rgujTFI9PTUBwanhAIlEuZc5NbSRwhJhv55NbnO5Nze2QGco6uS8Ro6SRrB3fGi1JCGcjIzI", - "zo6A0SFG04ihT8XVQIQtCn23f9h2Zwj5zt1mcqi2DDGtE8LN+EEYTFAqN2+vs9Oxpb0iRU5SGvSD3c52", - "x+xkDRssid1xUVXic6PGDm0kO4zt2rWvOzGcVangyvFmp9dzZThce89KZpVY3T+VOxp20XVV7PUzWBbO", - "hQ3DBpcZcAt12FVlSULk1NBuv4VojNFn+6hr8ataSJCBEIeuyTdStN4BmUXTdfxdozSHNn75N2HwpLd9", - 
"Zxx2JXUN037gJNNjIelXjM2kT+9QrAsnPeQaJScMFMoJSl8gVTbCoP+xan4fP918KsvdsmvGq1SoBlmX", - "LoYEzjGg0r+IeHpnJDZcPbmpOiHjcW9qmrZzZyvwCtbAZJtrHOTVCG5XRdSUR1tOuzYg6F9InJ8QfDeN", - "ftJ7sgGNnivoe0CWdJwxZu8n+GqUWQlR2Z92rw34vnHBjaHb6let7cB+n1tbSiRJzK5T2RXMyejkTRt5", - "JGKDThzrfPLBPPXw0e1NioqBikWFJcbNQ4BPNWt70oDt7ayOlEc1WUNNnHRzxQgXooVvkL+DsLObd3/f", - "eeWPQv6+88odhvx9d292Ae9+lKW3Kdec15Y/Kt9K5XuNPtjPmGZdkzvXX4X2ilYbAXy+gOo2kK9Y4CPq", - "Wwf1ldm1FPgVtWz3CP2ql3PXAn93J+BC2Zq4bR/lCfafDPK9uP9J9wUfMhppaOca6fbqNkVowxlhtm43", - "z9Xb67C+LIdyyBQ+JNPzqS9aaFzZ/3avabwONiwMcik6yFX38CD0FVeuTiqVOKRXzYHfpnjuGiP6dWwc", - "Jfp5K6F6Izq9lwzoKBOZKpfF2AI7VLNLbRUH/NDw6yw8L0SwP7CW9jYZOjYOUB/1/p6g87xAnfN2Bxir", - "wHPeajPg2dc43Ao95yt8RM9roecSu5aj56Le5D7hc/XtKBvHz7m+NTHcH/D9jAj6gaFSwn2Oe1YlVfVx", - "awPUWQ3r8tjvdePwAGyhz6Lk5f3AUj/55nFpfnfhIeaQbMGgfQ9PjgRnsWYxFPzR9KG3Wd+3eQj4kFXs", - "dfmuUDPYso6oy8SoDLvmK1slkmR2rQIiwZVgCKYXEAWndoHtU+QaXk4MdZ1zfoI6k1zZ8hdGlIa3wChH", - "BS3DNikYwxgGU/jDrOoPKNR5y9714SD8a2rY9JybHpRnqEDZtVA+Ao6XfkA6hD+GgjFxactJ/nDXfhba", - "zhtD63eyn3Bxsa6jRQuQlnHu6hTad3HYeW3x8mxi/56Q2VRFUcx2r7F67bqeLrI8bWQpGWp7D4JqShiI", - "TLt3jzQtxHG+eSmLKrZWuxGNV7qLRpfabn1Vg5rnax2Mi5EnDFqnpy+3Hh3GmjHJsqywdGvhnoENbsNX", - "+dp6uUbkfuIa/PRhKy+H/s5quPk8dGkV1F7U4PFgamU7qzN/SAbiFXpGmXXTnq5GG8mfLbQRX57+09vI", - "TD9+ciuJhLQ36VV+c+rhVO6U4GbJ3Fv2UsvsskiYb3nOjo62FhmNu4y90GTk417IF9H99DHF3vN5eNbi", - "Lm6SgoBlmaKuabTMHkT6aA7+wtdj8HiQwcOmwwpqWiNJIhxmzN5vjO2l3ia78LdWu9fuw+GqpOrsNwp+", - "mBSAv1yyapqcwAdhlJ6mGN11tM3bpCju/zzQ4lb74mlPgt1jlNPDzVGg/AscP4923/1JYNMvmax1DrhR", - "28qvev4wtrXpyOfXkBe1lfnxUMzcaVpOiRZzGLD0SoaF9RD+7QwbqYbwruUWtRA5BY/HxmtUQpSYlTv4", - "ptu+CojN1LvmHTjN0lRIrUBfCkhEjMq+1eK303dvYSDiaR+KfhzcOwm8wvnL5P7t42YPRb+i6XtkK4zM", - "9mQoZFIaIO+ZSmynIs2YfVeGLTP1PHbBioAmsjP6CkRGYzrBhhOZ8u8X3GtJx7wjD4MkJ69ryLOvEKgO", - "Ov9m92ItVXlUaYQhZZi/zJbykeWt51c+ROm1CgPKiZyu+06F+R9tmBRh9SH+ZsMRuaJJlhQvR379C7T8", - "ez/tK/7tDxfQYaFTeBUhxsqWMm992+87hIU4Gy5db7TWJ/emCyP8d6zzgZb/2QEwIjYRP1dyLQQwIke4", - "9dNU03tbmxXTHx7MldI/wAqlSa59M5yxZk3SehuMNXH/fdQjFZvPzVYjnf04mLj0JpgHWBI/KWDmojKo", - "H0sFe5sLCZsufzp7wDmU15hD6lLpkx3AjNikMG9ERBjEOEEmUvsqItc2CINMMv9ilX7X/XDHWChtX/sb", - "3Hy6+b8AAAD//zUiAl4YdQAA", + "H4sIAAAAAAAC/+xdC3PTyJb+K6e0d2qdXfmRBLjgW1tbmQSYTBFIEcjsXcyGtnRs99DqFt0tJ4bKf9/q", + "h2TJkh8ZEkMuVFFFbPXrvL8+fVr+EkQiSQVHrlXQ/xKoaIIJsX8eaE2iyblgWYKv8VOGSpuvUylSlJqi", + "bZSIjOuLlOiJ+RSjiiRNNRU86AenRE/gcoISYWpHATURGYthiGD7YRyEAV6RJGUY9INuwnU3JpoEYaBn", + "qflKaUn5OLgOA4kkFpzN3DQjkjEd9EeEKQwXpj0xQwNRYLq0bZ9ivKEQDAkPru2InzIqMQ7678pkvC8a", + "i+GfGGkz+aFEovE4IePlnOAkwToPXh0eAzX9QOIIJfIIoYWdcSeEWEQfUXao6DI6lETOunxM+VWfEY1K", + "71RYs7ptnV8L5Nm1rSCMjyUqdUPSfssSwtuGyWTIEEwjaDFxiTIiCoGh1ihVCDEdU61CIDyGmKgJKjBC", + "+QdEhHOhQWkiNQgJyGO4pHoCxLarciCZtUlK29QtNQiDhFy9QD42ivdoPwxSYqYz6/q/d6T9udd+8r7l", + "/2i//4/8q53//lujcmXMUVql8LXINOVjsI9hJCToCVUwXwPVmNh+f5M4CvrBv3Xn1tT1ptTNuZsxNHMl", + "lB+7brvFSoiUZNYstXxxq6SnNOHRcs1EPjX/kTimhjDCTiuPa9yoMuEpn1IpeIJcw5RIaoStyqL5Erx8", + "dfT04unL86BvZo6zyHYNg9NXr98E/WC/1+uZcWvrnwidsmx8oehnrNh1sP/812BxIQfF+iHBRMiZlYgf", + "A1qTqjqOhEyIBkY/IgzMeIMghEGw+3wQVBVrz05VY4I12o3seY2hEpZSjkstNfxerOtSyI9MkLi9e8vG", + "xVGbseskvnQPIBJ8RMeZJOZ7b2YI1Kt1ENbU2XAkriiMllktDvwxQT1BCVoAsaGsGNJ8Zabw3SFfYYkj", + "bsCGqFFTYjFFycisQYl3ew1a/Iek2krU94OYqo9gOq9RYTOa0+GHvboS95q1uGFRDWv61WiUt6lNVlIs", + "ZHfvxP+5t6ldTaM0U5Ul7S0u52WWDFGCGMGUSp0RBoenbysuZ68YmHKNY5R2ZIsxGty4gzCqpAhe/oU+", + "EA2R8aVG/zS1Xncj1+5GtoCj5OBWenPnV5Z78zV4i8YNPin1bjHKlBYJ0Bi5piOKElok06I9Ro6SaIyB", + "jsA4hVSKKY0xrkpsKljbwC/rATZ0U2654ImrOBQ7lBPKMtW8GA/rQ54ZDaQcxnRMhjNdDTa7vbromxmd", 
+ "j9/E6qdSCllnbiTiBhIP0pTRyCpHW6UY0RGNAM0IYDpAKyHRhHIszKXK1SGJL6QXZ9gUbDWhrEFrS+HO", + "TeZbQst4yCRjmqYM3TO1s6nGWsqP7Eh1jQ0DyjnKC8zZc4ORElSqMWIuBLKclqKJdfgxDrPx2LCkzLoT", + "qpTFX166MKLI4r4LwGtBr5XmfGFL9cDTsKE2vDAhuM1wiqysBM6izGITIREKPXFCq1BF+ZQwGl9QnmaN", + "KrGUlc8yaSOaGxTIUGTaOjInsPIkdq9ibX0kMh43MqvGjt+QMLeRq3JCaaIzH3uzxPBWfDT8nE8nPq4V", + "hx+kSQzHOdZaEEDS4OwOT45gJEViUIMmlKOEBDXx28ZiRe8Cu0EKwqBtdCommAgOYjT6h1lBYSp1L5cx", + "ZvR0AQEUBmLDBMYXRDcsrRxClCZJCq3Xzw739/efLEbrvYft3m579+Gb3V6/Z/79bxAGLsoaEEk0tn0c", + "qjsMOvaRYWGzgkqwKcaQEE5HqDT4luWZ1YTsPXzUJ8Nod28/xtGDh486nU7TNMi1nKWC8oapnhbPNhNF", + "16Hi9nzMjpp8nRzuYE+zCS1fgtODN78F/aCbKdllIiKsq4aU90ufi4/zB/YP93FIeeNeqPC5Cyu1LsZ7", + "BBO+nRkBVTAilC1kUNKMMf9931DCMSoUUlhns4Sv68L8S6OajH7GGBozGpqMzR7DadzXpS7C4FOGGV6k", + "QlE3ey2v5J8YkDDMKIvB9oCWIS6HOParKsDZW0p+CUVa2OBgR23iowKqm5lNGz9nxjVlNt80q8z4cP/R", + "47/3nuzulYybcv3oQbDRUgq3uwDXLc3+aVj45BR57CKoUQP3VyT41FiF/WDXZ/yMU5yKA8+f1YRhNkaU", + "jy9i2qCdf7iHEFOJkbZb8vU2FHRJmq5XxWZUV/i0gvySR26MLT5ZU48u39yTN2H5gypezzj9lGEJ0Vdn", + "fzX+/dP/qNO//7n76cX5+T+nz38/ekn/ec5OX31VomF14u2bZs9WbrGo8YaVrNmm6nFCdNQAfCZC6SVc", + "80/MVjIxnTtwSDgMsT/gbXhBNUrC+jAISEo7npmdSCSDAFp4RSLteoHgYIaCCZIY5Y7pfOqSLabzl3yH", + "er04RjzjJKERSM/kIVEYm9FUNoxFQijfGfAB92NBToiCTDkZxxCRVGcSjUQgyiSbwVCSyPg0l5+eTx7C", + "F5Km1zsDridEA15paShIidTKbNMNBM1nsIL2q+rAGwNOXXOMYUpYhgoiy6gBL+JHbJZgBtFEjlF3in25", + "xfudAa8oZDNTmhQzFVJXUg2Pe2GDHMG0M4JkVGnkUCShqLLKC608UfS4VzH/x73H67ejhQ6tUD+r3fWz", + "lVwpN7APp8B2aueMLyZap+sPS6y/cTYCv715c2rYYP4/g3ygOS8KEbcEZzMgZl+MCi4nyEEzi0l8bm6n", + "4cQlDJx0NyTojWtsujG1no6ndmJ48+IMNMqEcue/W5Fh58hs3xFsLoQqlRlVpAQODk+e7nQ2OByyvC3W", + "v0KObwoKF7I2eS6z5kdcj3kmyvA3hOOj0MApb6FzoNUx/uGZkMCcg5nbdR/eKqwmtayohITjIx+f2Sw3", + "7YH36oNgJx8xXfQUfXhd4DtSLMXG94q9F0PO7dIOO+B/GMXI7A6+NnpYXauxtHz/4l0bYUbHNPjciQ3F", + "y13BavNv4Li1ecEXU803s+1yjtpM1qwac9nfOQLZvxkCuZuTofo5D1EXipNUTUQDqXmenkDeBvCKKl3B", + "DHUBeTdQdzP1U6Wqw3fnRSvS3ZudD/0F8Aatw7fHR3s+nV6dRn9+QJ48vroi+skjeqmefE6GcvznPrkn", + "Z1MrT5O+9kjIQ4zNToSaVKvsZ9wRAMZ/+RAoDGjaIHul6JhjDMenQOLYRIPyhjQfvir03Sd7nd1Hjzu7", + "vV5nt7fJ9jwh0Yq5Tw4ON5+8t+c2LH0y7EdxH0dfkR7wYnMBgbBLMlMwyEP2IHAYoQQOSkrpw/pGCcr6", + "WdtfO1pbkMLaw7ObHJZt5D3sqewS139mT2xv7vcfLvX7a6VqNtO4Hpg5IzqzjfNeFzdJXCFEImMx/3cN", + "Q2N5Dqph7BGlQu00xbWlCt7yj1xc8irpLn9h7PdThnIG5ycnlWyXxFGmbGZsA8JFmi6Vg0hvJIa9NeF3", + "7WpKZ6PbOA9d9IQl/3rrp5/lrXl+fOO0boMtelnvatTkj53S2O23O0mN+0YzwI8Ow0xDURBhVO6QiSyG", + "32YpyilVQoLZM0zRIuLXGeeUj80INmZE5gmbgXTfr+58Soz65X1T+2l1j7NJpmNxyW0fNck0mE92yYYE", + "D4dWD+E0uQ8vhe3jVxoa97+Aq1xzwuPhrN58EYO13G7dwHMtJMZ2Mm+WfXhWmGJhzN54Wwr9n85D+GM+", + "e4S542C8z1t6aQVh4LkehIFjYRAGOWfMn45C+5ddfBAGfiElvZlbk1PPOux2ap/kFY8Lx3tUaWNpUSal", + "wbWlxtDCJNWzPMGcW8/OzczloBiw6fz1trcEvSe3kZR8uzIL+S9SNlD2UPkka31TTaZLt/4XTYw9PlrE", + "tm6/48tlq2h14RBZ6bY7xms8Ql5RluvqY82zPO02zhbPCW9QittYcjXBkuW43f28FnfdFm7JJvvCSqhE", + "WWkly2XjwtNX1i1TlRcs/0WWeUS6Po/lnCGkKNuFSuRw1njQS0ntsZNnkGOsYcF/GUzRnHdbjZpPyFUx", + "g8WzRMFC+ZejY57psQVgOx14nddl0FE+hF1GpwqvmyHw5gXduVbVhbGqwjsHQI2G5/3PCo+2zLYWlHM+", + "R7i6iNy4LowySfXszAQEp4ZDJBLlQebU0EYKS4T9ej65zeVeX9sCnZGok/McOUoawcHpsdWShHAyNiI7", + "PwFGRxjNIoY+FVcDEbYo9NXhcdudIeQ7d5vJodoyxLROCDfjB2EwRancvL3OXseW9ooUOUlp0A/2O7sd", + "s5M1bLAkdidFVYnPjRo7tJHsOLZr177uxHBWpYIrx5u9Xs+V4XDtPSuZV2J1/1TuaNhF13Wx189gWbgQ", + "NgwbXGbALdRhV5UlCZEzQ7v9FqIJRh/to67Fr2opQQZCHLsmX0nRZgdkFk3X8XeN0hza+OVfh8GD3u6t", + "cdiV1DVM+5aTTE+EpJ8xNpM+vEWxLp30mGuUnDBQKKcofYFU2QiD/ruq+b17f/2+LHfLrjmvUqEaZF26", + "GBI4x4BK/yri2a2R2HD15LrqhIzHva5p2t6trcArWAOTba5xmFcjuF0VUTMe7Tjt2oKgfyVxfkLwzTT6", + 
"Qe/BFjR6oaDvHlnSacaYvZ/gq1HmJURlf9r9YsD3tQtuDN1Wv2ptR/b73NpSIklidp3KrmBBRq9ftJFH", + "IjboxLHOJx/MUw8f3d6kqBioWFRYYtwiBHhfs7YHDdjezupI+akmG6iJk26uGOFStPAV8ncQdn7z7pe9", + "Z/4o5Je9Z+4w5Jf9g/kFvLtRlt62XHNeW/5T+dYq33P0wX7ONOua3Ln+OrRXtNoK4PMFVDeBfMUCf6K+", + "TVBfmV0rgV9Ry3aH0K96OXcj8Hd7Ai6UrYnb9lGeYP/BIN+Tu5/0UPARo5GGdq6Rbq9uU4Q2nBFm63bz", + "XL29DuvLciiHTOF9Mj2f+qKFxpX9b/cLjTfBhoVBrkQHueoeH4W+4srVSaUSR/SqOfDbFM9tY0S/jq2j", + "RD9vJVRvRacPkiEdZyJT5bIYW2CHan6preKA7xt+nYfnpQj2O9bS3jZDx9YB6k+9vyPovChQ57zdAcY6", + "8Jy32g549jUON0LP+Qp/oueN0HOJXavRc1Fvcpfwufp2lK3j51zfmhjuD/h+RAR9z1Ap4T7HPa+Sqvq4", + "jQHqvIZ1dez3unF8BLbQZ1ny8m5gqZ98+7g0v7twH3NItmDQvocnR4LzWLMcCn5v+tDbru/bPgS8zyr2", + "vHxXqBlsWUfUZWJchl2Lla0SSTK/VgGmNRAFZ3Zh7TPkGp5ODVWdAc8vFn1QIpMRfoBCUUELUMgw0nA5", + "odHEjGO/s+PbUswPJE0/FBfodvrw3F7mKXHXTd5SKClhEAmuBHMljR+mSfKhXy9YOD85sZ1sm4krTfjQ", + "h7xIobAxZVoN+IC/Rp1JriwVjCgNL4FRjgpaRuBSMIYxDGfwwfCzRN+OvaVkRnS3Y9hswE0PyjNUnkrK", + "x8Dx0g9IR/BhJBgTl7YQ5oO7sLTU6l8YKX0jyw+Xlxk7WrQAaRnnLn2hfYuIndeWXc8n9m84mU9VlPPs", + "9hrr7r7UE12Wp40sJSNtb3BQbfRDZNq9NaVpIY7zzUtZWmtWf8HLGJymL6gySdNN1dcv02rxNElW6DC0", + "JvMvlY5Fpv9T6RiluwvstXuZckOLRO6DJh/dzdXKfSlXaNvEKkdhM6sCdz8/r891n6ZJEoSBX09Dve0G", + "kUTjle6icSttx9aqT10csL4fM5KxHaF1dvZ052fM2BCWWJZVnb1nYEPk8IXetmSycfP22jX44ZFLXhH/", + "jdVw+0cRpVVQe1eHx8OZle38qsF9MhCv0HPKbLzzdDXaSP5sqY34Gwo/vI3M9eMHt5JISPsyBZVfnrs/", + "xVulHUfJ3Fv2XtP8vlCY73rPT052lhmNu4+/1GTkz+2wr6P84WOKvep1/6zF3d0lBQGrkoVd02iVPYj0", + "pzn4O38/g8e9DB42I1pQ0xpLEuEoY/aKa2zvdTfZhb+43P3i/jhel1ef/0zFd5NL8feL1k2TE3gvjNLT", + "FKO7kbh9mxTFFbB7Wt9s3z3uSbB7jPIJQXMUKP8Iy4+j3bd/GNz0YzYbHQVv1bby277fjW1tO/L5NeR1", + "jWV+3Bczd5qWU6LFAgYsvZVjaUmMf0HHVgpivGu5QTlMTsHPyoENimFKzModfNOFbwXEHnm45h04y9JU", + "SK1AXwpIRIzKHkH8fvbqJQxFPOtD0Y+Dey2FVzj/PgH/Anqzh6Kf0fQ9sUVmZnsyEjIpDZD3TCW2U5Fm", + "zL4uxVYaex67YEVAE9kZfwYiowmdYsPRVvknLO60qmfRkYdBkpPXNeTZt0hUB118uX+xlqo8qjTCiDLM", + "32dM+djy1vMrH6L0Zo0h5UTONn2txuLvdkyLsHoff7bjhFzRJEuK92M//xVa/tWv9lce7G9X0FGhU3gV", + "IcbKHljtfN1PfISFOBvu3W+13Cv3pksj/Dcs9YKW/+UJMCI2ET9Xci0EMCLHuPPDXKjwtja/T3F8tHCb", + "4h4WqU1z7ZvjjA3L0jbbYGyI+++iJK3YfG63IO38+8HEpZcB3cNbEdMCZi6rhPu+VLC3vZCw7Qq483uc", + "Q3mOOaQuVb/ZAcyITQrzQkSEQYxTZCK1b6NybYMwyCTz79bpd91vt0yE0vbNz8H1++v/DwAA///8wcL2", + "G3cAAA==", } // GetSwagger returns the content of the embedded swagger specification file diff --git a/lib/paths/paths.go b/lib/paths/paths.go index c9e6596a..65e66a97 100644 --- a/lib/paths/paths.go +++ b/lib/paths/paths.go @@ -160,9 +160,19 @@ func (p *Paths) InstanceLogs(id string) string { return filepath.Join(p.InstanceDir(id), "logs") } -// InstanceConsoleLog returns the path to instance console log file. -func (p *Paths) InstanceConsoleLog(id string) string { - return filepath.Join(p.InstanceLogs(id), "console.log") +// InstanceAppLog returns the path to instance application log (guest serial console). +func (p *Paths) InstanceAppLog(id string) string { + return filepath.Join(p.InstanceLogs(id), "app.log") +} + +// InstanceVMMLog returns the path to instance VMM log (Cloud Hypervisor stdout+stderr). +func (p *Paths) InstanceVMMLog(id string) string { + return filepath.Join(p.InstanceLogs(id), "vmm.log") +} + +// InstanceHypemanLog returns the path to instance hypeman operations log. +func (p *Paths) InstanceHypemanLog(id string) string { + return filepath.Join(p.InstanceLogs(id), "hypeman.log") } // InstanceSnapshots returns the path to instance snapshots directory. 
diff --git a/lib/providers/providers.go b/lib/providers/providers.go index f6046ff7..62523063 100644 --- a/lib/providers/providers.go +++ b/lib/providers/providers.go @@ -21,11 +21,21 @@ import ( "go.opentelemetry.io/otel" ) -// ProvideLogger provides a structured logger with subsystem-specific levels -func ProvideLogger() *slog.Logger { +// ProvideLogger provides a structured logger with subsystem-specific levels. +// Wraps with InstanceLogHandler to automatically write logs with "id" attribute +// to per-instance hypeman.log files. +func ProvideLogger(p *paths.Paths) *slog.Logger { cfg := logger.NewConfig() otelHandler := hypemanotel.GetGlobalLogHandler() - return logger.NewSubsystemLogger(logger.SubsystemAPI, cfg, otelHandler) + baseLogger := logger.NewSubsystemLogger(logger.SubsystemAPI, cfg, otelHandler) + + // Wrap the handler with instance log handler for per-instance logging + logPathFunc := func(id string) string { + return p.InstanceHypemanLog(id) + } + instanceHandler := logger.NewInstanceLogHandler(baseLogger.Handler(), logPathFunc) + + return slog.New(instanceHandler) } // ProvideContext provides a context with logger attached diff --git a/lib/vmm/client.go b/lib/vmm/client.go index e7da17a2..322dac88 100644 --- a/lib/vmm/client.go +++ b/lib/vmm/client.go @@ -117,33 +117,29 @@ func StartProcessWithArgs(ctx context.Context, p *paths.Paths, version CHVersion Setpgid: true, // Create new process group } - // Redirect stdout/stderr to log files (process won't block on I/O) + // Redirect stdout/stderr to combined VMM log file (process won't block on I/O) instanceDir := filepath.Dir(socketPath) - stdoutFile, err := os.OpenFile( - filepath.Join(instanceDir, "ch-stdout.log"), - os.O_CREATE|os.O_WRONLY|os.O_APPEND, - 0644, - ) - if err != nil { - return 0, fmt.Errorf("create stdout log: %w", err) + logsDir := filepath.Join(instanceDir, "logs") + if err := os.MkdirAll(logsDir, 0755); err != nil { + return 0, fmt.Errorf("create logs directory: %w", err) } - // Note: These defers close the parent's file descriptors after cmd.Start(). - // The child process receives duplicated file descriptors during fork/exec, - // so it can continue writing to the log files even after we close them here. - defer stdoutFile.Close() - stderrFile, err := os.OpenFile( - filepath.Join(instanceDir, "ch-stderr.log"), + vmmLogFile, err := os.OpenFile( + filepath.Join(logsDir, "vmm.log"), os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644, ) if err != nil { - return 0, fmt.Errorf("create stderr log: %w", err) + return 0, fmt.Errorf("create vmm log: %w", err) } - defer stderrFile.Close() - - cmd.Stdout = stdoutFile - cmd.Stderr = stderrFile + // Note: This defer closes the parent's file descriptor after cmd.Start(). + // The child process receives a duplicated file descriptor during fork/exec, + // so it can continue writing to the log file even after we close it here. 
+ defer vmmLogFile.Close() + + // Both stdout and stderr go to the same file + cmd.Stdout = vmmLogFile + cmd.Stderr = vmmLogFile if err := cmd.Start(); err != nil { return 0, fmt.Errorf("start cloud-hypervisor: %w", err) diff --git a/lib/vmm/client_test.go b/lib/vmm/client_test.go index 2bde9a2a..e88cf609 100644 --- a/lib/vmm/client_test.go +++ b/lib/vmm/client_test.go @@ -161,15 +161,15 @@ func TestStartProcessCreatesLogFiles(t *testing.T) { require.NoError(t, err) assert.Greater(t, pid, 0) - // Verify log files exist - they are created and accessible by the daemon - stdoutLog := filepath.Join(tmpDir, "ch-stdout.log") - stderrLog := filepath.Join(tmpDir, "ch-stderr.log") + // Verify logs directory and vmm.log file exist + logsDir := filepath.Join(tmpDir, "logs") + vmmLog := filepath.Join(logsDir, "vmm.log") - _, err = os.Stat(stdoutLog) - require.NoError(t, err, "stdout log should exist") + _, err = os.Stat(logsDir) + require.NoError(t, err, "logs directory should exist") - _, err = os.Stat(stderrLog) - require.NoError(t, err, "stderr log should exist") + _, err = os.Stat(vmmLog) + require.NoError(t, err, "vmm.log should exist") // Verify the daemon is running and responsive client, err := NewVMM(socketPath) @@ -179,16 +179,13 @@ func TestStartProcessCreatesLogFiles(t *testing.T) { require.NoError(t, err) assert.Equal(t, 200, pingResp.StatusCode()) - // Read log files - with verbose mode, Cloud Hypervisor writes to logs - stdoutContent, err := os.ReadFile(stdoutLog) - require.NoError(t, err) - stderrContent, err := os.ReadFile(stderrLog) + // Read log file - with verbose mode, Cloud Hypervisor writes to logs + vmmContent, err := os.ReadFile(vmmLog) require.NoError(t, err) // Verify that logs contain output (proves daemon can write after parent closed files) - totalLogSize := len(stdoutContent) + len(stderrContent) - assert.Greater(t, totalLogSize, 0, - "Cloud Hypervisor daemon should write logs even after parent closed the file descriptors") + assert.Greater(t, len(vmmContent), 0, + "Cloud Hypervisor daemon should write logs even after parent closed the file descriptor") // Cleanup client.ShutdownVMMWithResponse(ctx) diff --git a/openapi.yaml b/openapi.yaml index 6984a1ff..19e39ee1 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -930,7 +930,12 @@ paths: get: summary: Stream instance logs (SSE) description: | - Streams instance console logs as Server-Sent Events. + Streams instance logs as Server-Sent Events. + Use the `source` parameter to select which log to stream: + - `app` (default): Guest application logs (serial console) + - `vmm`: Cloud Hypervisor VMM logs + - `hypeman`: Hypeman operations log + Returns the last N lines (controlled by `tail` parameter), then optionally continues streaming new lines if `follow=true`. operationId: getInstanceLogs @@ -957,6 +962,18 @@ paths: type: boolean default: false description: Continue streaming new lines after initial output + - name: source + in: query + required: false + schema: + type: string + enum: [app, vmm, hypeman] + default: app + description: | + Log source to stream: + - app: Guest application logs (serial console output) + - vmm: Cloud Hypervisor VMM logs (hypervisor stdout+stderr) + - hypeman: Hypeman operations log (actions taken on this instance) responses: 200: description: Log stream (SSE)
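To show how the new parameter is consumed end to end, here is a hedged client-side sketch built on the generated `oapi` types from this change; the server URL, instance ID, and plain `http.DefaultClient` wiring are assumptions for the example, not part of the diff.

```go
package main

import (
	"fmt"
	"io"
	"net/http"

	"github.com/onkernel/hypeman/lib/oapi"
)

func main() {
	// Assumed values for the example; substitute a real server URL and instance ID.
	server := "http://localhost:8080"
	instanceID := "inst-123"

	src := oapi.Vmm // request the Cloud Hypervisor log instead of the default "app"
	follow := false
	params := &oapi.GetInstanceLogsParams{
		Follow: &follow,
		Source: &src,
	}

	// NewGetInstanceLogsRequest encodes ?source=vmm (and follow=false) into the
	// query string, as added in the generated client above.
	req, err := oapi.NewGetInstanceLogsRequest(server, instanceID, params)
	if err != nil {
		panic(err)
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// The endpoint streams SSE; for brevity this just dumps whatever was returned.
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body))
}
```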