Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions server/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,18 @@ curl http://localhost:10001/recording/stop -d {}
curl http://localhost:10001/recording/download --output recording.mp4
```

#### Additional routes

```bash
# Screenshot

# | GET /screenshot
curl http://localhost:10001/screenshot -o screenshot.png

# | POST /screenshot
curl -X POST -H "Content-Type: application/json" --data '{"x":0,"y":0,"width":200,"height":200}' -o screenshot_region.png http://localhost:10001/screenshot
```

### ⚙️ Configuration

Configure the server using environment variables:
Expand Down
12 changes: 12 additions & 0 deletions server/cmd/api/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ type ApiService struct {
// Filesystem watch management
watchMu sync.RWMutex
watches map[string]*fsWatch
// Server start time for health endpoint
startTime time.Time
}

var _ oapi.StrictServerInterface = (*ApiService)(nil)
Expand All @@ -39,6 +41,7 @@ func New(recordManager recorder.RecordManager, factory recorder.FFmpegRecorderFa
factory: factory,
defaultRecorderID: "default",
watches: make(map[string]*fsWatch),
startTime: time.Now(),
}, nil
}

Expand Down Expand Up @@ -235,3 +238,12 @@ func (s *ApiService) ListRecorders(ctx context.Context, _ oapi.ListRecordersRequ
func (s *ApiService) Shutdown(ctx context.Context) error {
return s.recordManager.StopAll(ctx)
}

// GetHealth implements the health check endpoint
func (s *ApiService) GetHealth(ctx context.Context, _ oapi.GetHealthRequestObject) (oapi.GetHealthResponseObject, error) {
uptimeSec := int(time.Since(s.startTime).Seconds())
return oapi.GetHealth200JSONResponse{
Status: oapi.Ok,
UptimeSec: uptimeSec,
}, nil
}
195 changes: 195 additions & 0 deletions server/cmd/api/api/screenshot.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
package api

import (
"bytes"
"context"
"fmt"
"io"
"os"
"os/exec"
"strconv"
"strings"

"github.com/onkernel/kernel-images/server/lib/logger"
oapi "github.com/onkernel/kernel-images/server/lib/oapi"
)

// getScreenDimensions uses xdpyinfo to get the screen dimensions
func getScreenDimensions(display string) (width, height int, err error) {
cmd := exec.Command("xdpyinfo")
cmd.Env = append(os.Environ(), fmt.Sprintf("DISPLAY=%s", display))

output, err := cmd.Output()
if err != nil {
// Fallback to default dimensions if xdpyinfo fails
return 1024, 768, nil
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Screen Dimensions Fallback Hides Errors

The getScreenDimensions function returns a nil error even when xdpyinfo fails, causing it to silently use fallback dimensions. This prevents captureScreenshot from logging a warning and leads to incorrect bounds validation, potentially rejecting valid screenshot regions or allowing invalid ones that cause ffmpeg to fail.

Fix in Cursor Fix in Web


lines := strings.Split(string(output), "\n")
for _, line := range lines {
if strings.Contains(line, "dimensions:") {
// Parse line like " dimensions: 1920x1080 pixels (508x285 millimeters)"
parts := strings.Fields(line)
if len(parts) >= 2 {
dims := strings.Split(parts[1], "x")
if len(dims) == 2 {
w, _ := strconv.Atoi(dims[0])
h, _ := strconv.Atoi(dims[1])
if w > 0 && h > 0 {
return w, h, nil
}
}
}
}
}

// Default dimensions if parsing fails
return 1024, 768, nil
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the fallback is needed would it be useful to read fallback dimensions from something such as environment variables before fixed values?

}

// captureScreenshot captures a screenshot using ffmpeg
func captureScreenshot(ctx context.Context, display string, region *oapi.ScreenshotRegionRequest) (io.Reader, error) {
log := logger.FromContext(ctx)

// Get screen dimensions for bounds checking
screenWidth, screenHeight, err := getScreenDimensions(display)
if err != nil {
log.Warn("failed to get screen dimensions, using defaults", "error", err)
}

// Validate region bounds if provided
if region != nil {
if region.X < 0 || region.Y < 0 {
return nil, fmt.Errorf("coordinates must be non-negative")
}
if region.Width <= 0 || region.Height <= 0 {
return nil, fmt.Errorf("width and height must be positive")
}
if region.X+region.Width > screenWidth || region.Y+region.Height > screenHeight {
return nil, fmt.Errorf("region exceeds screen bounds (screen: %dx%d, region: %d,%d %dx%d)",
screenWidth, screenHeight, region.X, region.Y, region.Width, region.Height)
}
}

// Build ffmpeg command
args := []string{
"-f", "x11grab",
"-video_size", fmt.Sprintf("%dx%d", screenWidth, screenHeight),
"-i", display,
"-vframes", "1",
}

// Add crop filter if region is specified
if region != nil {
cropFilter := fmt.Sprintf("crop=%d:%d:%d:%d", region.Width, region.Height, region.X, region.Y)
args = append(args, "-vf", cropFilter)
}

// Output as PNG to stdout
args = append(args, "-f", "image2pipe", "-vcodec", "png", "-")

cmd := exec.CommandContext(ctx, "ffmpeg", args...)
cmd.Env = append(os.Environ(), fmt.Sprintf("DISPLAY=%s", display))

log.Debug("executing ffmpeg command", "args", args, "display", display)

output, err := cmd.Output()
if err != nil {
if exitErr, ok := err.(*exec.ExitError); ok {
log.Error("ffmpeg failed", "stderr", string(exitErr.Stderr), "error", err)
return nil, fmt.Errorf("screenshot capture failed: %s", string(exitErr.Stderr))
}
return nil, fmt.Errorf("screenshot capture failed: %w", err)
}

return bytes.NewReader(output), nil
}

// CaptureScreenshot implements the GET /screenshot endpoint
func (s *ApiService) CaptureScreenshot(ctx context.Context, _ oapi.CaptureScreenshotRequestObject) (oapi.CaptureScreenshotResponseObject, error) {
log := logger.FromContext(ctx)

// Get display from environment, default to :1 for production
display := os.Getenv("DISPLAY")
if display == "" {
display = ":1"
}

log.Info("capturing full screenshot", "display", display)

screenshot, err := captureScreenshot(ctx, display, nil)
if err != nil {
log.Error("failed to capture screenshot", "error", err)
return oapi.CaptureScreenshot500JSONResponse{
InternalErrorJSONResponse: oapi.InternalErrorJSONResponse{
Message: err.Error(),
},
}, nil
}

// Read the screenshot data
data, err := io.ReadAll(screenshot)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure how oapi does response body reading under the hood but I'd assume it's doing this read of the reader anyway, so this feels like duplicate / unnecessary reading of the screenshot into memory

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we serve as base64 string [ @matthewjmarangoni ] or binary (or add optional ?format={} ) ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understand the question correctly, offering both with a way for the client to choose either the original binary or base 64 encoded string is what I had in mind.

if err != nil {
log.Error("failed to read screenshot data", "error", err)
return oapi.CaptureScreenshot500JSONResponse{
InternalErrorJSONResponse: oapi.InternalErrorJSONResponse{
Message: "failed to read screenshot data",
},
}, nil
}

return oapi.CaptureScreenshot200ImagepngResponse{
Body: bytes.NewReader(data),
ContentLength: int64(len(data)),
}, nil
}

// CaptureScreenshotRegion implements the POST /screenshot endpoint
func (s *ApiService) CaptureScreenshotRegion(ctx context.Context, req oapi.CaptureScreenshotRegionRequestObject) (oapi.CaptureScreenshotRegionResponseObject, error) {
log := logger.FromContext(ctx)

// Get display from environment, default to :1 for production
display := os.Getenv("DISPLAY")
if display == "" {
display = ":1"
}

log.Info("capturing screenshot region", "display", display,
"x", req.Body.X, "y", req.Body.Y,
"width", req.Body.Width, "height", req.Body.Height)

screenshot, err := captureScreenshot(ctx, display, req.Body)
if err != nil {
log.Error("failed to capture screenshot region", "error", err)
// Check if it's a bounds error
if strings.Contains(err.Error(), "exceeds screen bounds") ||
strings.Contains(err.Error(), "must be") {
return oapi.CaptureScreenshotRegion400JSONResponse{
BadRequestErrorJSONResponse: oapi.BadRequestErrorJSONResponse{
Message: err.Error(),
},
}, nil
}
return oapi.CaptureScreenshotRegion500JSONResponse{
InternalErrorJSONResponse: oapi.InternalErrorJSONResponse{
Message: err.Error(),
},
}, nil
}

// Read the screenshot data
data, err := io.ReadAll(screenshot)
if err != nil {
log.Error("failed to read screenshot data", "error", err)
return oapi.CaptureScreenshotRegion500JSONResponse{
InternalErrorJSONResponse: oapi.InternalErrorJSONResponse{
Message: "failed to read screenshot data",
},
}, nil
}

return oapi.CaptureScreenshotRegion200ImagepngResponse{
Body: bytes.NewReader(data),
ContentLength: int64(len(data)),
}, nil
}
121 changes: 121 additions & 0 deletions server/cmd/api/api/screenshot_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
package api

import (
"context"
"os"
"os/exec"
"testing"

oapi "github.com/onkernel/kernel-images/server/lib/oapi"
"github.com/onkernel/kernel-images/server/lib/recorder"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestApiService_GetHealth(t *testing.T) {
ctx := context.Background()

mgr := recorder.NewFFmpegManager()
svc, err := New(mgr, newMockFactory())
require.NoError(t, err)

// Call health endpoint
resp, err := svc.GetHealth(ctx, oapi.GetHealthRequestObject{})
require.NoError(t, err)

// Check response type
healthResp, ok := resp.(oapi.GetHealth200JSONResponse)
require.True(t, ok, "expected GetHealth200JSONResponse")

// Check status is "ok"
assert.Equal(t, oapi.Ok, healthResp.Status)

// Check uptime is non-negative
assert.GreaterOrEqual(t, healthResp.UptimeSec, 0)
}

func TestApiService_CaptureScreenshot(t *testing.T) {
// Skip if ffmpeg is not available
if _, err := exec.LookPath("ffmpeg"); err != nil {
t.Skip("ffmpeg not found in PATH")
}

// Skip if no display is available
if os.Getenv("DISPLAY") == "" && os.Getenv("CI") != "" {
t.Skip("No DISPLAY available in CI environment")
}

ctx := context.Background()

mgr := recorder.NewFFmpegManager()
svc, err := New(mgr, newMockFactory())
require.NoError(t, err)

// Test full screenshot capture
t.Run("full_screenshot", func(t *testing.T) {
// Set test display
os.Setenv("DISPLAY", ":20")
defer os.Unsetenv("DISPLAY")

resp, err := svc.CaptureScreenshot(ctx, oapi.CaptureScreenshotRequestObject{})

// In test environment, screenshot might fail due to no X server
// Just ensure no panic and proper error handling
if err == nil {
// If successful, check response type
_, ok := resp.(oapi.CaptureScreenshot200ImagepngResponse)
if !ok {
// Might be 500 error which is acceptable in test
_, ok = resp.(oapi.CaptureScreenshot500JSONResponse)
assert.True(t, ok, "expected either 200 or 500 response")
}
}
})
}

func TestApiService_CaptureScreenshotRegion(t *testing.T) {
ctx := context.Background()

mgr := recorder.NewFFmpegManager()
svc, err := New(mgr, newMockFactory())
require.NoError(t, err)

// Test region validation
t.Run("invalid_region_negative_coords", func(t *testing.T) {
req := oapi.CaptureScreenshotRegionRequestObject{
Body: &oapi.ScreenshotRegionRequest{
X: -1,
Y: 0,
Width: 100,
Height: 100,
},
}

resp, err := svc.CaptureScreenshotRegion(ctx, req)
require.NoError(t, err)

// Should return 400 bad request
badReqResp, ok := resp.(oapi.CaptureScreenshotRegion400JSONResponse)
assert.True(t, ok, "expected 400 bad request response")
assert.Contains(t, badReqResp.Message, "must be non-negative")
})

t.Run("invalid_region_zero_dimensions", func(t *testing.T) {
req := oapi.CaptureScreenshotRegionRequestObject{
Body: &oapi.ScreenshotRegionRequest{
X: 0,
Y: 0,
Width: 0,
Height: 100,
},
}

resp, err := svc.CaptureScreenshotRegion(ctx, req)
require.NoError(t, err)

// Should return 400 bad request
badReqResp, ok := resp.(oapi.CaptureScreenshotRegion400JSONResponse)
assert.True(t, ok, "expected 400 bad request response")
assert.Contains(t, badReqResp.Message, "must be positive")
})
}
Loading
Loading