Skip to content

Commit 4ce0fe7

Browse files
bugfix: Ensure schema is loaded before processing (#221)
* bugfix: Ensure schema is loaded before processing Schema wasn't loaded prior to processing the input paths; this resulted in a noop nil response. To ensure we always load the schema first we now block on runner ready state before processing. Additionally, a nil doc is considered an error. In the cases we call ProcessInputPaths we can accept that error; in manager we explicitly check for doc being nil and emit an error log. * Ensure that we always 202 for async Async predictions should not 500. However, since we now know that the runner failed before we send the prediction request down, we end up needing to work around the error and still send a 202 *then* let the webhook indicate failure. While this behavior is somewhat crazy, it is the behavioral contract we can revisit in the future.
1 parent 078c77f commit 4ce0fe7

19 files changed

+496
-97
lines changed

internal/runner/manager.go

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ var (
2929
ErrRunnerNotFound = errors.New("runner not found")
3030
ErrNoEmptySlot = errors.New("no empty slot available")
3131
ErrInvalidRunnerStatus = errors.New("invalid runner status for new prediction")
32+
// ErrAsyncPrediction is a sentinel error used to indicate that a prediction is being served asynchronously, it is not surfaced outside of runner
33+
ErrAsyncPrediction = errors.New("async prediction")
3234
)
3335

3436
// Manager manages the lifecycle and capacity of prediction runners
@@ -184,6 +186,7 @@ func (m *Manager) PredictAsync(ctx context.Context, req PredictionRequest) (*Pre
184186

185187
// predict is the internal implementation shared by both sync and async predictions
186188
func (m *Manager) predict(ctx context.Context, req PredictionRequest, async bool) (chan PredictionResponse, *PredictionResponse, error) {
189+
log := m.logger.Sugar()
187190
if err := m.claimSlot(); err != nil {
188191
return nil, nil, err
189192
}
@@ -202,7 +205,40 @@ func (m *Manager) predict(ctx context.Context, req PredictionRequest, async bool
202205
return nil, nil, fmt.Errorf("runner not ready: %s", runner.status)
203206
}
204207

205-
respChan, initialResponse, err := runner.predict(req)
208+
runner.mu.RLock()
209+
pending, exists := runner.pending[req.ID]
210+
setupCompletedChan := runner.setupComplete
211+
runner.mu.RUnlock()
212+
if !exists {
213+
m.releaseSlot()
214+
return nil, nil, fmt.Errorf("failed to find pending prediction after allocation: %s", req.ID)
215+
}
216+
select {
217+
case <-setupCompletedChan:
218+
// We need to wait for setup to complete before proceeding so that we can ensure that
219+
// the OpenAPI schema is available for input processing
220+
log.Tracew("runner setup complete, proceeding with prediction", "prediction_id", req.ID, "runner", runner.runnerCtx.id)
221+
case <-pending.ctx.Done():
222+
// Prediction was canceled, watcher will perform cleanup, we need to abort
223+
// the rest of the prediction processing
224+
log.Tracew("prediction was canceled before setup complete, aborting", "prediction_id", req.ID, "runner", runner.runnerCtx.id)
225+
m.releaseSlot()
226+
return nil, nil, fmt.Errorf("prediction %s was canceled: %w", req.ID, pending.ctx.Err())
227+
}
228+
229+
// Check for setup failure before calling predict
230+
runner.mu.Lock()
231+
status := runner.status
232+
runner.mu.Unlock()
233+
if status == StatusSetupFailed {
234+
// Setup failure will be handled by async webhook machinery
235+
// Return sentinel error to indicate async handling
236+
log.Tracew("setup failed, using async handling", "prediction_id", req.ID, "runner", runner.runnerCtx.id)
237+
m.releaseSlot()
238+
return nil, nil, ErrAsyncPrediction
239+
}
240+
241+
respChan, initialResponse, err := runner.predict(req.ID)
206242
if err != nil {
207243
m.releaseSlot()
208244
return nil, nil, err
@@ -330,19 +366,28 @@ func (m *Manager) allocatePrediction(runner *Runner, req PredictionRequest) { //
330366
// NOTE(morgan): by design we do not use the passed in context, as the passed
331367
// in context is tied to the http request, and would cause the prediction to
332368
// fail at the end of the http request's lifecycle.
333-
watcherCtx, cancel := context.WithCancel(m.ctx)
369+
predictionCtx, cancel := context.WithCancel(m.ctx)
334370

335371
pending := &PendingPrediction{
336372
request: req,
337373
outputCache: make(map[string]string),
338374
c: make(chan PredictionResponse, 1),
339375
cancel: cancel, // Manager can cancel this watcher explicitly
376+
ctx: predictionCtx,
340377
watcherDone: make(chan struct{}),
341378
outputNotify: make(chan struct{}, 1),
342379
webhookSender: m.webhookSender,
343380
}
344381
runner.pending[req.ID] = pending
345382

383+
now := time.Now().Format(config.TimeFormat)
384+
if pending.request.CreatedAt == "" {
385+
pending.request.CreatedAt = now
386+
}
387+
if pending.request.StartedAt == "" {
388+
pending.request.StartedAt = now
389+
}
390+
346391
// Start per-prediction response watcher with cleanup wrapper
347392
go func() {
348393
defer func() {
@@ -399,7 +444,7 @@ func (m *Manager) allocatePrediction(runner *Runner, req PredictionRequest) { //
399444
}
400445
}()
401446

402-
runner.watchPredictionResponses(watcherCtx, req.ID, pending)
447+
runner.watchPredictionResponses(predictionCtx, req.ID, pending)
403448
}()
404449
}
405450

internal/runner/path.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package runner
33
import (
44
"bytes"
55
"encoding/base64"
6+
"errors"
67
"fmt"
78
"io"
89
"net/http"
@@ -18,17 +19,22 @@ import (
1819
"github.com/replicate/go/httpclient"
1920
)
2021

21-
var Base64Regex = regexp.MustCompile(`^data:.*;base64,(?P<base64>.*)$`)
22+
var (
23+
Base64Regex = regexp.MustCompile(`^data:.*;base64,(?P<base64>.*)$`)
24+
ErrSchemaNotAvailable = errors.New("OpenAPI schema not available for input processing")
25+
)
2226

2327
func isURI(s *openapi3.SchemaRef) bool {
2428
return s.Value.Type.Is("string") && s.Value.Format == "uri"
2529
}
2630

2731
// ProcessInputPaths processes the input paths and discards the now unused paths from the input.
28-
// Note that we return the input, but the expectation is that input will be mutated in-place.
32+
// Note that we return the input, but the expectation is that input will be mutated in-place. This function
33+
// returns ErrSchemaNotAvailable if the OpenAPI schema is not available. It is up to the caller to decide how
34+
// to handle this error (e.g. log a warning and proceed without path processing).
2935
func ProcessInputPaths(input any, doc *openapi3.T, paths *[]string, fn func(string, *[]string) (string, error)) (any, error) {
3036
if doc == nil {
31-
return input, nil
37+
return input, ErrSchemaNotAvailable
3238
}
3339

3440
schema, ok := doc.Components.Schemas["Input"]

internal/runner/path_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -576,7 +576,7 @@ func TestProcessInputPaths(t *testing.T) {
576576
}
577577

578578
result, err := ProcessInputPaths(input, nil, &paths, mockFn)
579-
require.NoError(t, err)
579+
require.ErrorIs(t, err, ErrSchemaNotAvailable)
580580
assert.Equal(t, input, result)
581581
assert.Empty(t, paths)
582582
})

internal/runner/runner.go

Lines changed: 45 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -812,40 +812,53 @@ func (r *Runner) verifyProcessCleanup(pid int) {
812812

813813
// predict returns a channel that will receive the prediction response and an initial prediction response
814814
// populated with the relevant fields from the request
815-
func (r *Runner) predict(req PredictionRequest) (chan PredictionResponse, *PredictionResponse, error) {
815+
func (r *Runner) predict(reqID string) (chan PredictionResponse, *PredictionResponse, error) {
816816
log := r.logger.Sugar()
817817
r.mu.Lock()
818818
defer r.mu.Unlock()
819819

820-
log.Tracew("runner.predict called", "prediction_id", req.ID, "status", r.status)
820+
log.Tracew("runner.predict called", "prediction_id", reqID, "status", r.status)
821821

822822
// Prediction must be pre-allocated by manager
823-
pending, exists := r.pending[req.ID]
823+
pending, exists := r.pending[reqID]
824824
if !exists {
825-
return nil, nil, fmt.Errorf("prediction %s not allocated", req.ID)
825+
return nil, nil, fmt.Errorf("prediction %s not allocated", reqID)
826826
}
827827

828-
log.Tracew("prediction found in pending", "prediction_id", req.ID)
828+
if pending.request.ID != reqID {
829+
return nil, nil, fmt.Errorf("prediction ID mismatch: expected %s, got %s", reqID, pending.request.ID)
830+
}
831+
832+
pending.mu.Lock()
833+
defer pending.mu.Unlock()
834+
835+
log.Tracew("prediction found in pending", "prediction_id", reqID)
829836

830837
// Process input paths (base64 and URL inputs)
831838
inputPaths := make([]string, 0)
832-
input, err := ProcessInputPaths(req.Input, r.doc, &inputPaths, Base64ToInput)
833-
if err != nil {
834-
return nil, nil, fmt.Errorf("failed to process base64 inputs: %w", err)
835-
}
836-
input, err = ProcessInputPaths(input, r.doc, &inputPaths, URLToInput)
837-
if err != nil {
838-
return nil, nil, fmt.Errorf("failed to process URL inputs: %w", err)
839+
if r.doc == nil {
840+
log.Errorw("OpenAPI schema not available for input processing - cannot convert base64 or URL inputs", "prediction_id", reqID)
841+
} else {
842+
// Process base64 inputs first, then URL inputs (to allow URL inputs to reference base64-decoded files)
843+
input, err := ProcessInputPaths(pending.request.Input, r.doc, &inputPaths, Base64ToInput)
844+
if err != nil {
845+
return nil, nil, fmt.Errorf("failed to process base64 inputs: %w", err)
846+
}
847+
848+
input, err = ProcessInputPaths(input, r.doc, &inputPaths, URLToInput)
849+
if err != nil {
850+
return nil, nil, fmt.Errorf("failed to process URL inputs: %w", err)
851+
}
852+
pending.request.Input = input
839853
}
840-
req.Input = input
841854
pending.inputPaths = inputPaths
842855

843-
// Write prediction request to file (async like original)
844-
requestFile := fmt.Sprintf("request-%s.json", req.ID)
845-
log.Debugw("writing prediction request file", "prediction_id", req.ID, "file", requestFile)
856+
// Write prediction request to file
857+
requestFile := fmt.Sprintf("request-%s.json", reqID)
858+
log.Debugw("writing prediction request file", "prediction_id", reqID, "file", requestFile)
846859
requestPath := path.Join(r.runnerCtx.workingdir, requestFile)
847860

848-
requestData, err := json.Marshal(req)
861+
requestData, err := json.Marshal(pending.request)
849862
if err != nil {
850863
return nil, nil, fmt.Errorf("failed to marshal request: %w", err)
851864
}
@@ -854,13 +867,13 @@ func (r *Runner) predict(req PredictionRequest) (chan PredictionResponse, *Predi
854867
return nil, nil, fmt.Errorf("failed to write request file: %w", err)
855868
}
856869

857-
log.Tracew("wrote prediction request file", "prediction_id", req.ID, "path", requestPath, "working_dir", r.runnerCtx.workingdir, "request_data", string(requestData))
870+
log.Tracew("wrote prediction request file", "prediction_id", reqID, "path", requestPath, "working_dir", r.runnerCtx.workingdir, "request_data", string(requestData))
858871

859872
// Debug: Check if file actually exists and list directory contents
860873
if _, err := os.Stat(requestPath); err != nil {
861-
log.Tracew("ERROR: written request file does not exist", "prediction_id", req.ID, "path", requestPath, "error", err)
874+
log.Tracew("ERROR: written request file does not exist", "prediction_id", reqID, "path", requestPath, "error", err)
862875
} else {
863-
log.Tracew("confirmed request file exists", "prediction_id", req.ID, "path", requestPath)
876+
log.Tracew("confirmed request file exists", "prediction_id", reqID, "path", requestPath)
864877
}
865878

866879
// Debug: List all files in working directory
@@ -869,25 +882,14 @@ func (r *Runner) predict(req PredictionRequest) (chan PredictionResponse, *Predi
869882
for i, entry := range entries {
870883
fileNames[i] = entry.Name()
871884
}
872-
log.Tracew("working directory contents after write", "prediction_id", req.ID, "working_dir", r.runnerCtx.workingdir, "files", fileNames)
873-
}
874-
875-
// Update pending prediction with request details
876-
pending.request = req
877-
878-
now := time.Now().Format(config.TimeFormat)
879-
if pending.request.CreatedAt == "" {
880-
pending.request.CreatedAt = now
881-
}
882-
if pending.request.StartedAt == "" {
883-
pending.request.StartedAt = now
885+
log.Tracew("working directory contents after write", "prediction_id", reqID, "working_dir", r.runnerCtx.workingdir, "files", fileNames)
884886
}
885887

886-
log.Tracew("returning prediction channel", "prediction_id", req.ID)
888+
log.Tracew("returning prediction channel", "prediction_id", reqID)
887889
initialResponse := &PredictionResponse{
888890
Status: PredictionStarting,
889891
}
890-
initialResponse.populateFromRequest(req)
892+
initialResponse.populateFromRequest(pending.request)
891893
return pending.c, initialResponse, nil
892894
}
893895

@@ -970,14 +972,23 @@ func (r *Runner) updateSchema() {
970972
r.mu.Lock()
971973
defer r.mu.Unlock()
972974

975+
log := r.logger.Sugar()
973976
schemaPath := filepath.Join(r.runnerCtx.workingdir, "openapi.json")
977+
log.Tracew("attempting to read openapi.json", "path", schemaPath)
978+
974979
if schemaData, err := os.ReadFile(schemaPath); err == nil { //nolint:gosec // expected dynamic path
975980
r.schema = string(schemaData)
981+
log.Tracew("successfully read openapi.json", "schema_length", len(schemaData))
976982

977983
// Parse the schema for use in ProcessInputPaths
978984
if doc, parseErr := openapi3.NewLoader().LoadFromData(schemaData); parseErr == nil {
979985
r.doc = doc
986+
log.Tracew("successfully parsed openapi schema for ProcessInputPaths")
987+
} else {
988+
log.Errorw("failed to parse openapi schema", "error", parseErr)
980989
}
990+
} else {
991+
log.Tracew("failed to read openapi.json", "error", err)
981992
}
982993
}
983994

internal/runner/runner_test.go

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -492,29 +492,36 @@ func TestRunnerPredict(t *testing.T) {
492492
runnerCtx: RunnerContext{workingdir: tempDir},
493493
logger: loggingtest.NewTestLogger(t),
494494
}
495-
496-
// Pre-allocate prediction
497-
r.pending["test-id"] = &PendingPrediction{
498-
c: make(chan PredictionResponse, 1),
499-
}
495+
predictionID, _ := PredictionID()
500496

501497
req := PredictionRequest{
502-
ID: "test-id",
498+
ID: predictionID,
503499
Input: map[string]any{"key": "value"},
500+
// CreatedAt and StartedAt would be set in the manager allocatePrediction step
501+
// so we need to set them directly here
502+
CreatedAt: time.Now().Format(config.TimeFormat),
503+
StartedAt: time.Now().Format(config.TimeFormat),
504+
}
505+
// Pre-allocate prediction
506+
r.pending[predictionID] = &PendingPrediction{
507+
c: make(chan PredictionResponse, 1),
508+
request: req,
504509
}
505510

506-
ch, initialResponse, err := r.predict(req)
507-
assert.NotNil(t, initialResponse)
511+
ch, initialResponse, err := r.predict(req.ID)
512+
require.NoError(t, err)
513+
require.NotNil(t, initialResponse)
508514
assert.Equal(t, PredictionStarting, initialResponse.Status)
509515
assert.NotEmpty(t, initialResponse.ID)
510516
assert.Equal(t, req.Input, initialResponse.Input)
517+
assert.NotEmpty(t, initialResponse.CreatedAt)
518+
assert.NotEmpty(t, initialResponse.StartedAt)
511519
assert.Equal(t, req.CreatedAt, initialResponse.CreatedAt)
512520
assert.Equal(t, req.StartedAt, initialResponse.StartedAt)
513-
require.NoError(t, err)
514521
assert.NotNil(t, ch)
515522

516523
// Check request file was created
517-
requestFile := path.Join(tempDir, "request-test-id.json")
524+
requestFile := path.Join(tempDir, fmt.Sprintf("request-%s.json", predictionID))
518525
_, err = os.Stat(requestFile)
519526
assert.NoError(t, err)
520527
})
@@ -528,12 +535,14 @@ func TestRunnerPredict(t *testing.T) {
528535
logger: loggingtest.NewTestLogger(t),
529536
}
530537

531-
req := PredictionRequest{ID: "test-id"}
532-
ch, initialResponse, err := r.predict(req)
538+
predictionID, _ := PredictionID()
539+
540+
req := PredictionRequest{ID: predictionID}
541+
ch, initialResponse, err := r.predict(req.ID)
533542
require.Error(t, err)
534543
assert.Nil(t, ch)
535544
assert.Nil(t, initialResponse)
536-
assert.Contains(t, err.Error(), "prediction test-id not allocated")
545+
assert.Contains(t, err.Error(), fmt.Sprintf("prediction %s not allocated", predictionID))
537546
})
538547
}
539548

@@ -1118,7 +1127,7 @@ func TestPerPredictionWatcher(t *testing.T) {
11181127

11191128
// Setup temp directory with response files
11201129
tempDir := t.TempDir()
1121-
predictionID := "test-prediction-123"
1130+
predictionID, _ := PredictionID()
11221131

11231132
// Create response files - one for our prediction, one for another
11241133
responseFile1 := fmt.Sprintf("response-%s-00001.json", predictionID)
@@ -1170,7 +1179,7 @@ func TestPerPredictionWatcher(t *testing.T) {
11701179
t.Parallel()
11711180

11721181
tempDir := t.TempDir()
1173-
predictionID := "test-prediction-456"
1182+
predictionID, _ := PredictionID()
11741183
filename := fmt.Sprintf("response-%s-00001.json", predictionID)
11751184
filePath := filepath.Join(tempDir, filename)
11761185

@@ -1223,7 +1232,7 @@ func TestPerPredictionWatcher(t *testing.T) {
12231232
t.Parallel()
12241233

12251234
tempDir := t.TempDir()
1226-
predictionID := "test-prediction-789"
1235+
predictionID, _ := PredictionID()
12271236

12281237
// Setup runner
12291238
logger := loggingtest.NewTestLogger(t)
@@ -1290,7 +1299,7 @@ func TestPerPredictionWatcher(t *testing.T) {
12901299
t.Parallel()
12911300

12921301
tempDir := t.TempDir()
1293-
predictionID := "test-prediction-abc"
1302+
predictionID, _ := PredictionID()
12941303

12951304
// Setup runner
12961305
logger := loggingtest.NewTestLogger(t)

0 commit comments

Comments
 (0)