diff --git a/.gitignore b/.gitignore index 54a5a7e74..c2fe864c3 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ phpunit.xml ray.php CLAUDE.md .phpunit.result.cache +AGENTS.md diff --git a/config/prism.php b/config/prism.php index a810ebb66..3587d17fe 100644 --- a/config/prism.php +++ b/config/prism.php @@ -59,5 +59,13 @@ 'x_title' => env('OPENROUTER_SITE_X_TITLE', null), ], ], + 'replicate' => [ + 'api_key' => env('REPLICATE_API_KEY', ''), + 'url' => env('REPLICATE_URL', 'https://api.replicate.com/v1'), + 'webhook_url' => env('REPLICATE_WEBHOOK_URL', null), + 'use_sync_mode' => env('REPLICATE_USE_SYNC_MODE', true), // Use Prefer: wait header + 'polling_interval' => env('REPLICATE_POLLING_INTERVAL', 1000), + 'max_wait_time' => env('REPLICATE_MAX_WAIT_TIME', 60), + ], ], ]; diff --git a/docs/.vitepress/config.mts b/docs/.vitepress/config.mts index d36061e7d..1f13038a0 100644 --- a/docs/.vitepress/config.mts +++ b/docs/.vitepress/config.mts @@ -203,6 +203,14 @@ export default defineConfig({ text: "OpenAI", link: "/providers/openai", }, + { + text: "OpenRouter", + link: "/providers/openrouter", + }, + { + text: "Replicate", + link: "/providers/replicate", + }, { text: "Voyage AI", link: "/providers/voyageai", diff --git a/docs/components/ProviderSupport.vue b/docs/components/ProviderSupport.vue index b5005f074..00f2627df 100644 --- a/docs/components/ProviderSupport.vue +++ b/docs/components/ProviderSupport.vue @@ -327,6 +327,18 @@ export default { tools: Supported, documents: Supported, }, + { + name: "Replicate", + text: Supported, + streaming: Supported, + structured: Supported, + embeddings: Supported, + image: Supported, + "speech-to-text": Supported, + "text-to-speech": Supported, + tools: Unsupported, + documents: Unsupported, + }, { name: "VoyageAI", text: Unsupported, diff --git a/docs/getting-started/introduction.md b/docs/getting-started/introduction.md index bd84074fb..8fa252856 100644 --- a/docs/getting-started/introduction.md +++ 
b/docs/getting-started/introduction.md @@ -60,6 +60,19 @@ $response = Prism::text() ->withPrompt('Explain quantum computing to a 5-year-old.') ->asText(); +echo $response->text; +``` + +```php [Replicate] +use Prism\Prism\Facades\Prism; +use Prism\Prism\Enums\Provider; + +$response = Prism::text() + ->using(Provider::Replicate, 'meta/meta-llama-3.1-405b-instruct') + ->withSystemPrompt(view('prompts.system')) + ->withPrompt('Explain quantum computing to a 5-year-old.') + ->asText(); + echo $response->text; ``` ::: @@ -92,6 +105,7 @@ We currently offer first-party support for these leading AI providers: - [Mistral](/providers/mistral.md) - [Ollama](/providers/ollama.md) - [OpenAI](/providers/openai.md) +- [Replicate](/providers/replicate.md) - [xAI](/providers/xai.md) Each provider brings its own strengths to the table, and Prism makes it easy to use them all through a consistent, elegant interface. diff --git a/docs/providers/replicate.md b/docs/providers/replicate.md new file mode 100644 index 000000000..ef57d403c --- /dev/null +++ b/docs/providers/replicate.md @@ -0,0 +1,455 @@ +# Replicate + +Replicate is a cloud platform that makes it easy to run machine learning models at scale. Unlike traditional LLM APIs, Replicate uses an **asynchronous prediction-based architecture** where you submit a request and poll for results. 
+ +## Configuration + +```php +'replicate' => [ + 'api_key' => env('REPLICATE_API_KEY', ''), + 'url' => env('REPLICATE_URL', 'https://api.replicate.com/v1'), + 'webhook_url' => env('REPLICATE_WEBHOOK_URL', null), + 'use_sync_mode' => env('REPLICATE_USE_SYNC_MODE', true), // Use Prefer: wait header + 'polling_interval' => env('REPLICATE_POLLING_INTERVAL', 1000), // milliseconds + 'max_wait_time' => env('REPLICATE_MAX_WAIT_TIME', 60), // seconds +] +``` + +### Configuration Options + +- **`api_key`**: Your Replicate API token (get one at [replicate.com/account](https://replicate.com/account)) +- **`url`**: Base API URL (default: `https://api.replicate.com/v1`) +- **`webhook_url`**: Optional webhook URL for async completion notifications +- **`use_sync_mode`**: Enable sync mode with `Prefer: wait` header (default: `true`) - reduces latency +- **`polling_interval`**: Time between prediction status checks in milliseconds (default: 1000ms) - used when sync mode times out +- **`max_wait_time`**: Maximum time to wait for prediction completion in seconds (default: 60s) + +## How Replicate Works + +Replicate's API differs from most LLM providers with an asynchronous prediction-based architecture. Prism provides two modes: + +### Sync Mode (Default - Recommended) +Uses the `Prefer: wait` header to make Replicate wait for the prediction to complete before responding: + +1. **Submit prediction with `Prefer: wait`** → Replicate waits up to 60 seconds for completion +2. **Immediate response** → Get results directly if prediction completes within timeout +3. **Automatic fallback** → Falls back to polling if prediction takes longer than timeout + +**Benefits**: Lower latency, fewer API calls, faster responses for quick predictions. + +### Async Mode (Polling) +Traditional polling approach: + +1. **Submit a prediction** → Get a prediction ID +2. **Poll for completion** → Check prediction status until `succeeded` or `failed` +3. 
**Retrieve output** → Extract results from the completed prediction + +**When to use**: Disable sync mode (`use_sync_mode: false`) for very long-running predictions (>60s) to avoid timeouts. + +Prism handles all complexity automatically, providing a clean synchronous interface regardless of mode. + +## Supported Features + +### ✅ Text Generation + +Generate text using large language models like Meta Llama 3.1. + +```php +use Prism\Prism\Facades\Prism; +use Prism\Prism\Enums\Provider; + +$response = Prism::text() + ->using(Provider::Replicate, 'meta/meta-llama-3-8b-instruct') + ->withPrompt('Explain quantum computing in simple terms') + ->generate(); + +echo $response->text; +``` + +**Popular text models:** +- `meta/meta-llama-3.1-405b-instruct` - Meta's flagship LLM +- `meta/meta-llama-3-70b-instruct` - Balanced performance/cost +- `meta/meta-llama-3-8b-instruct` - Fast, efficient model + +### ✅ Structured Output + +Extract structured data using JSON mode. + +```php +use Prism\Prism\Facades\Prism; +use Prism\Prism\Enums\Provider; +use Prism\Prism\Schema\ObjectSchema; +use Prism\Prism\Schema\StringSchema; +use Prism\Prism\Schema\NumberSchema; + +$schema = new ObjectSchema( + name: "book_review", + description: "A structured book review", + properties: [ + new StringSchema("title", "Book title"), + new StringSchema("author", "Author name"), + new NumberSchema("rating", "Rating from 1-5"), + new StringSchema("summary", "Brief review summary") + ], + requiredFields: ["title", "author", "rating", "summary"] +); + +$response = Prism::structured() + ->using(Provider::Replicate, 'meta/meta-llama-3-8b-instruct') + ->withPrompt('Review "1984" by George Orwell') + ->withSchema($schema) + ->generate(); + +echo $response->structured['title']; // "1984" +echo $response->structured['rating']; // 5 +``` + +**How it works:** Prism injects the JSON schema into the prompt and instructs the model to return valid JSON matching the schema. 
+ +### ✅ Streaming + +Stream text generation token-by-token for real-time UX using Server-Sent Events (SSE). + +```php +use Prism\Prism\Facades\Prism; + +$stream = Prism::text() + ->using('replicate', 'meta/meta-llama-3-8b-instruct') + ->withPrompt('Write a short story about a robot') + ->stream(); + +foreach ($stream as $chunk) { + echo $chunk->text; // Prints tokens as they arrive in real-time +} +``` + +**How it works:** +- Prism connects to Replicate's SSE streaming endpoint (`urls.stream`) for true real-time token delivery +- Tokens arrive progressively as the model generates them (no waiting for completion) +- Full event lifecycle support: StreamStart → TextStart → TextDelta(s) → TextComplete → StreamEnd +- Automatic fallback to simulated streaming if SSE is unavailable + +### ✅ Image Generation + +Generate images using state-of-the-art diffusion models. + +```php +use Prism\Prism\Facades\Prism; + +$response = Prism::image() + ->using('replicate', 'black-forest-labs/flux-schnell') + ->withPrompt('A cute baby sea otter floating on its back in calm blue water') + ->generate(); + +$image = $response->firstImage(); +echo $image->url; +``` + +**Popular image models:** +- `bytedance/seedream-4` - Fast, high-quality generation (1-4 steps) +- `black-forest-labs/flux-dev` - Development model with more control +- `stability-ai/sdxl` - Stable Diffusion XL + +**Provider-specific options:** + +```php +$response = Prism::image() + ->using("replicate", "bytedance/seedream-4") + ->withPrompt("A beautiful sunset over mountains") + ->withProviderOptions([ + "size" => "2K", + "width" => 2048, + "height" => 2048, + "aspect_ratio" => "4:3" + ]) + ->generate(); +``` + +### ✅ Text-to-Speech (TTS) + +Convert text to natural-sounding speech. + +```php +use Prism\Prism\Facades\Prism; + +$response = Prism::audio() + ->using('replicate', 'jaaari/kokoro-82m:f559560eb822dc509045f3921a1921234918b91739db4bf3daab2169b71c7a13') + ->withInput('Hello! 
Welcome to Replicate text-to-speech.') + ->withVoice('af_bella') + ->asAudio(); + +$audio = $response->audio; +if ($audio->hasBase64()) { + file_put_contents("output.mp3", base64_decode($audio->base64)); + echo "Audio saved as: output.mp3"; +} +``` + +**Available voices for Kokoro-82m:** +- `af_bella` +- `af_nicole` +- `am_fenrir` +- `am_puck` + +**Provider-specific options:** + +```php +$response = Prism::audio() + ->using( + "replicate", + "jaaari/kokoro-82m:f559560eb822dc509045f3921a1921234918b91739db4bf3daab2169b71c7a13" + ) + ->withInput("Hello! Welcome to Replicate text-to-speech.") + ->withVoice("af_jessica") + ->withProviderOptions([ + "speed" => 2 // Speech speed multiplier + ]) + ->asAudio(); +``` + +### ✅ Speech-to-Text (STT) + +Transcribe audio files to text using Whisper. + +```php +use Prism\Prism\Facades\Prism; +use Prism\Prism\ValueObjects\Media\Audio; + +$audioFile = new Audio('path/to/audio.mp3'); + +$response = Prism::audio() + ->using('replicate', 'vaibhavs10/incredibly-fast-whisper:3ab86df6c8f54c11309d4d1f930ac292bad43ace52d10c80d87eb258b3c9f79c') + ->withInput($audioFile) + ->asText(); + +echo "Transcription: " . $response->text; +``` + +**Supported formats:** WAV, MP3, FLAC, OGG, M4A + +**Provider-specific options:** + +```php +$response = Prism::audio() + ->using('replicate', 'vaibhavs10/incredibly-fast-whisper:3ab86df6c8f54c11309d4d1f930ac292bad43ace52d10c80d87eb258b3c9f79c') + ->withInput($audioFile) + ->withProviderOptions([ + 'task' => 'transcribe', // or 'translate' (to English) + 'language' => 'english', // Optional: specify source language + 'timestamp' => 'chunk', // chunk, word, or false + 'batch_size' => 64, // Batch size for processing + ]) + ->asText(); +``` + +### ✅ Embeddings + +Generate vector embeddings for semantic search and similarity. 
+ +```php +use Prism\Prism\Facades\Prism; + +// Single input +$response = Prism::embeddings() + ->using( + "replicate", + "mark3labs/embeddings-gte-base:d619cff29338b9a37c3d06605042e1ff0594a8c3eff0175fd6967f5643fc4d47" + ) + ->fromInput("The quick brown fox jumps over the lazy dog") + ->asEmbeddings(); + +$embeddings = $response->embeddings[0]->embedding; + +// Check token usage +echo $response->usage->tokens; + +// Multiple inputs +$response = Prism::embeddings() + ->using( + "replicate", + "mark3labs/embeddings-gte-base:d619cff29338b9a37c3d06605042e1ff0594a8c3eff0175fd6967f5643fc4d47" + ) + ->fromArray([ + 'Document 1 text', + 'Document 2 text', + 'Document 3 text', + ]) + ->asEmbeddings(); + +foreach ($response->embeddings as $embedding) { + // Process each 768-dimensional vector +} +``` + +**Embeddings model:** +- `mark3labs/embeddings-gte-base` - 768-dimensional embeddings + +## Model Versioning + +Replicate models are versioned using SHA-256 hashes. Prism automatically maps friendly model names to their latest stable versions: + +```php +// These are equivalent: +->using('replicate', 'meta/meta-llama-3.1-405b-instruct') +->using('replicate', 'meta/meta-llama-3.1-405b-instruct:e7...') // Full version hash +``` + +**Best practice:** Use the short name (without version hash) to automatically get the latest stable version. + +**NOTE:** When you are not using an officially maintained Replicate model, you need to use the hash version. + +## Async Predictions & Polling + +Prism handles Replicate's async architecture transparently: + +```php +// This looks synchronous but Prism polls internally +$response = Prism::text() + ->using('replicate', 'meta/meta-llama-3.1-405b-instruct') + ->withPrompt('Generate text') + ->generate(); + +// Prism automatically: +// 1. Creates a prediction +// 2. Polls every 1 second (configurable via polling_interval) +// 3. 
Returns when prediction succeeds (or times out after max_wait_time) +``` + +### Custom Polling Configuration + +```php +// Set custom polling per provider instance +$prism = Prism::text() + ->using( + new \Prism\Prism\Providers\Replicate\Replicate( + apiKey: env('REPLICATE_API_KEY'), + url: 'https://api.replicate.com/v1', + pollingInterval: 500, // Poll every 500ms + maxWaitTime: 120 // Wait up to 2 minutes + ), + 'meta/meta-llama-3.1-405b-instruct' + ); +``` + +## Error Handling + +Replicate-specific exceptions: + +```php +use Prism\Prism\Exceptions\PrismRateLimitedException; +use Prism\Prism\Exceptions\PrismProviderOverloadedException; +use Prism\Prism\Exceptions\PrismRequestTooLargeException; + +try { + $response = Prism::text() + ->using('replicate', 'meta/meta-llama-3.1-405b-instruct') + ->withPrompt('Generate text') + ->generate(); +} catch (PrismRateLimitedException $e) { + // HTTP 429: Rate limit exceeded + // Wait and retry with exponential backoff +} catch (PrismProviderOverloadedException $e) { + // HTTP 529: Replicate's infrastructure is overloaded + // Retry with longer delay +} catch (PrismRequestTooLargeException $e) { + // HTTP 413: Request payload too large + // Reduce input size +} +``` + +## Performance Optimization + +### Sync Mode vs Async Mode + +By default, Prism uses **sync mode** (`Prefer: wait` header) for optimal performance: + +```php +// Sync mode (default) - Recommended for most use cases +'use_sync_mode' => true, // Uses Prefer: wait header + +// Benefits: +// ✅ Lower latency (no polling delay) +// ✅ Fewer API calls (single request) +// ✅ Faster for quick predictions (<60s) +// ✅ Automatic fallback to polling if needed +``` + +Disable sync mode for very long predictions: + +```php +// Async mode - For predictions that take >60 seconds +'use_sync_mode' => false, // Traditional polling + +// When to use: +// • Very large image generations +// • Complex multi-step processes +// • Known slow models +``` + +### Custom Sync Mode + +You can 
also configure sync mode per provider instance: + +```php +use Prism\Prism\Providers\Replicate\Replicate; + +$prism = Prism::text() + ->using( + new Replicate( + apiKey: env('REPLICATE_API_KEY'), + url: 'https://api.replicate.com/v1', + useSyncMode: true, // Enable sync mode + maxWaitTime: 60 // Max 60s for Prefer: wait + ), + 'meta/meta-llama-3.1-405b-instruct' + ) + ->withPrompt('Generate text') + ->generate(); +``` + +## Advanced: Webhooks (Future) + +> **Note:** Webhook support is planned but not yet implemented. + +Replicate supports webhooks for async notifications when predictions complete: + +```php +// Future API +'replicate' => [ + 'webhook_url' => 'https://your-app.com/webhooks/replicate', +] + +// Prediction will POST to webhook_url when complete +``` + +## Cost Optimization Tips + +1. **Use smaller models when possible**: `meta-llama-3.1-8b-instruct` is much cheaper than `405b-instruct` +2. **Optimize image generation**: FLUX Schnell (1-4 steps) is faster and cheaper than FLUX Dev +3. **Batch embeddings**: Process multiple texts in one request +4. **Monitor polling**: Reduce `polling_interval` for faster results but more API calls + +## Rate Limits + +Replicate's rate limits vary by account tier: +- **Free tier**: Limited predictions per month +- **Pro/Team**: Higher limits based on subscription + +Prism automatically handles 429 responses with `PrismRateLimitedException`. + +## Resources + +- [Replicate Documentation](https://replicate.com/docs) +- [Replicate API Reference](https://replicate.com/docs/reference/http) +- [Replicate Models](https://replicate.com/explore) +- [Get API Token](https://replicate.com/account) + +## Testing + +Prism provides comprehensive test coverage for Replicate: + +```bash +./vendor/bin/pest tests/Providers/Replicate/ +``` + +**Test fixtures:** All tests use real API response fixtures for consistent, offline testing. 
diff --git a/src/Enums/Provider.php b/src/Enums/Provider.php index 21c7a17f6..b9ca962c7 100644 --- a/src/Enums/Provider.php +++ b/src/Enums/Provider.php @@ -17,4 +17,5 @@ enum Provider: string case Gemini = 'gemini'; case VoyageAI = 'voyageai'; case ElevenLabs = 'elevenlabs'; + case Replicate = 'replicate'; } diff --git a/src/PrismManager.php b/src/PrismManager.php index 943024f5b..963eaf095 100644 --- a/src/PrismManager.php +++ b/src/PrismManager.php @@ -18,6 +18,7 @@ use Prism\Prism\Providers\OpenAI\OpenAI; use Prism\Prism\Providers\OpenRouter\OpenRouter; use Prism\Prism\Providers\Provider; +use Prism\Prism\Providers\Replicate\Replicate; use Prism\Prism\Providers\VoyageAI\VoyageAI; use Prism\Prism\Providers\XAI\XAI; use RuntimeException; @@ -224,4 +225,19 @@ protected function createElevenlabsProvider(array $config): ElevenLabs url: $config['url'] ?? 'https://api.elevenlabs.io/v1/', ); } + + /** + * @param array $config + */ + protected function createReplicateProvider(array $config): Replicate + { + return new Replicate( + apiKey: $config['api_key'] ?? '', + url: $config['url'] ?? 'https://api.replicate.com/v1', + webhookUrl: $config['webhook_url'] ?? null, + useSyncMode: $config['use_sync_mode'] ?? true, + pollingInterval: $config['polling_interval'] ?? 1000, + maxWaitTime: $config['max_wait_time'] ?? 
60, + ); + } } diff --git a/src/Providers/Replicate/Concerns/HandlesPredictions.php b/src/Providers/Replicate/Concerns/HandlesPredictions.php new file mode 100644 index 000000000..6758f35fc --- /dev/null +++ b/src/Providers/Replicate/Concerns/HandlesPredictions.php @@ -0,0 +1,145 @@ + $payload + * @param bool $wait Whether to use sync mode (Prefer: wait header) + * @param int $waitTimeout Timeout in seconds for sync mode (max 60) + * + * @throws RequestException + */ + protected function createPrediction( + PendingRequest $client, + array $payload, + bool $wait = false, + int $waitTimeout = 60 + ): ReplicatePrediction { + // If sync mode is enabled, add the Prefer: wait header + if ($wait) { + // Replicate allows max 60 seconds for the Prefer: wait header + $timeout = min($waitTimeout, 60); + $client = $client->withHeaders([ + 'Prefer' => "wait={$timeout}", + ]); + } + + $response = $client->post('/predictions', $payload); + + if ($response->failed()) { + throw new RequestException($response); + } + + return ReplicatePrediction::fromArray($response->json()); + } + + /** + * Get the status of a prediction. + * + * @throws RequestException + */ + protected function getPrediction(PendingRequest $client, string $predictionId): ReplicatePrediction + { + $response = $client->get("/predictions/{$predictionId}"); + + if ($response->failed()) { + throw new RequestException($response); + } + + return ReplicatePrediction::fromArray($response->json()); + } + + /** + * Wait for a prediction to complete by polling. 
+ * + * @throws PrismException + */ + protected function waitForPrediction( + PendingRequest $client, + string $predictionId, + int $pollingInterval = 1000, + int $maxWaitTime = 60 + ): ReplicatePrediction { + $startTime = time(); + $maxWaitSeconds = $maxWaitTime; + + while (true) { + $prediction = $this->getPrediction($client, $predictionId); + + if ($prediction->isComplete()) { + return $prediction; + } + + if (time() - $startTime > $maxWaitSeconds) { + throw new PrismException( + "Replicate: prediction timed out after {$maxWaitSeconds} seconds" + ); + } + + // Convert milliseconds to microseconds for usleep + usleep($pollingInterval * 1000); + } + } + + /** + * Cancel a prediction. + * + * @throws RequestException + */ + protected function cancelPrediction(PendingRequest $client, string $predictionId): ReplicatePrediction + { + $response = $client->post("/predictions/{$predictionId}/cancel"); + + if ($response->failed()) { + throw new RequestException($response); + } + + return ReplicatePrediction::fromArray($response->json()); + } + + /** + * Create a prediction and wait for completion. + * Uses sync mode (Prefer: wait) if enabled, otherwise polls. + * + * @param array $payload + * @param bool $useSyncMode Whether to use Prefer: wait header + * + * @throws RequestException + * @throws PrismException + */ + protected function createAndWaitForPrediction( + PendingRequest $client, + array $payload, + bool $useSyncMode = true, + int $pollingInterval = 1000, + int $maxWaitTime = 60 + ): ReplicatePrediction { + if ($useSyncMode) { + // Use sync mode: Prefer: wait header + $prediction = $this->createPrediction($client, $payload, wait: true, waitTimeout: $maxWaitTime); + + // If prediction is still not complete (timed out), fall back to polling + if (! 
$prediction->isComplete()) { + return $this->waitForPrediction($client, $prediction->id, $pollingInterval, $maxWaitTime); + } + + return $prediction; + } + + // Use async mode: create then poll + $prediction = $this->createPrediction($client, $payload); + + return $this->waitForPrediction($client, $prediction->id, $pollingInterval, $maxWaitTime); + } +} diff --git a/src/Providers/Replicate/Handlers/Audio.php b/src/Providers/Replicate/Handlers/Audio.php new file mode 100644 index 000000000..86fe480cd --- /dev/null +++ b/src/Providers/Replicate/Handlers/Audio.php @@ -0,0 +1,225 @@ + $request->input(), + ]; + + // Add provider-specific options (voice, speed, etc.) + $providerOptions = $request->providerOptions(); + if (! empty($providerOptions)) { + $input = array_merge($input, $providerOptions); + } + + // Create prediction + $payload = [ + 'version' => $this->extractVersionFromModel($request->model()), + 'input' => $input, + ]; + + // Create prediction and wait for completion (uses sync mode if enabled) + $prediction = $this->createAndWaitForPrediction( + $this->client, + $payload, + $this->useSyncMode, + $this->pollingInterval, + $this->maxWaitTime + ); + + // Check for errors + if ($prediction->isFailed()) { + throw new PrismException( + "Replicate TTS prediction failed: {$prediction->error}" + ); + } + + // Extract audio URL from output + $audioUrl = $this->extractAudioUrl($prediction->output); + + if (! 
$audioUrl) { + throw new PrismException('No audio output found in Replicate response'); + } + + // Download audio content + $audioContent = $this->client->get($audioUrl)->body(); + $base64Audio = base64_encode($audioContent); + + return new AudioResponse( + audio: new GeneratedAudio( + base64: $base64Audio, + ), + ); + } + + public function handleSpeechToText(SpeechToTextRequest $request): TextResponse + { + $audioInput = $request->input()->url(); + + // If "URL" is actually a local file path, or if we have raw content, convert to data URL + if ($audioInput && is_file($audioInput)) { + // URL is actually a local file path + $content = file_get_contents($audioInput); + if ($content === false) { + throw new PrismException("Failed to read audio file: {$audioInput}"); + } + $base64 = base64_encode($content); + $mimeType = mime_content_type($audioInput) ?: 'audio/mpeg'; + $audioInput = "data:{$mimeType};base64,{$base64}"; + } elseif (! $audioInput && $request->input()->rawContent()) { + // No URL but we have content (using fromLocalPath) + $base64 = $request->input()->base64(); + $mimeType = $request->input()->mimeType() ?? 'audio/mpeg'; + $audioInput = "data:{$mimeType};base64,{$base64}"; + } + + // Build input parameters for speech-to-text + $input = [ + 'audio' => $audioInput, + 'task' => 'transcribe', + ]; + + // Add provider-specific options (language, etc.) + $providerOptions = $request->providerOptions(); + if (! 
empty($providerOptions)) { + $input = array_merge($input, $providerOptions); + } + + // Create prediction + $payload = [ + 'version' => $this->extractVersionFromModel($request->model()), + 'input' => $input, + ]; + + // Create prediction and wait for completion (uses sync mode if enabled) + $prediction = $this->createAndWaitForPrediction( + $this->client, + $payload, + $this->useSyncMode, + $this->pollingInterval, + $this->maxWaitTime + ); + + // Check for errors + if ($prediction->isFailed()) { + throw new PrismException( + "Replicate STT prediction failed: {$prediction->error}" + ); + } + + // Extract text from output + $text = $this->extractTextFromOutput($prediction->output); + + return new TextResponse( + text: $text, + usage: new Usage( + promptTokens: 0, + completionTokens: 0, + ), + additionalContent: [ + 'metrics' => $prediction->metrics, + ], + ); + } + + /** + * Extract version ID from model string. + */ + protected function extractVersionFromModel(string $model): string + { + // Otherwise, return as-is and let Replicate use the latest version + return $model; + } + + /** + * Extract audio URL from Replicate output. + */ + protected function extractAudioUrl(mixed $output): ?string + { + if (is_string($output)) { + return $output; + } + + if (is_array($output)) { + // Output might be an array with a URL + if (isset($output[0]) && is_string($output[0])) { + return $output[0]; + } + + // Or it might have an 'audio' or 'url' key + if (isset($output['audio'])) { + return is_string($output['audio']) ? $output['audio'] : null; + } + + if (isset($output['url'])) { + return is_string($output['url']) ? $output['url'] : null; + } + } + + return null; + } + + /** + * Extract text from Replicate output. 
+ */ + protected function extractTextFromOutput(mixed $output): string + { + if (is_string($output)) { + return $output; + } + + if (is_array($output)) { + // Check for common keys + if (isset($output['text'])) { + return (string) $output['text']; + } + + if (isset($output['transcription'])) { + return (string) $output['transcription']; + } + + if (isset($output['segments'])) { + // Whisper-style output with segments + /** @var array $segments */ + $segments = $output['segments']; + + return collect($segments) + ->pluck('text') + ->join(' '); + } + + // If it's an array of strings, join them + if (isset($output[0]) && is_string($output[0])) { + return implode('', $output); + } + } + + return ''; + } +} diff --git a/src/Providers/Replicate/Handlers/Embeddings.php b/src/Providers/Replicate/Handlers/Embeddings.php new file mode 100644 index 000000000..85b5dba86 --- /dev/null +++ b/src/Providers/Replicate/Handlers/Embeddings.php @@ -0,0 +1,103 @@ +inputs() as $input) { + $payload = [ + 'version' => $this->extractVersionFromModel($request->model()), + 'input' => array_merge( + ['text' => $input], + $this->buildInputParameters($request) + ), + ]; + + // Create prediction and wait for completion (uses sync mode if enabled) + $completedPrediction = $this->createAndWaitForPrediction( + $this->client, + $payload, + $this->useSyncMode, + $this->pollingInterval, + $this->maxWaitTime + ); + + // Extract embedding from output + $vectors = $completedPrediction->output['vectors'] ?? []; + if (! empty($vectors)) { + $embeddings[] = Embedding::fromArray($vectors); + $totalTokens += $this->estimateTokens($input); + } + } + + return new EmbeddingsResponse( + embeddings: $embeddings, + usage: new EmbeddingsUsage($totalTokens), + meta: new Meta( + id: '', + model: $request->model(), + ), + ); + } + + /** + * Build input parameters from request. 
+ * + * @return array + */ + protected function buildInputParameters(Request $request): array + { + $params = []; + + // Map provider options + foreach ($request->providerOptions() as $key => $value) { + $params[$key] = $value; + } + + return $params; + } + + /** + * Estimate tokens for usage tracking. + * Rough approximation: ~4 characters per token. + */ + protected function estimateTokens(string $text): int + { + return (int) ceil(mb_strlen($text) / 4); + } + + /** + * Extract version from model string. + * Supports formats like "owner/model:version" or just "owner/model". + */ + protected function extractVersionFromModel(string $model): string + { + // Return as-is and let Replicate use the latest version or resolve the format + return $model; + } +} diff --git a/src/Providers/Replicate/Handlers/Images.php b/src/Providers/Replicate/Handlers/Images.php new file mode 100644 index 000000000..80247467f --- /dev/null +++ b/src/Providers/Replicate/Handlers/Images.php @@ -0,0 +1,146 @@ + $this->extractVersionFromModel($request->model()), + 'input' => array_merge( + ['prompt' => $request->prompt()], + $this->buildInputParameters($request) + ), + ]; + + // Create prediction and wait for completion (uses sync mode if enabled) + $completedPrediction = $this->createAndWaitForPrediction( + $this->client, + $payload, + $this->useSyncMode, + $this->pollingInterval, + $this->maxWaitTime + ); + + // Extract images from output + $images = $this->extractImages($completedPrediction->output ?? []); + + $responseBuilder = new ResponseBuilder( + usage: new Usage( + promptTokens: 0, // Replicate doesn't provide token usage for image generation + completionTokens: 0, + ), + meta: new Meta( + id: $completedPrediction->id, + model: $request->model(), + ), + images: $images, + ); + + return $responseBuilder->toResponse(); + } + + /** + * Build input parameters from request. 
+ * + * @return array + */ + protected function buildInputParameters(Request $request): array + { + $params = []; + + // Map provider options + foreach ($request->providerOptions() as $key => $value) { + $params[$key] = $value; + } + + return $params; + } + + /** + * Extract images from prediction output. + * + * @return GeneratedImage[] + */ + protected function extractImages(mixed $output): array + { + $images = []; + + // Replicate returns either a single URL string or an array of URLs + if (is_string($output)) { + $output = [$output]; + } + + if (! is_array($output)) { + return $images; + } + + foreach ($output as $imageUrl) { + if (is_string($imageUrl)) { + // Download the image and convert to base64 + $base64 = $this->downloadImageAsBase64($imageUrl); + + $images[] = new GeneratedImage( + url: $imageUrl, + base64: $base64, // Replicate doesn't provide revised prompts + ); + } + } + + return $images; + } + + /** + * Download an image from URL and convert to base64. + */ + protected function downloadImageAsBase64(string $url): ?string + { + try { + $response = Http::get($url); + + if ($response->successful()) { + return base64_encode($response->body()); + } + } catch (\Exception) { + // If download fails, return null and rely on URL + } + + return null; + } + + /** + * Extract version from model string. + */ + /** + * Extract version ID from model string. + * Supports formats like "owner/model:version" or just "owner/model". 
+ */ + protected function extractVersionFromModel(string $model): string + { + // Return as-is and let Replicate use the latest version or resolve the format + return $model; + } +} diff --git a/src/Providers/Replicate/Handlers/Stream.php b/src/Providers/Replicate/Handlers/Stream.php new file mode 100644 index 000000000..66b94ca92 --- /dev/null +++ b/src/Providers/Replicate/Handlers/Stream.php @@ -0,0 +1,329 @@ +state = new StreamState; + } + + /** + * @return Generator + */ + public function handle(Request $request): Generator + { + // Tool calling is not supported with streaming + if ($request->tools() !== []) { + throw new PrismException( + 'Replicate: Tool calling is not supported with streaming. ' + .'Use ->generate() instead of ->stream()' + ); + } + + $this->state->reset()->withMessageId(EventID::generate()); + + // Build the prompt from messages + $prompt = MessageMap::map($request->messages()); + + // Prepare the prediction payload with stream enabled + $payload = [ + 'version' => $this->extractVersionFromModel($request->model()), + 'input' => array_merge( + ['prompt' => $prompt], + $this->buildInputParameters($request) + ), + 'stream' => true, // Enable streaming + ]; + + // Create prediction + $prediction = $this->createPrediction($this->client, $payload); + + // Emit stream start + yield new StreamStartEvent( + id: EventID::generate(), + timestamp: time(), + model: $request->model(), + provider: 'replicate', + ); + + // Check if streaming URL is available + $streamUrl = $prediction->urls['stream'] ?? 
null; + + if ($streamUrl !== null) { + // Use real-time SSE streaming + yield from $this->processSSEStream($streamUrl, $prediction->id); + } else { + // Fallback to simulated streaming (poll + tokenize) + $completedPrediction = $this->waitForPrediction( + $this->client, + $prediction->id, + $this->pollingInterval, + $this->maxWaitTime + ); + + yield from $this->processTokenizedOutput($completedPrediction); + } + } + + /** + * Process real-time SSE stream from Replicate. + * + * @return Generator + */ + protected function processSSEStream(string $streamUrl, string $predictionId): Generator + { + // Connect to the SSE stream with proper headers + $response = $this->client + ->withHeaders(['Accept' => 'text/event-stream']) + ->withOptions(['stream' => true]) + ->get($streamUrl); + + $stream = $response->getBody(); + + $textStarted = false; + $finalStatus = 'succeeded'; + $metrics = []; + $currentEvent = null; + + try { + while (! $stream->eof()) { + $line = $this->readLine($stream); + // Skip empty lines and comments + if ($line === '') { + continue; + } + if ($line === "\n") { + continue; + } + if (str_starts_with($line, ':')) { + continue; + } + + // Parse SSE field + if (str_starts_with($line, 'event:')) { + $currentEvent = trim(substr($line, strlen('event:'))); + } elseif (str_starts_with($line, 'data:')) { + $data = substr($line, strlen('data:')); + // Remove leading space if present (SSE spec) + if (str_starts_with($data, ' ')) { + $data = substr($data, 1); + } + // Remove trailing newline + $data = rtrim($data, "\n"); + + // Handle event based on type + if ($currentEvent === 'output') { + // Text output event (data is plain text) + if (! 
$textStarted) { + yield new TextStartEvent( + id: EventID::generate(), + timestamp: time(), + messageId: $this->state->messageId() + ); + $textStarted = true; + } + + if ($data !== '') { + $this->state->appendText($data); + + yield new TextDeltaEvent( + id: EventID::generate(), + timestamp: time(), + delta: $data, + messageId: $this->state->messageId() + ); + } + } elseif ($currentEvent === 'done') { + // Stream completion event (data is JSON) + try { + $doneData = json_decode($data, true, flags: JSON_THROW_ON_ERROR); + $finalStatus = $doneData['status'] ?? 'succeeded'; + $metrics = $doneData['metrics'] ?? []; + } catch (Throwable) { + // Empty done event + $finalStatus = 'succeeded'; + } + break; + } elseif ($currentEvent === 'error') { + // Error event (data is JSON) + try { + $errorData = json_decode($data, true, flags: JSON_THROW_ON_ERROR); + $errorMessage = $errorData['detail'] ?? $data; + } catch (Throwable) { + $errorMessage = $data; + } + throw new PrismException("Replicate streaming error: {$errorMessage}"); + } + + // Reset event type after processing + $currentEvent = null; + } + } + } finally { + $stream->close(); + } + + // Emit text complete if text was started + if ($textStarted) { + yield new TextCompleteEvent( + id: EventID::generate(), + timestamp: time(), + messageId: $this->state->messageId() + ); + } + + // Emit stream end + yield new StreamEndEvent( + id: EventID::generate(), + timestamp: time(), + finishReason: FinishReasonMap::map($finalStatus), + usage: new Usage( + promptTokens: $metrics['input_token_count'] ?? 0, + completionTokens: $metrics['output_token_count'] ?? 0, + ), + ); + } + + /** + * Read a single line from the stream. + */ + protected function readLine(StreamInterface $stream): string + { + $buffer = ''; + + while (! 
$stream->eof()) { + $byte = $stream->read(1); + + if ($byte === '') { + return $buffer; + } + + $buffer .= $byte; + + if ($byte === "\n") { + break; + } + } + + return $buffer; + } + + /** + * Process tokenized output as streaming events (fallback method). + * + * @param object{id: string, status: string, output: mixed, error: string|null, metrics: array} $prediction + * @return Generator + */ + protected function processTokenizedOutput(object $prediction): Generator + { + $output = $prediction->output ?? []; + + if (! is_array($output)) { + $output = [$output]; + } + + // Emit text start + yield new TextStartEvent( + id: EventID::generate(), + timestamp: time(), + messageId: $this->state->messageId() + ); + + // Stream each token as a delta + foreach ($output as $token) { + if (is_string($token) && $token !== '') { + $this->state->appendText($token); + + yield new TextDeltaEvent( + id: EventID::generate(), + timestamp: time(), + delta: $token, + messageId: $this->state->messageId() + ); + } + } + + // Emit text complete + yield new TextCompleteEvent( + id: EventID::generate(), + timestamp: time(), + messageId: $this->state->messageId() + ); + + // Emit stream end + yield new StreamEndEvent( + id: EventID::generate(), + timestamp: time(), + finishReason: FinishReasonMap::map($prediction->status), + usage: new Usage( + promptTokens: $prediction->metrics['input_token_count'] ?? 0, + completionTokens: $prediction->metrics['output_token_count'] ?? 0, + ), + ); + } + + /** + * Build input parameters from request. + * + * @return array + */ + protected function buildInputParameters(Request $request): array + { + $params = []; + + if ($request->maxTokens()) { + $params['max_tokens'] = $request->maxTokens(); + } + + // Map provider options + foreach ($request->providerOptions() as $key => $value) { + $params[$key] = $value; + } + + return $params; + } + + /** + * Extract version from model string. + */ + /** + * Extract version ID from model string. 
+ * Supports formats like "owner/model:version" or just "owner/model". + */ + protected function extractVersionFromModel(string $model): string + { + // Return as-is and let Replicate use the latest version or resolve the format + return $model; + } +} diff --git a/src/Providers/Replicate/Handlers/Structured.php b/src/Providers/Replicate/Handlers/Structured.php new file mode 100644 index 000000000..29b11bbdc --- /dev/null +++ b/src/Providers/Replicate/Handlers/Structured.php @@ -0,0 +1,184 @@ +responseBuilder = new ResponseBuilder; + } + + public function handle(Request $request): Response + { + // Build the prompt from messages with JSON instruction + $prompt = $this->buildStructuredPrompt($request); + + // Prepare the prediction payload + $payload = [ + 'version' => $this->extractVersionFromModel($request->model()), + 'input' => array_merge( + ['prompt' => $prompt], + $this->buildInputParameters($request) + ), + ]; + + // Create prediction and wait for completion (uses sync mode if enabled) + $completedPrediction = $this->createAndWaitForPrediction( + $this->client, + $payload, + $this->useSyncMode, + $this->pollingInterval, + $this->maxWaitTime + ); + + // Extract and parse JSON output + $text = $this->extractTextFromOutput($completedPrediction->output ?? []); + $structured = $this->parseStructuredOutput($text, $request); + + $responseMessage = new AssistantMessage($text); + $request->addMessage($responseMessage); + + $this->addStep($completedPrediction, $text, $structured, $request); + + return $this->responseBuilder->toResponse(); + } + + /** + * Build prompt with JSON instructions based on mode. 
+ */ + protected function buildStructuredPrompt(Request $request): string + { + $basePrompt = MessageMap::map($request->messages()); + + // Add JSON instruction based on mode + $schemaJson = json_encode($request->schema()->toArray(), JSON_PRETTY_PRINT); + $jsonInstruction = match ($request->mode()) { + StructuredMode::Json, StructuredMode::Auto => "\n\nRespond ONLY with valid JSON that matches this schema: ".$schemaJson, + StructuredMode::Structured => "\n\nRespond ONLY with valid JSON that matches this schema: ".$schemaJson, + }; + + return $basePrompt.$jsonInstruction; + } + + /** + * Build input parameters from request. + * + * @return array + */ + protected function buildInputParameters(Request $request): array + { + $params = ['max_tokens' => 4096]; // Default for structured output + + // Map provider options + foreach ($request->providerOptions() as $key => $value) { + $params[$key] = $value; + } + + return $params; + } + + /** + * Extract text from prediction output. + */ + protected function extractTextFromOutput(mixed $output): string + { + if (is_string($output)) { + return $output; + } + + if (is_array($output)) { + return implode('', $output); + } + + return ''; + } + + /** + * Parse structured output based on mode. + * + * @return array + */ + protected function parseStructuredOutput(string $text, Request $request): array + { + // Try to extract JSON from the response + $jsonMatch = []; + if (preg_match('/\{.*\}/s', $text, $jsonMatch)) { + $json = json_decode($jsonMatch[0], true); + + if (json_last_error() === JSON_ERROR_NONE && is_array($json)) { + return $json; + } + } + + // If we can't parse JSON in structured modes, throw an exception + if (in_array($request->mode(), [StructuredMode::Json, StructuredMode::Structured])) { + throw new PrismException('Replicate: Failed to parse structured JSON output'); + } + + return []; + } + + /** + * Add step to response builder. 
+ * + * @param object{id: string, status: string, output: mixed, error: string|null, metrics: array} $prediction + * @param array $structured + */ + protected function addStep(object $prediction, string $text, array $structured, Request $request): void + { + $this->responseBuilder->addStep(new Step( + text: $text, + finishReason: FinishReasonMap::map($prediction->status), + usage: new Usage( + promptTokens: $prediction->metrics['input_token_count'] ?? 0, + completionTokens: $prediction->metrics['output_token_count'] ?? 0, + ), + meta: new Meta( + id: $prediction->id, + model: $request->model(), + ), + messages: $request->messages(), + systemPrompts: $request->systemPrompts(), + additionalContent: [], + structured: $structured, + )); + } + + /** + * Extract version from model string. + */ + /** + * Extract version ID from model string. + * Supports formats like "owner/model:version" or just "owner/model". + */ + protected function extractVersionFromModel(string $model): string + { + // Return as-is and let Replicate use the latest version or resolve the format + return $model; + } +} diff --git a/src/Providers/Replicate/Handlers/Text.php b/src/Providers/Replicate/Handlers/Text.php new file mode 100644 index 000000000..e1a4a302f --- /dev/null +++ b/src/Providers/Replicate/Handlers/Text.php @@ -0,0 +1,199 @@ +responseBuilder = new ResponseBuilder; + } + + public function handle(Request $request): Response + { + // Build payload + $payload = $this->buildPayload($request); + + // Create prediction and wait for completion (uses sync mode if enabled) + $prediction = $this->createAndWaitForPrediction( + $this->client, + $payload, + $this->useSyncMode, + $this->pollingInterval, + $this->maxWaitTime + ); + + // Check for errors + if ($prediction->isFailed()) { + throw new PrismException( + "Replicate prediction failed: {$prediction->error}" + ); + } + + // Extract the text output + $text = $this->extractTextFromOutput($prediction->output); + + // Create assistant message + 
$responseMessage = new AssistantMessage( + content: $text, + ); + + $request->addMessage($responseMessage); + + // Add step to response builder + $this->responseBuilder->addStep(new Step( + text: $text, + finishReason: FinishReasonMap::map($prediction->status), + toolCalls: [], + toolResults: [], + usage: new Usage( + promptTokens: 0, // Replicate doesn't provide token counts + completionTokens: 0, + ), + meta: new Meta( + id: $prediction->id, + model: $request->model(), + ), + messages: $request->messages(), + systemPrompts: $request->systemPrompts(), + additionalContent: [ + 'metrics' => $prediction->metrics, + ], + )); + + return $this->responseBuilder->toResponse(); + } + + /** + * Build the prediction payload. + * + * @return array + */ + protected function buildPayload(Request $request): array + { + return [ + 'version' => $this->extractVersionFromModel($request->model()), + 'input' => $this->buildInput($request), + ]; + } + + /** + * Build input parameters. + * + * @return array + */ + protected function buildInput(Request $request): array + { + $input = ['prompt' => MessageMap::map($request->messages())]; + + // Build system prompt + if ($request->systemPrompts() !== []) { + $input['system_prompt'] = implode("\n\n", array_map( + fn ($prompt): string => $prompt->content, + $request->systemPrompts() + )); + } + + // Add other parameters + $params = $this->buildInputParameters($request); + + return array_merge($input, $params); + } + + /** + * Extract version from model string for the Replicate API. + * The version field accepts: + * - "owner/model:version" format (uses specific version) + * - "owner/model" format (uses latest version) + * - Just "version_hash" (64-char hash) + */ + protected function extractVersionFromModel(string $model): string + { + // Replicate API accepts the full string as-is + // It handles owner/model, owner/model:version, or version hash formats + return $model; + } + + /** + * Build input parameters from request. 
+ * + * @return array + */ + protected function buildInputParameters(Request $request): array + { + $params = []; + + if ($request->maxTokens() !== null) { + $params['max_tokens'] = $request->maxTokens(); + $params['max_length'] = $request->maxTokens(); // Some models use max_length + } + + if ($request->temperature() !== null) { + $params['temperature'] = $request->temperature(); + } + + if ($request->topP() !== null) { + $params['top_p'] = $request->topP(); + } + + // Add any provider-specific options + $providerOptions = $request->providerOptions(); + if (! empty($providerOptions)) { + return array_merge($params, $providerOptions); + } + + return $params; + } + + /** + * Extract text from Replicate output. + * Replicate outputs can be strings, arrays, or objects. + */ + protected function extractTextFromOutput(mixed $output): string + { + if (is_string($output)) { + return $output; + } + + if (is_array($output)) { + // If it's an array of strings, join them + if (isset($output[0]) && is_string($output[0])) { + return implode('', $output); + } + + // If it has a 'text' or 'output' key + if (isset($output['text'])) { + return (string) $output['text']; + } + + if (isset($output['output'])) { + return $this->extractTextFromOutput($output['output']); + } + } + + return ''; + } +} diff --git a/src/Providers/Replicate/Maps/FinishReasonMap.php b/src/Providers/Replicate/Maps/FinishReasonMap.php new file mode 100644 index 000000000..f17dfbd64 --- /dev/null +++ b/src/Providers/Replicate/Maps/FinishReasonMap.php @@ -0,0 +1,19 @@ + FinishReason::Stop, + 'failed', 'canceled' => FinishReason::Error, + default => FinishReason::Unknown, + }; + } +} diff --git a/src/Providers/Replicate/Maps/MessageMap.php b/src/Providers/Replicate/Maps/MessageMap.php new file mode 100644 index 000000000..c32858520 --- /dev/null +++ b/src/Providers/Replicate/Maps/MessageMap.php @@ -0,0 +1,70 @@ + $messages + */ + public static function map(array $messages): string + { + $prompt = ''; + + foreach 
($messages as $message) { + $prompt .= match ($message::class) { + SystemMessage::class => self::mapSystemMessage($message), + UserMessage::class => self::mapUserMessage($message), + AssistantMessage::class => self::mapAssistantMessage($message), + ToolResultMessage::class => self::mapToolResultMessage($message), + default => '', + }; + } + + return trim($prompt); + } + + protected static function mapSystemMessage(SystemMessage $message): string + { + return "System: {$message->content}\n\n"; + } + + protected static function mapUserMessage(UserMessage $message): string + { + return "User: {$message->text()}\n\n"; + } + + protected static function mapAssistantMessage(AssistantMessage $message): string + { + return "Assistant: {$message->content}\n\n"; + } + + protected static function mapToolResultMessage(ToolResultMessage $message): string + { + $results = []; + + foreach ($message->toolResults as $result) { + $resultText = is_string($result->result) + ? $result->result + : json_encode($result->result); + + $results[] = sprintf( + "Tool: %s\nResult: %s", + $result->toolName, + $resultText + ); + } + + return "Tool Results:\n".implode("\n\n", $results)."\n\n"; + } +} diff --git a/src/Providers/Replicate/Replicate.php b/src/Providers/Replicate/Replicate.php new file mode 100644 index 000000000..300aac662 --- /dev/null +++ b/src/Providers/Replicate/Replicate.php @@ -0,0 +1,163 @@ +client($request->clientOptions(), $request->clientRetry()), + $this->useSyncMode, + $this->pollingInterval, + $this->maxWaitTime + ); + + return $handler->handle($request); + } + + #[\Override] + public function textToSpeech(TextToSpeechRequest $request): TextToSpeechResponse + { + $handler = new Audio( + $this->client($request->clientOptions(), $request->clientRetry()), + $this->useSyncMode, + $this->pollingInterval, + $this->maxWaitTime + ); + + return $handler->handleTextToSpeech($request); + } + + #[\Override] + public function speechToText(SpeechToTextRequest $request): 
SpeechToTextResponse + { + $handler = new Audio( + $this->client($request->clientOptions(), $request->clientRetry()), + $this->useSyncMode, + $this->pollingInterval, + $this->maxWaitTime + ); + + return $handler->handleSpeechToText($request); + } + + #[\Override] + public function images(ImagesRequest $request): ImagesResponse + { + $handler = new Images( + $this->client($request->clientOptions(), $request->clientRetry()), + $this->useSyncMode, + $this->pollingInterval, + $this->maxWaitTime + ); + + return $handler->handle($request); + } + + #[\Override] + public function structured(StructuredRequest $request): StructuredResponse + { + $handler = new StructuredHandler( + $this->client($request->clientOptions(), $request->clientRetry()), + $this->useSyncMode, + $this->pollingInterval, + $this->maxWaitTime + ); + + return $handler->handle($request); + } + + #[\Override] + public function stream(TextRequest $request): Generator + { + $handler = new Stream( + $this->client($request->clientOptions(), $request->clientRetry()), + $this->useSyncMode, + $this->pollingInterval, + $this->maxWaitTime + ); + + return $handler->handle($request); + } + + #[\Override] + public function embeddings(EmbeddingsRequest $request): EmbeddingsResponse + { + $handler = new Embeddings( + $this->client($request->clientOptions(), $request->clientRetry()), + $this->useSyncMode, + $this->pollingInterval, + $this->maxWaitTime + ); + + return $handler->handle($request); + } + + #[\Override] + public function handleRequestException(string $model, RequestException $e): never + { + match ($e->response->getStatusCode()) { + 429 => throw PrismRateLimitedException::make([]), + 529 => throw PrismProviderOverloadedException::make(ProviderName::Replicate), + 413 => throw PrismRequestTooLargeException::make(ProviderName::Replicate), + default => throw PrismException::providerRequestError($model, $e), + }; + } + + /** + * @param array $options + * @param array $retry + */ + protected function client(array 
$options = [], array $retry = [], ?string $baseUrl = null): PendingRequest + { + return $this->baseClient() + ->withToken($this->apiKey) + ->withOptions($options) + ->when($retry !== [], fn ($client) => $client->retry(...$retry)) + ->baseUrl($baseUrl ?? $this->url); + } +} diff --git a/src/Providers/Replicate/ValueObjects/ReplicatePrediction.php b/src/Providers/Replicate/ValueObjects/ReplicatePrediction.php new file mode 100644 index 000000000..2fe4ee09b --- /dev/null +++ b/src/Providers/Replicate/ValueObjects/ReplicatePrediction.php @@ -0,0 +1,56 @@ + $input + * @param array $urls + * @param array $metrics + */ + public function __construct( + public string $id, + public string $status, + public array $input, + public mixed $output, + public ?string $error, + public ?string $logs, + public array $urls, + public array $metrics = [], + ) {} + + /** + * @param array $data + */ + public static function fromArray(array $data): self + { + return new self( + id: $data['id'], + status: $data['status'], + input: $data['input'] ?? [], + output: $data['output'] ?? null, + error: $data['error'] ?? null, + logs: $data['logs'] ?? null, + urls: $data['urls'] ?? [], + metrics: $data['metrics'] ?? 
[], + ); + } + + public function isComplete(): bool + { + return in_array($this->status, ['succeeded', 'failed', 'canceled']); + } + + public function isSuccessful(): bool + { + return $this->status === 'succeeded'; + } + + public function isFailed(): bool + { + return in_array($this->status, ['failed', 'canceled']); + } +} diff --git a/tests/Fixtures/replicate/embeddings-multiple-inputs-1.json b/tests/Fixtures/replicate/embeddings-multiple-inputs-1.json new file mode 100644 index 000000000..6e63cc5fd --- /dev/null +++ b/tests/Fixtures/replicate/embeddings-multiple-inputs-1.json @@ -0,0 +1,20 @@ +{ + "id": "g3yqcwrxaxrg80ct9ht9dknmym", + "model": "mark3labs\/embeddings-gte-base", + "version": "d619cff29338b9a37c3d06605042e1ff0594a8c3eff0175fd6967f5643fc4d47", + "input": { + "text": "The food was delicious." + }, + "logs": "", + "output": null, + "data_removed": false, + "error": null, + "source": "api", + "status": "starting", + "created_at": "2025-11-04T01:21:09.463Z", + "urls": { + "cancel": "https:\/\/api.replicate.com\/v1\/predictions\/g3yqcwrxaxrg80ct9ht9dknmym\/cancel", + "get": "https:\/\/api.replicate.com\/v1\/predictions\/g3yqcwrxaxrg80ct9ht9dknmym", + "web": "https:\/\/replicate.com\/p\/g3yqcwrxaxrg80ct9ht9dknmym" + } +} \ No newline at end of file diff --git a/tests/Fixtures/replicate/embeddings-multiple-inputs-2.json b/tests/Fixtures/replicate/embeddings-multiple-inputs-2.json new file mode 100644 index 000000000..6ca98bc8b --- /dev/null +++ b/tests/Fixtures/replicate/embeddings-multiple-inputs-2.json @@ -0,0 +1,28 @@ +{ + "id": "g3yqcwrxaxrg80ct9ht9dknmym", + "model": "mark3labs/embeddings-gte-base", + "version": "d619cff29338b9a37c3d06605042e1ff0594a8c3eff0175fd6967f5643fc4d47", + "input": { + "text": "The food was delicious." 
+ }, + "logs": "", + "output": { + "text": "The food was delicious.", + "vectors": [0.015234, -0.032451, 0.042132, -0.012543, 0.051234, 0.023451, -0.041234, 0.033214, -0.025341, 0.041235, 0.012341, -0.032145, 0.051432, -0.023451, 0.034512, -0.041235, 0.025341, 0.032145, -0.015234, 0.042134] + }, + "data_removed": false, + "error": null, + "source": "api", + "status": "succeeded", + "created_at": "2025-11-04T01:21:10.100Z", + "started_at": "2025-11-04T01:21:10.500Z", + "completed_at": "2025-11-04T01:21:10.900Z", + "urls": { + "cancel": "https://api.replicate.com/v1/predictions/g3yqcwrxaxrg80ct9ht9dknmym/cancel", + "get": "https://api.replicate.com/v1/predictions/g3yqcwrxaxrg80ct9ht9dknmym" + }, + "metrics": { + "predict_time": 0.340000, + "total_time": 0.400000 + } +} diff --git a/tests/Fixtures/replicate/embeddings-multiple-inputs-3.json b/tests/Fixtures/replicate/embeddings-multiple-inputs-3.json new file mode 100644 index 000000000..049a3d923 --- /dev/null +++ b/tests/Fixtures/replicate/embeddings-multiple-inputs-3.json @@ -0,0 +1,19 @@ +{ + "id": "h4zrdxsybysrh90ct9hu0elm1n", + "model": "mark3labs/embeddings-gte-base", + "version": "d619cff29338b9a37c3d06605042e1ff0594a8c3eff0175fd6967f5643fc4d47", + "input": { + "text": "The drinks were not so good" + }, + "logs": "", + "output": null, + "data_removed": false, + "error": null, + "source": "api", + "status": "starting", + "created_at": "2025-11-04T01:21:15.100Z", + "urls": { + "cancel": "https://api.replicate.com/v1/predictions/h4zrdxsybysrh90ct9hu0elm1n/cancel", + "get": "https://api.replicate.com/v1/predictions/h4zrdxsybysrh90ct9hu0elm1n" + } +} diff --git a/tests/Fixtures/replicate/embeddings-multiple-inputs-4.json b/tests/Fixtures/replicate/embeddings-multiple-inputs-4.json new file mode 100644 index 000000000..9beb8cc35 --- /dev/null +++ b/tests/Fixtures/replicate/embeddings-multiple-inputs-4.json @@ -0,0 +1,28 @@ +{ + "id": "h4zrdxsybysrh90ct9hu0elm1n", + "model": "mark3labs/embeddings-gte-base", + 
"version": "d619cff29338b9a37c3d06605042e1ff0594a8c3eff0175fd6967f5643fc4d47", + "input": { + "text": "The drinks were not so good" + }, + "logs": "", + "output": { + "text": "The drinks were not so good", + "vectors": [-0.025341, 0.041235, -0.012341, 0.032145, -0.051432, 0.023451, -0.034512, 0.041235, -0.025341, -0.032145, 0.015234, -0.042134, 0.032451, -0.042132, 0.012543, -0.051234, -0.023451, 0.041234, -0.033214, 0.025341] + }, + "data_removed": false, + "error": null, + "source": "api", + "status": "succeeded", + "created_at": "2025-11-04T01:21:15.100Z", + "started_at": "2025-11-04T01:21:15.500Z", + "completed_at": "2025-11-04T01:21:15.900Z", + "urls": { + "cancel": "https://api.replicate.com/v1/predictions/h4zrdxsybysrh90ct9hu0elm1n/cancel", + "get": "https://api.replicate.com/v1/predictions/h4zrdxsybysrh90ct9hu0elm1n" + }, + "metrics": { + "predict_time": 0.340000, + "total_time": 0.400000 + } +} diff --git a/tests/Fixtures/replicate/embeddings-single-input-1.json b/tests/Fixtures/replicate/embeddings-single-input-1.json new file mode 100644 index 000000000..1ccc5814b --- /dev/null +++ b/tests/Fixtures/replicate/embeddings-single-input-1.json @@ -0,0 +1,20 @@ +{ + "id": "0sm4g8ceddrgc0ct9hssk54y6m", + "model": "mark3labs\/embeddings-gte-base", + "version": "d619cff29338b9a37c3d06605042e1ff0594a8c3eff0175fd6967f5643fc4d47", + "input": { + "text": "The food was delicious and the waiter..." 
+ }, + "logs": "", + "output": null, + "data_removed": false, + "error": null, + "source": "api", + "status": "starting", + "created_at": "2025-11-04T01:20:32.875Z", + "urls": { + "cancel": "https:\/\/api.replicate.com\/v1\/predictions\/0sm4g8ceddrgc0ct9hssk54y6m\/cancel", + "get": "https:\/\/api.replicate.com\/v1\/predictions\/0sm4g8ceddrgc0ct9hssk54y6m", + "web": "https:\/\/replicate.com\/p\/0sm4g8ceddrgc0ct9hssk54y6m" + } +} \ No newline at end of file diff --git a/tests/Fixtures/replicate/embeddings-single-input-2.json b/tests/Fixtures/replicate/embeddings-single-input-2.json new file mode 100644 index 000000000..c3cede244 --- /dev/null +++ b/tests/Fixtures/replicate/embeddings-single-input-2.json @@ -0,0 +1,28 @@ +{ + "id": "0sm4g8ceddrgc0ct9hssk54y6m", + "model": "mark3labs/embeddings-gte-base", + "version": "d619cff29338b9a37c3d06605042e1ff0594a8c3eff0175fd6967f5643fc4d47", + "input": { + "text": "The food was delicious and the waiter..." + }, + "logs": "", + "output": { + "text": "The food was delicious and the waiter...", + "vectors": [-0.03071732632815838, -0.024373497813940048, 0.03353334963321686, 0.002168054925277829, 0.06559721380472183, -0.009457750245928764, 0.011472711339592934, 0.028100572526454926, -0.017572680488228798, -0.04897604510188103, -0.02974911965429783, 0.017224477604031563, -0.05097205936908722, 0.0567566379904747, -0.002953317714855075, 0.06358537822961807, 0.030977727845311165, 0.01616182178258896, 0.012071298435330391, -0.0005556714604608715] + }, + "data_removed": false, + "error": null, + "source": "api", + "status": "succeeded", + "created_at": "2025-11-04T01:20:41.639Z", + "started_at": "2025-11-04T01:20:42.100Z", + "completed_at": "2025-11-04T01:20:42.500Z", + "urls": { + "cancel": "https://api.replicate.com/v1/predictions/0sm4g8ceddrgc0ct9hssk54y6m/cancel", + "get": "https://api.replicate.com/v1/predictions/0sm4g8ceddrgc0ct9hssk54y6m" + }, + "metrics": { + "predict_time": 0.340934, + "total_time": 0.400000 + } +} diff --git 
a/tests/Fixtures/replicate/generate-image-basic-1.json b/tests/Fixtures/replicate/generate-image-basic-1.json new file mode 100644 index 000000000..1970fe1fa --- /dev/null +++ b/tests/Fixtures/replicate/generate-image-basic-1.json @@ -0,0 +1,21 @@ +{ + "id": "8mver00185rme0ct9gt80xrw5m", + "model": "black-forest-labs\/flux-schnell", + "version": "hidden", + "input": { + "prompt": "A cute baby sea otter floating on its back in calm blue water" + }, + "logs": "", + "output": null, + "data_removed": false, + "error": null, + "source": "api", + "status": "starting", + "created_at": "2025-11-04T00:11:07.969Z", + "urls": { + "cancel": "https:\/\/api.replicate.com\/v1\/predictions\/8mver00185rme0ct9gt80xrw5m\/cancel", + "get": "https:\/\/api.replicate.com\/v1\/predictions\/8mver00185rme0ct9gt80xrw5m", + "stream": "https:\/\/stream.replicate.com\/v1\/files\/bcwr-zkfgmidksx3jiedqz7ceaez2sf4lqlhh3yyjtnwcmdlpxislvsxq", + "web": "https:\/\/replicate.com\/p\/8mver00185rme0ct9gt80xrw5m" + } +} \ No newline at end of file diff --git a/tests/Fixtures/replicate/generate-image-basic-2.json b/tests/Fixtures/replicate/generate-image-basic-2.json new file mode 100644 index 000000000..d15104b5e --- /dev/null +++ b/tests/Fixtures/replicate/generate-image-basic-2.json @@ -0,0 +1,30 @@ +{ + "id": "8mver00185rme0ct9gt80xrw5m", + "model": "black-forest-labs\/flux-schnell", + "version": "hidden", + "input": { + "prompt": "A cute baby sea otter floating on its back in calm blue water" + }, + "logs": "running quantized prediction\nUsing seed: 1371263524\n 0%| | 0\/4 [00:00<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello, world!<|eot_id|><|start_header_id|>assistant<|end_header_id|>`Random seed used: `48082`\nNote: Random seed will not impact output if greedy decoding is used.\nFormatted prompt: `<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful 
assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello, world!<|eot_id|><|start_header_id|>assistant<|end_header_id|>`", + "output": [ + "\n\n", + "Hello", + " there", + "!", + " It", + "'s", + " wonderful", + " to", + " meet", + " you", + "!", + " Is", + " there", + " something", + " I", + " can", + " help", + " you", + " with", + " or", + " would", + " you", + " like", + " to", + " chat", + " about", + " something", + " in", + " particular", + "?", + " I", + "'m", + " all", + " ears", + " (", + "or", + " in", + " this", + " case", + ",", + " all", + " text", + ")!" + ], + "data_removed": false, + "error": null, + "source": "api", + "status": "succeeded", + "created_at": "2025-11-03T22:43:56.573Z", + "started_at": "2025-11-03T22:43:56.578750586Z", + "completed_at": "2025-11-03T22:43:57.402950866Z", + "urls": { + "cancel": "https://api.replicate.com/v1/predictions/5f4g1h9e3nrj00ct9fjbxs9wz8/cancel", + "get": "https://api.replicate.com/v1/predictions/5f4g1h9e3nrj00ct9fjbxs9wz8", + "web": "https://replicate.com/p/5f4g1h9e3nrj00ct9fjbxs9wz8" + }, + "metrics": { + "batch_size": 3.0027687333103597, + "input_token_count": 24, + "output_token_count": 44, + "predict_time": 0.824200279, + "predict_time_share": 0.27398014068603516, + "time_to_first_token": 0.038646233, + "tokens_per_second": 56.0114230510882, + "total_time": 0.829950866 + } +} diff --git a/tests/Fixtures/replicate/generate-text-with-system-prompt-1.json b/tests/Fixtures/replicate/generate-text-with-system-prompt-1.json new file mode 100644 index 000000000..76247d97a --- /dev/null +++ b/tests/Fixtures/replicate/generate-text-with-system-prompt-1.json @@ -0,0 +1,21 @@ +{ + "id": "5f4g1h9e3nrj00ct9fjbxs9wz8", + "model": "meta/meta-llama-3-70b-instruct", + "version": "hidden", + "input": { + "max_tokens": 100, + "prompt": "Hello, world!" 
+ }, + "logs": "", + "output": null, + "data_removed": false, + "error": null, + "source": "api", + "status": "starting", + "created_at": "2025-11-03T22:43:56.573Z", + "urls": { + "cancel": "https://api.replicate.com/v1/predictions/5f4g1h9e3nrj00ct9fjbxs9wz8/cancel", + "get": "https://api.replicate.com/v1/predictions/5f4g1h9e3nrj00ct9fjbxs9wz8", + "web": "https://replicate.com/p/5f4g1h9e3nrj00ct9fjbxs9wz8" + } +} diff --git a/tests/Fixtures/replicate/generate-text-with-system-prompt-2.json b/tests/Fixtures/replicate/generate-text-with-system-prompt-2.json new file mode 100644 index 000000000..8358a7963 --- /dev/null +++ b/tests/Fixtures/replicate/generate-text-with-system-prompt-2.json @@ -0,0 +1,77 @@ +{ + "id": "5f4g1h9e3nrj00ct9fjbxs9wz8", + "model": "meta/meta-llama-3-70b-instruct", + "version": "hidden", + "input": { + "max_tokens": 100, + "prompt": "Hello, world!" + }, + "logs": "Random seed used: `48082`\nNote: Random seed will not impact output if greedy decoding is used.\nFormatted prompt: `<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello, world!<|eot_id|><|start_header_id|>assistant<|end_header_id|>`Random seed used: `48082`\nNote: Random seed will not impact output if greedy decoding is used.\nFormatted prompt: `<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello, world!<|eot_id|><|start_header_id|>assistant<|end_header_id|>`", + "output": [ + "\n\n", + "Hello", + " there", + "!", + " It", + "'s", + " wonderful", + " to", + " meet", + " you", + "!", + " Is", + " there", + " something", + " I", + " can", + " help", + " you", + " with", + " or", + " would", + " you", + " like", + " to", + " chat", + " about", + " something", + " in", + " particular", + "?", + " I", + "'m", + " all", + " ears", + " (", + "or", + " in", + " this", + " case", + ",", + " all", + " 
text", + ")!" + ], + "data_removed": false, + "error": null, + "source": "api", + "status": "succeeded", + "created_at": "2025-11-03T22:43:56.573Z", + "started_at": "2025-11-03T22:43:56.578750586Z", + "completed_at": "2025-11-03T22:43:57.402950866Z", + "urls": { + "cancel": "https://api.replicate.com/v1/predictions/5f4g1h9e3nrj00ct9fjbxs9wz8/cancel", + "get": "https://api.replicate.com/v1/predictions/5f4g1h9e3nrj00ct9fjbxs9wz8", + "web": "https://replicate.com/p/5f4g1h9e3nrj00ct9fjbxs9wz8" + }, + "metrics": { + "batch_size": 3.0027687333103597, + "input_token_count": 24, + "output_token_count": 44, + "predict_time": 0.824200279, + "predict_time_share": 0.27398014068603516, + "time_to_first_token": 0.038646233, + "tokens_per_second": 56.0114230510882, + "total_time": 0.829950866 + } +} diff --git a/tests/Fixtures/replicate/generate-text-with-version-1.json b/tests/Fixtures/replicate/generate-text-with-version-1.json new file mode 100644 index 000000000..76247d97a --- /dev/null +++ b/tests/Fixtures/replicate/generate-text-with-version-1.json @@ -0,0 +1,21 @@ +{ + "id": "5f4g1h9e3nrj00ct9fjbxs9wz8", + "model": "meta/meta-llama-3-70b-instruct", + "version": "hidden", + "input": { + "max_tokens": 100, + "prompt": "Hello, world!" 
+ }, + "logs": "", + "output": null, + "data_removed": false, + "error": null, + "source": "api", + "status": "starting", + "created_at": "2025-11-03T22:43:56.573Z", + "urls": { + "cancel": "https://api.replicate.com/v1/predictions/5f4g1h9e3nrj00ct9fjbxs9wz8/cancel", + "get": "https://api.replicate.com/v1/predictions/5f4g1h9e3nrj00ct9fjbxs9wz8", + "web": "https://replicate.com/p/5f4g1h9e3nrj00ct9fjbxs9wz8" + } +} diff --git a/tests/Fixtures/replicate/generate-text-with-version-2.json b/tests/Fixtures/replicate/generate-text-with-version-2.json new file mode 100644 index 000000000..8358a7963 --- /dev/null +++ b/tests/Fixtures/replicate/generate-text-with-version-2.json @@ -0,0 +1,77 @@ +{ + "id": "5f4g1h9e3nrj00ct9fjbxs9wz8", + "model": "meta/meta-llama-3-70b-instruct", + "version": "hidden", + "input": { + "max_tokens": 100, + "prompt": "Hello, world!" + }, + "logs": "Random seed used: `48082`\nNote: Random seed will not impact output if greedy decoding is used.\nFormatted prompt: `<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello, world!<|eot_id|><|start_header_id|>assistant<|end_header_id|>`Random seed used: `48082`\nNote: Random seed will not impact output if greedy decoding is used.\nFormatted prompt: `<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello, world!<|eot_id|><|start_header_id|>assistant<|end_header_id|>`", + "output": [ + "\n\n", + "Hello", + " there", + "!", + " It", + "'s", + " wonderful", + " to", + " meet", + " you", + "!", + " Is", + " there", + " something", + " I", + " can", + " help", + " you", + " with", + " or", + " would", + " you", + " like", + " to", + " chat", + " about", + " something", + " in", + " particular", + "?", + " I", + "'m", + " all", + " ears", + " (", + "or", + " in", + " this", + " case", + ",", + " all", + " text", + ")!" 
+ ], + "data_removed": false, + "error": null, + "source": "api", + "status": "succeeded", + "created_at": "2025-11-03T22:43:56.573Z", + "started_at": "2025-11-03T22:43:56.578750586Z", + "completed_at": "2025-11-03T22:43:57.402950866Z", + "urls": { + "cancel": "https://api.replicate.com/v1/predictions/5f4g1h9e3nrj00ct9fjbxs9wz8/cancel", + "get": "https://api.replicate.com/v1/predictions/5f4g1h9e3nrj00ct9fjbxs9wz8", + "web": "https://replicate.com/p/5f4g1h9e3nrj00ct9fjbxs9wz8" + }, + "metrics": { + "batch_size": 3.0027687333103597, + "input_token_count": 24, + "output_token_count": 44, + "predict_time": 0.824200279, + "predict_time_share": 0.27398014068603516, + "time_to_first_token": 0.038646233, + "tokens_per_second": 56.0114230510882, + "total_time": 0.829950866 + } +} diff --git a/tests/Fixtures/replicate/speech-to-text-mp3-1.json b/tests/Fixtures/replicate/speech-to-text-mp3-1.json new file mode 100644 index 000000000..5a08d9b6f --- /dev/null +++ b/tests/Fixtures/replicate/speech-to-text-mp3-1.json @@ -0,0 +1,21 @@ +{ + "id": "4f1q2sdbwsrm80ct9fsv9k978c", + "model": "vaibhavs10\/incredibly-fast-whisper", + "version": "3ab86df6c8f54c11309d4d1f930ac292bad43ace52d10c80d87eb258b3c9f79c", + "input": { + "audio": "data:audio\/mpeg;base64,...", + "task": "transcribe" + }, + "logs": "", + "output": null, + "data_removed": false, + "error": null, + "source": "api", + "status": "starting", + "created_at": "2025-11-03T23:00:51.814Z", + "urls": { + "cancel": "https:\/\/api.replicate.com\/v1\/predictions\/4f1q2sdbwsrm80ct9fsv9k978c\/cancel", + "get": "https:\/\/api.replicate.com\/v1\/predictions\/4f1q2sdbwsrm80ct9fsv9k978c", + "web": "https:\/\/replicate.com\/p\/4f1q2sdbwsrm80ct9fsv9k978c" + } +} \ No newline at end of file diff --git a/tests/Fixtures/replicate/speech-to-text-mp3-2.json b/tests/Fixtures/replicate/speech-to-text-mp3-2.json new file mode 100644 index 000000000..36fb66622 --- /dev/null +++ b/tests/Fixtures/replicate/speech-to-text-mp3-2.json @@ -0,0 +1,535 @@ 
+{ + "id": "4f1q2sdbwsrm80ct9fsv9k978c", + "model": "vaibhavs10\/incredibly-fast-whisper", + "version": "3ab86df6c8f54c11309d4d1f930ac292bad43ace52d10c80d87eb258b3c9f79c", + "input": { + "audio": "data:audio\/mpeg;base64,...", + "task": "transcribe" + }, + "logs": "Voila!\u2728 Your file has been transcribed!\n", + "output": { + "chunks": [ + { + "text": " know by by that conference um so we'll uh i think that's a good excuse to be on the newsletter too", + "timestamp": [ + 0, + 8.38 + ] + }, + { + "text": " is like yeah find out about 1.0 like as soon as possible yep totally yeah i like that idea then", + "timestamp": [ + 8.38, + 15.24 + ] + }, + { + "text": " that gets you not as much pressure for taking 1.0 before you go to laracon it gets an added", + "timestamp": [ + 15.24, + 23.88 + ] + }, + { + "text": " benefit of getting people on the newsletter", + "timestamp": [ + 23.88, + 25.88 + ] + }, + { + "text": " and a little bit more exposure in there and then you know in the meantime before you can i don't", + "timestamp": [ + 25.88, + 32.16 + ] + }, + { + "text": " know maybe start drafting up newsletter launch email number one yeah get ready yeah i might", + "timestamp": [ + 32.16, + 40.42 + ] + }, + { + "text": " right now i'm using button down email for everything may consider switching over to Bento just because Aaron loves it.", + "timestamp": [ + 40.42, + 49.6 + ] + }, + { + "text": " And I like using things that people recommend.", + "timestamp": [ + 49.76, + 52.46 + ] + }, + { + "text": " So yeah, I might, I might switch providers right before I pull the trigger on it.", + "timestamp": [ + 52.7, + 57.78 + ] + }, + { + "text": " So that's, that's something I'm gonna have to look at and maybe I'll, I'll work on, on the plane or something.", + "timestamp": [ + 57.82, + 62.4 + ] + }, + { + "text": " I'm on my way out to Denver.", + "timestamp": [ + 63.14, + 64.18 + ] + }, + { + "text": " I don't know. It's just getting close, man. 
I'm on my way out to Denver. I don't know.", + "timestamp": [ + 64.48, + 65.02 + ] + }, + { + "text": " It's just getting close, man.", + "timestamp": [ + 65.14, + 66.18 + ] + }, + { + "text": " I leave on Sunday.", + "timestamp": [ + 66.2, + 66.96 + ] + }, + { + "text": " It's Thursday.", + "timestamp": [ + 67.54, + 68.38 + ] + }, + { + "text": " So this is all coming up really quick and there's a lot I want to do.", + "timestamp": [ + 68.7, + 72.68 + ] + }, + { + "text": " So maybe I don't sleep tonight.", + "timestamp": [ + 72.88, + 75.02 + ] + }, + { + "text": " Yeah.", + "timestamp": [ + 76.06, + 76.16 + ] + }, + { + "text": " But even if you just like captured email addresses, like you can always export them, import them into the new place anyways.", + "timestamp": [ + 76.22, + 82.24 + ] + }, + { + "text": " Yep.", + "timestamp": [ + 83.24, + 83.44 + ] + }, + { + "text": " Yeah.", + "timestamp": [ + 83.82, + 84.06 + ] + }, + { + "text": " A hundred percent. So cool, man. Moving on from newsletters. port them into the new place anyways. Yep. 
Yeah, 100%.", + "timestamp": [ + 84.16, + 85.18 + ] + }, + { + "text": " So cool, man.", + "timestamp": [ + 85.18, + 85.82 + ] + }, + { + "text": " Moving on from newsletters, I think you'd added something here about cloud custom commands.", + "timestamp": [ + 86.02, + 91.8 + ] + }, + { + "text": " And that's something that we talked about, I think, last week a little bit.", + "timestamp": [ + 91.92, + 94.64 + ] + }, + { + "text": " And we may be starting to find some excuses for using custom slash commands and stuff.", + "timestamp": [ + 95.04, + 100.92 + ] + }, + { + "text": " So I'd love to hear about your experience here.", + "timestamp": [ + 100.92, + 103.98 + ] + }, + { + "text": " Yeah, definitely definitely so this was", + "timestamp": [ + 104.72, + 106.66 + ] + }, + { + "text": " definitely prompted from last week's conversation and as i'm working through the week i was like", + "timestamp": [ + 106.66, + 112.6 + ] + }, + { + "text": " okay what can i possibly make a custom command for so i basically started off with um two things", + "timestamp": [ + 112.6, + 120.74 + ] + }, + { + "text": " i created like a refresh repo command which which basically like you run it, it scans,", + "timestamp": [ + 120.74, + 128.3 + ] + }, + { + "text": " it basically like re-initiates the, the cloud MD file. It reads what's there. 
It looks at the", + "timestamp": [ + 128.3, + 137.82 + ] + }, + { + "text": " application, sees any new like dependencies or commands or whatever the case is and updates anything that it deems important to add back to", + "timestamp": [ + 137.82, + 148.12 + ] + }, + { + "text": " that markdown file and it's just nice if you have like a repo that has a lot of development a lot", + "timestamp": [ + 148.12, + 153.98 + ] + }, + { + "text": " of people working in it and you don't always want to capture every single new like merge or rebase", + "timestamp": [ + 153.98, + 161.5 + ] + }, + { + "text": " like every other day to it so maybe your brew updates running that like the", + "timestamp": [ + 161.5, + 167.16 + ] + }, + { + "text": " repo refresh like once a week or something like that would be good because then you don't have", + "timestamp": [ + 167.16, + 171.6 + ] + }, + { + "text": " to type out like oh rescan the repo and do this and that and the other thing like now this will", + "timestamp": [ + 171.6, + 176.66 + ] + }, + { + "text": " just take care of it so that was i think the first one that i made so that one's been really helpful", + "timestamp": [ + 176.66, + 182.12 + ] + }, + { + "text": " refreshing everything because the first time i ran it, it picked up like,", + "timestamp": [ + 182.12, + 186.16 + ] + }, + { + "text": " I think the last time I refreshed it was,", + "timestamp": [ + 186.16, + 188.68 + ] + }, + { + "text": " I don't know, three, four weeks ago.", + "timestamp": [ + 188.68, + 190.36 + ] + }, + { + "text": " So it found a whole bunch more stuff.", + "timestamp": [ + 190.36, + 191.98 + ] + }, + { + "text": " It updated architecture changes, a whole bunch of things to it.", + "timestamp": [ + 191.98, + 197.02 + ] + }, + { + "text": " So that's been really helpful.", + "timestamp": [ + 197.02, + 198.3 + ] + }, + { + "text": " Probably going to run that on a weekly basis", + "timestamp": [ + 198.3, + 200.98 + ] + }, + { + "text": " now on 
all the repos that we're working on. The other one is what I just named like a branch context is like,", + "timestamp": [ + 200.98, + 210.64 + ] + }, + { + "text": " I'm either switching to a branch that I'm working on or pulling down for a", + "timestamp": [ + 210.68, + 215.44 + ] + }, + { + "text": " review or something like that of like the repo refreshes for main branch.", + "timestamp": [ + 215.44, + 221.02 + ] + }, + { + "text": " And then when I'm going into something else like scan for changes,", + "timestamp": [ + 221.02, + 224.26 + ] + }, + { + "text": " tell me what's in the last handful of commits and kind of refresh your understanding of what has changed in", + "timestamp": [ + 224.48, + 230.46 + ] + }, + { + "text": " the repo in here. And then we can either work towards something else together, or I'm asking", + "timestamp": [ + 230.46, + 236.46 + ] + }, + { + "text": " you for why did this change or why did that change or whatever the case is. It's just refreshing the", + "timestamp": [ + 236.46, + 242.02 + ] + }, + { + "text": " immediate context for that branch. 
So that's been helpful as well.", + "timestamp": [ + 242.02, + 246.06 + ] + }, + { + "text": " That's sick.", + "timestamp": [ + 246.42, + 246.92 + ] + }, + { + "text": " Yeah.", + "timestamp": [ + 247.66, + 247.86 + ] + }, + { + "text": " So I guess I'll pause there because then I have another whole workflow.", + "timestamp": [ + 248.26, + 251.26 + ] + }, + { + "text": " So any questions on either one of those or any specific details?", + "timestamp": [ + 251.26, + 257 + ] + }, + { + "text": " No, man.", + "timestamp": [ + 257.54, + 257.92 + ] + }, + { + "text": " I think that sounds great.", + "timestamp": [ + 258.06, + 258.88 + ] + }, + { + "text": " Those are very useful things to have as slash commands.", + "timestamp": [ + 259.08, + 261.44 + ] + }, + { + "text": " things to have is slash commands yeah yeah that's it's i'm still i'm still personally finding like my own use cases for them i haven't i haven't dove too far into it yet", + "timestamp": [ + 267.26, + 272.98 + ] + }, + { + "text": " but maybe after lara con i'll i'll spend a little time you know coming up with some stuff but i i", + "timestamp": [ + 272.98, + 280.02 + ] + }, + { + "text": " think those are like i think it serves like some decent inspiration for some some other slash commands i'm thinking up right now so yeah what else you got um yeah so the next one which i do a", + "timestamp": [ + 280.02, + 292.46 + ] + }, + { + "text": " lot is pr reviews for others and coming into like a new tech stack like there's some things i am just", + "timestamp": [ + 292.46, + 298.56 + ] + }, + { + "text": " not aware about or like best practices or whatever so there's a default slash review command that comes out of the box in cloud", + "timestamp": [ + 298.56, + 308.32 + ] + }, + { + "text": " code.", + "timestamp": [ + 308.32, + 308.66 + ] + }, + { + "text": " And that gives a decent understanding.", + "timestamp": [ + 308.8, + 312.24 + ] + }, + { + "text": " Like it'll tell you,", + "timestamp": [ + 
312.38, + 313.36 + ] + }, + { + "text": " you know,", + "timestamp": [ + 313.42, + 313.96 + ] + }, + { + "text": " what's changed,", + "timestamp": [ + 313.98, + 314.82 + ] + }, + { + "text": " what maybe some of the,", + "timestamp": [ + 314.96, + 316.58 + ] + } + ], + "text": " know by by that conference um so we'll uh i think that's a good excuse to be on the newsletter too is like yeah find out about 1.0 like as soon as possible yep totally yeah i like that idea then that gets you not as much pressure for taking 1.0 before you go to laracon it gets an added benefit of getting people on the newsletter and a little bit more exposure in there and then you know in the meantime before you can i don't know maybe start drafting up newsletter launch email number one yeah get ready yeah i might right now i'm using button down email for everything may consider switching over to Bento just because Aaron loves it. And I like using things that people recommend. So yeah, I might, I might switch providers right before I pull the trigger on it. So that's, that's something I'm gonna have to look at and maybe I'll, I'll work on, on the plane or something. I'm on my way out to Denver. I don't know. It's just getting close, man. I'm on my way out to Denver. I don't know. It's just getting close, man. I leave on Sunday. It's Thursday. So this is all coming up really quick and there's a lot I want to do. So maybe I don't sleep tonight. Yeah. But even if you just like captured email addresses, like you can always export them, import them into the new place anyways. Yep. Yeah. A hundred percent. So cool, man. Moving on from newsletters. port them into the new place anyways. Yep. Yeah, 100%. So cool, man. Moving on from newsletters, I think you'd added something here about cloud custom commands. And that's something that we talked about, I think, last week a little bit. And we may be starting to find some excuses for using custom slash commands and stuff. 
So I'd love to hear about your experience here. Yeah, definitely definitely so this was definitely prompted from last week's conversation and as i'm working through the week i was like okay what can i possibly make a custom command for so i basically started off with um two things i created like a refresh repo command which which basically like you run it, it scans, it basically like re-initiates the, the cloud MD file. It reads what's there. It looks at the application, sees any new like dependencies or commands or whatever the case is and updates anything that it deems important to add back to that markdown file and it's just nice if you have like a repo that has a lot of development a lot of people working in it and you don't always want to capture every single new like merge or rebase like every other day to it so maybe your brew updates running that like the repo refresh like once a week or something like that would be good because then you don't have to type out like oh rescan the repo and do this and that and the other thing like now this will just take care of it so that was i think the first one that i made so that one's been really helpful refreshing everything because the first time i ran it, it picked up like, I think the last time I refreshed it was, I don't know, three, four weeks ago. So it found a whole bunch more stuff. It updated architecture changes, a whole bunch of things to it. So that's been really helpful. Probably going to run that on a weekly basis now on all the repos that we're working on. The other one is what I just named like a branch context is like, I'm either switching to a branch that I'm working on or pulling down for a review or something like that of like the repo refreshes for main branch. And then when I'm going into something else like scan for changes, tell me what's in the last handful of commits and kind of refresh your understanding of what has changed in the repo in here. 
And then we can either work towards something else together, or I'm asking you for why did this change or why did that change or whatever the case is. It's just refreshing the immediate context for that branch. So that's been helpful as well. That's sick. Yeah. So I guess I'll pause there because then I have another whole workflow. So any questions on either one of those or any specific details? No, man. I think that sounds great. Those are very useful things to have as slash commands. things to have is slash commands yeah yeah that's it's i'm still i'm still personally finding like my own use cases for them i haven't i haven't dove too far into it yet but maybe after lara con i'll i'll spend a little time you know coming up with some stuff but i i think those are like i think it serves like some decent inspiration for some some other slash commands i'm thinking up right now so yeah what else you got um yeah so the next one which i do a lot is pr reviews for others and coming into like a new tech stack like there's some things i am just not aware about or like best practices or whatever so there's a default slash review command that comes out of the box in cloud code. And that gives a decent understanding. 
Like it'll tell you, you know, what's changed, what maybe some of the," + }, + "data_removed": false, + "error": null, + "source": "api", + "status": "succeeded", + "created_at": "2025-11-03T23:00:51.814Z", + "started_at": "2025-11-03T23:00:52.189785656Z", + "completed_at": "2025-11-03T23:00:58.006454501Z", + "urls": { + "cancel": "https:\/\/api.replicate.com\/v1\/predictions\/4f1q2sdbwsrm80ct9fsv9k978c\/cancel", + "get": "https:\/\/api.replicate.com\/v1\/predictions\/4f1q2sdbwsrm80ct9fsv9k978c", + "web": "https:\/\/replicate.com\/p\/4f1q2sdbwsrm80ct9fsv9k978c" + }, + "metrics": { + "predict_time": 5.816668844, + "total_time": 6.192454501 + } +} \ No newline at end of file diff --git a/tests/Fixtures/replicate/speech-to-text-wav-1.json b/tests/Fixtures/replicate/speech-to-text-wav-1.json new file mode 100644 index 000000000..5c8078915 --- /dev/null +++ b/tests/Fixtures/replicate/speech-to-text-wav-1.json @@ -0,0 +1,21 @@ +{ + "id": "w57accczhhrme0ct9fsvy71m5w", + "model": "vaibhavs10\/incredibly-fast-whisper", + "version": "3ab86df6c8f54c11309d4d1f930ac292bad43ace52d10c80d87eb258b3c9f79c", + "input": { + "audio": "data:audio\/x-wav;base64,...", + "task": "transcribe" + }, + "logs": "", + "output": null, + "data_removed": false, + "error": null, + "source": "api", + "status": "starting", + "created_at": "2025-11-03T23:00:48.652Z", + "urls": { + "cancel": "https:\/\/api.replicate.com\/v1\/predictions\/w57accczhhrme0ct9fsvy71m5w\/cancel", + "get": "https:\/\/api.replicate.com\/v1\/predictions\/w57accczhhrme0ct9fsvy71m5w", + "web": "https:\/\/replicate.com\/p\/w57accczhhrme0ct9fsvy71m5w" + } +} \ No newline at end of file diff --git a/tests/Fixtures/replicate/speech-to-text-wav-2.json b/tests/Fixtures/replicate/speech-to-text-wav-2.json new file mode 100644 index 000000000..3e8be968b --- /dev/null +++ b/tests/Fixtures/replicate/speech-to-text-wav-2.json @@ -0,0 +1,38 @@ +{ + "id": "w57accczhhrme0ct9fsvy71m5w", + "model": "vaibhavs10\/incredibly-fast-whisper", + 
"version": "3ab86df6c8f54c11309d4d1f930ac292bad43ace52d10c80d87eb258b3c9f79c", + "input": { + "audio": "data:audio\/x-wav;base64,...", + "task": "transcribe" + }, + "logs": "Voila!\u2728 Your file has been transcribed!\n", + "output": { + "chunks": [ + { + "text": " Kids are talking by the door.", + "timestamp": [ + 0, + 3.74 + ] + } + ], + "text": " Kids are talking by the door." + }, + "data_removed": false, + "error": null, + "source": "api", + "status": "succeeded", + "created_at": "2025-11-03T23:00:48.652Z", + "started_at": "2025-11-03T23:00:49.204781396Z", + "completed_at": "2025-11-03T23:00:49.528991479Z", + "urls": { + "cancel": "https:\/\/api.replicate.com\/v1\/predictions\/w57accczhhrme0ct9fsvy71m5w\/cancel", + "get": "https:\/\/api.replicate.com\/v1\/predictions\/w57accczhhrme0ct9fsvy71m5w", + "web": "https:\/\/replicate.com\/p\/w57accczhhrme0ct9fsvy71m5w" + }, + "metrics": { + "predict_time": 0.324210084, + "total_time": 0.876991479 + } +} \ No newline at end of file diff --git a/tests/Fixtures/replicate/structured-json-mode-1.json b/tests/Fixtures/replicate/structured-json-mode-1.json new file mode 100644 index 000000000..a512e1b97 --- /dev/null +++ b/tests/Fixtures/replicate/structured-json-mode-1.json @@ -0,0 +1,22 @@ +{ + "id": "0gb2pjnmnhrme0ct9hca6dfr98", + "model": "meta\/meta-llama-3.1-405b-instruct", + "version": "hidden", + "input": { + "max_tokens": 500, + "prompt": "What time is the tigers game today and should I wear a coat? 
Respond ONLY with valid JSON that matches this schema: {\"weather\": \"string\", \"game_time\": \"string\", \"coat_required\": boolean}" + }, + "logs": "", + "output": null, + "data_removed": false, + "error": null, + "source": "api", + "status": "starting", + "created_at": "2025-11-04T00:51:13.196Z", + "urls": { + "cancel": "https:\/\/api.replicate.com\/v1\/predictions\/0gb2pjnmnhrme0ct9hca6dfr98\/cancel", + "get": "https:\/\/api.replicate.com\/v1\/predictions\/0gb2pjnmnhrme0ct9hca6dfr98", + "stream": "https:\/\/stream-b.svc.ric1.c.replicate.net\/v1\/streams\/jn75y6rccu76zna77fpdoar7xq4fbrzp4xewqhpht5g2syshvj2q", + "web": "https:\/\/replicate.com\/p\/0gb2pjnmnhrme0ct9hca6dfr98" + } +} \ No newline at end of file diff --git a/tests/Fixtures/replicate/structured-json-mode-2.json b/tests/Fixtures/replicate/structured-json-mode-2.json new file mode 100644 index 000000000..a29c5ac59 --- /dev/null +++ b/tests/Fixtures/replicate/structured-json-mode-2.json @@ -0,0 +1,133 @@ +{ + "id": "0gb2pjnmnhrme0ct9hca6dfr98", + "model": "meta\/meta-llama-3.1-405b-instruct", + "version": "hidden", + "input": { + "max_tokens": 500, + "prompt": "What time is the tigers game today and should I wear a coat? 
Respond ONLY with valid JSON that matches this schema: {\"weather\": \"string\", \"game_time\": \"string\", \"coat_required\": boolean}" + }, + "logs": "", + "output": [ + "Unfortunately", + ",", + " I", + " don", + "'t", + " have", + " real", + "-time", + " access", + " to", + " current", + " events", + " or", + " weather", + " conditions", + ".", + " However", + ",", + " I", + " can", + " provide", + " a", + " response", + " in", + " the", + " requested", + " JSON", + " format", + " with", + " some", + " placeholder", + " values", + ":\n\n", + "``", + "`\n", + "{\n", + " ", + " \"", + "weather", + "\":", + " \"", + "S", + "unny", + "\",\n", + " ", + " \"", + "game", + "_time", + "\":", + " \"", + "1", + ":", + "00", + " PM", + " EST", + "\",\n", + " ", + " \"", + "coat", + "_required", + "\":", + " false", + "\n", + "}\n", + "``", + "`\n\n", + "Please", + " note", + " that", + " this", + " response", + " is", + " fictional", + " and", + " not", + " based", + " on", + " actual", + " data", + ".", + " For", + " accurate", + " information", + ",", + " I", + " recommend", + " checking", + " the", + " official", + " website", + " of", + " the", + " Tigers", + " team", + " or", + " a", + " reliable", + " weather", + " service", + "." 
+ ], + "data_removed": false, + "error": null, + "source": "api", + "status": "succeeded", + "created_at": "2025-11-04T00:51:13.196Z", + "started_at": "2025-11-04T00:51:13.203039516Z", + "completed_at": "2025-11-04T00:51:18.458222807Z", + "urls": { + "cancel": "https:\/\/api.replicate.com\/v1\/predictions\/0gb2pjnmnhrme0ct9hca6dfr98\/cancel", + "get": "https:\/\/api.replicate.com\/v1\/predictions\/0gb2pjnmnhrme0ct9hca6dfr98", + "stream": "https:\/\/stream-b.svc.ric1.c.replicate.net\/v1\/streams\/jn75y6rccu76zna77fpdoar7xq4fbrzp4xewqhpht5g2syshvj2q", + "web": "https:\/\/replicate.com\/p\/0gb2pjnmnhrme0ct9hca6dfr98" + }, + "metrics": { + "input_token_count": 65, + "output_token_count": 101, + "predict_time": 5.255183285, + "time_to_first_token": 0.967300547, + "tokens_per_second": 23.55474861868762, + "total_time": 5.262222807 + } +} \ No newline at end of file diff --git a/tests/Fixtures/replicate/structured-simple-1.json b/tests/Fixtures/replicate/structured-simple-1.json new file mode 100644 index 000000000..9d019d5a6 --- /dev/null +++ b/tests/Fixtures/replicate/structured-simple-1.json @@ -0,0 +1,22 @@ +{ + "id": "5ydxqjpf9srma0ct9hcb4yatsm", + "model": "meta\/meta-llama-3.1-405b-instruct", + "version": "hidden", + "input": { + "max_tokens": 500, + "prompt": "Tell me about Albert Einstein. 
Respond ONLY with valid JSON that matches this schema: {\"name\": \"string\", \"role\": \"string\"}" + }, + "logs": "", + "output": null, + "data_removed": false, + "error": null, + "source": "api", + "status": "starting", + "created_at": "2025-11-04T00:51:20.014Z", + "urls": { + "cancel": "https:\/\/api.replicate.com\/v1\/predictions\/5ydxqjpf9srma0ct9hcb4yatsm\/cancel", + "get": "https:\/\/api.replicate.com\/v1\/predictions\/5ydxqjpf9srma0ct9hcb4yatsm", + "stream": "https:\/\/stream-b.svc.ric1.c.replicate.net\/v1\/streams\/5zpbwnvwcs5rxxgbd6tz24on4jcc6hvzf77tpqjl6keqqmlvp2za", + "web": "https:\/\/replicate.com\/p\/5ydxqjpf9srma0ct9hcb4yatsm" + } +} \ No newline at end of file diff --git a/tests/Fixtures/replicate/structured-simple-2.json b/tests/Fixtures/replicate/structured-simple-2.json new file mode 100644 index 000000000..b5dc82a52 --- /dev/null +++ b/tests/Fixtures/replicate/structured-simple-2.json @@ -0,0 +1,50 @@ +{ + "id": "5ydxqjpf9srma0ct9hcb4yatsm", + "model": "meta\/meta-llama-3.1-405b-instruct", + "version": "hidden", + "input": { + "max_tokens": 500, + "prompt": "Tell me about Albert Einstein. 
Respond ONLY with valid JSON that matches this schema: {\"name\": \"string\", \"role\": \"string\"}" + }, + "logs": "", + "output": [ + "{\"", + "name", + "\":", + " \"", + "Albert", + " Einstein", + "\",", + " \"", + "role", + "\":", + " \"", + "The", + "oretical", + " Phys", + "ic", + "ist", + "\"}" + ], + "data_removed": false, + "error": null, + "source": "api", + "status": "succeeded", + "created_at": "2025-11-04T00:51:20.014Z", + "started_at": "2025-11-04T00:51:20.036951769Z", + "completed_at": "2025-11-04T00:51:20.95123293Z", + "urls": { + "cancel": "https:\/\/api.replicate.com\/v1\/predictions\/5ydxqjpf9srma0ct9hcb4yatsm\/cancel", + "get": "https:\/\/api.replicate.com\/v1\/predictions\/5ydxqjpf9srma0ct9hcb4yatsm", + "stream": "https:\/\/stream-b.svc.ric1.c.replicate.net\/v1\/streams\/5zpbwnvwcs5rxxgbd6tz24on4jcc6hvzf77tpqjl6keqqmlvp2za", + "web": "https:\/\/replicate.com\/p\/5ydxqjpf9srma0ct9hcb4yatsm" + }, + "metrics": { + "input_token_count": 50, + "output_token_count": 18, + "predict_time": 0.914281161, + "time_to_first_token": 0.427102528, + "tokens_per_second": 36.94743320157064, + "total_time": 0.93723293 + } +} \ No newline at end of file diff --git a/tests/Fixtures/replicate/text-to-speech-basic-1.json b/tests/Fixtures/replicate/text-to-speech-basic-1.json new file mode 100644 index 000000000..d567189e4 --- /dev/null +++ b/tests/Fixtures/replicate/text-to-speech-basic-1.json @@ -0,0 +1,22 @@ +{ + "id": "gxh7q2my95rgc0ct9g7tr2w9cr", + "model": "jaaari\/kokoro-82m", + "version": "f559560eb822dc509045f3921a1921234918b91739db4bf3daab2169b71c7a13", + "input": { + "text": "Hello world!", + "voice": "af_alloy" + }, + "logs": "", + "output": null, + "data_removed": false, + "error": null, + "source": "api", + "status": "starting", + "created_at": "2025-11-03T23:31:23.337Z", + "urls": { + "cancel": "https:\/\/api.replicate.com\/v1\/predictions\/gxh7q2my95rgc0ct9g7tr2w9cr\/cancel", + "get": 
"https:\/\/api.replicate.com\/v1\/predictions\/gxh7q2my95rgc0ct9g7tr2w9cr", + "stream": "https:\/\/stream.replicate.com\/v1\/files\/fddq-qrpzjxytfv2piya7jlz3jxfccosl5k5csjag73cue6cpphyvju6q", + "web": "https:\/\/replicate.com\/p\/gxh7q2my95rgc0ct9g7tr2w9cr" + } +} \ No newline at end of file diff --git a/tests/Fixtures/replicate/text-to-speech-basic-2.json b/tests/Fixtures/replicate/text-to-speech-basic-2.json new file mode 100644 index 000000000..3a843dbcf --- /dev/null +++ b/tests/Fixtures/replicate/text-to-speech-basic-2.json @@ -0,0 +1,28 @@ +{ + "id": "gxh7q2my95rgc0ct9g7tr2w9cr", + "model": "jaaari\/kokoro-82m", + "version": "f559560eb822dc509045f3921a1921234918b91739db4bf3daab2169b71c7a13", + "input": { + "text": "Hello world!", + "voice": "af_alloy" + }, + "logs": "Processing text (12 chars) with voice='af_alloy' at speed=1.0\n", + "output": "https:\/\/replicate.delivery\/czjl\/BBTn3jvZMhIoJRGFDEVFH5NeuVnwPSJ6yCFNqB3y6wwla3yKA\/output.wav", + "data_removed": false, + "error": null, + "source": "api", + "status": "succeeded", + "created_at": "2025-11-03T23:31:23.337Z", + "started_at": "2025-11-03T23:31:23.352623999Z", + "completed_at": "2025-11-03T23:31:23.96356142Z", + "urls": { + "cancel": "https:\/\/api.replicate.com\/v1\/predictions\/gxh7q2my95rgc0ct9g7tr2w9cr\/cancel", + "get": "https:\/\/api.replicate.com\/v1\/predictions\/gxh7q2my95rgc0ct9g7tr2w9cr", + "stream": "https:\/\/stream.replicate.com\/v1\/files\/fddq-qrpzjxytfv2piya7jlz3jxfccosl5k5csjag73cue6cpphyvju6q", + "web": "https:\/\/replicate.com\/p\/gxh7q2my95rgc0ct9g7tr2w9cr" + }, + "metrics": { + "predict_time": 0.610937454, + "total_time": 0.62656142 + } +} \ No newline at end of file diff --git a/tests/Fixtures/replicate/text-to-speech-different-voice-1.json b/tests/Fixtures/replicate/text-to-speech-different-voice-1.json new file mode 100644 index 000000000..35c1698c0 --- /dev/null +++ b/tests/Fixtures/replicate/text-to-speech-different-voice-1.json @@ -0,0 +1,22 @@ +{ + "id": 
"ycn6jmn7dhrga0ct9g7tcerq78", + "model": "jaaari\/kokoro-82m", + "version": "f559560eb822dc509045f3921a1921234918b91739db4bf3daab2169b71c7a13", + "input": { + "text": "Testing echo voice", + "voice": "af_bella" + }, + "logs": "", + "output": null, + "data_removed": false, + "error": null, + "source": "api", + "status": "starting", + "created_at": "2025-11-03T23:31:25.676Z", + "urls": { + "cancel": "https:\/\/api.replicate.com\/v1\/predictions\/ycn6jmn7dhrga0ct9g7tcerq78\/cancel", + "get": "https:\/\/api.replicate.com\/v1\/predictions\/ycn6jmn7dhrga0ct9g7tcerq78", + "stream": "https:\/\/stream.replicate.com\/v1\/files\/fddq-adq3w32ed3kuc63337nzopuga2rarb2iiglu7gv6zivdyg6mra3a", + "web": "https:\/\/replicate.com\/p\/ycn6jmn7dhrga0ct9g7tcerq78" + } +} \ No newline at end of file diff --git a/tests/Fixtures/replicate/text-to-speech-different-voice-2.json b/tests/Fixtures/replicate/text-to-speech-different-voice-2.json new file mode 100644 index 000000000..a394a78f9 --- /dev/null +++ b/tests/Fixtures/replicate/text-to-speech-different-voice-2.json @@ -0,0 +1,28 @@ +{ + "id": "ycn6jmn7dhrga0ct9g7tcerq78", + "model": "jaaari\/kokoro-82m", + "version": "f559560eb822dc509045f3921a1921234918b91739db4bf3daab2169b71c7a13", + "input": { + "text": "Testing echo voice", + "voice": "af_bella" + }, + "logs": "Processing text (18 chars) with voice='af_bella' at speed=1.0\n", + "output": "https:\/\/replicate.delivery\/czjl\/qOcZefVidKqH9kSKfslNXX6avZ4Zl1zgpPAYZUOTfa80U7WWB\/output.wav", + "data_removed": false, + "error": null, + "source": "api", + "status": "succeeded", + "created_at": "2025-11-03T23:31:25.676Z", + "started_at": "2025-11-03T23:31:25.691010209Z", + "completed_at": "2025-11-03T23:31:25.827174994Z", + "urls": { + "cancel": "https:\/\/api.replicate.com\/v1\/predictions\/ycn6jmn7dhrga0ct9g7tcerq78\/cancel", + "get": "https:\/\/api.replicate.com\/v1\/predictions\/ycn6jmn7dhrga0ct9g7tcerq78", + "stream": 
"https:\/\/stream.replicate.com\/v1\/files\/fddq-adq3w32ed3kuc63337nzopuga2rarb2iiglu7gv6zivdyg6mra3a", + "web": "https:\/\/replicate.com\/p\/ycn6jmn7dhrga0ct9g7tcerq78" + }, + "metrics": { + "predict_time": 0.13616481, + "total_time": 0.151174994 + } +} \ No newline at end of file diff --git a/tests/Fixtures/replicate/text-to-speech-long-text-1.json b/tests/Fixtures/replicate/text-to-speech-long-text-1.json new file mode 100644 index 000000000..1879c8253 --- /dev/null +++ b/tests/Fixtures/replicate/text-to-speech-long-text-1.json @@ -0,0 +1,22 @@ +{ + "id": "prv1025gk1rga0ct9g7rmktdaw", + "model": "jaaari\/kokoro-82m", + "version": "f559560eb822dc509045f3921a1921234918b91739db4bf3daab2169b71c7a13", + "input": { + "text": "This is a longer piece of text to test the text-to-speech capabilities.", + "voice": "af_sky" + }, + "logs": "", + "output": null, + "data_removed": false, + "error": null, + "source": "api", + "status": "starting", + "created_at": "2025-11-03T23:31:28.024Z", + "urls": { + "cancel": "https:\/\/api.replicate.com\/v1\/predictions\/prv1025gk1rga0ct9g7rmktdaw\/cancel", + "get": "https:\/\/api.replicate.com\/v1\/predictions\/prv1025gk1rga0ct9g7rmktdaw", + "stream": "https:\/\/stream.replicate.com\/v1\/files\/fddq-amvwplfdc2znriou7sgcqus3zurxfosqod4i7hh75p4bv6zcbxqq", + "web": "https:\/\/replicate.com\/p\/prv1025gk1rga0ct9g7rmktdaw" + } +} \ No newline at end of file diff --git a/tests/Fixtures/replicate/text-to-speech-long-text-2.json b/tests/Fixtures/replicate/text-to-speech-long-text-2.json new file mode 100644 index 000000000..00db51af4 --- /dev/null +++ b/tests/Fixtures/replicate/text-to-speech-long-text-2.json @@ -0,0 +1,28 @@ +{ + "id": "prv1025gk1rga0ct9g7rmktdaw", + "model": "jaaari\/kokoro-82m", + "version": "f559560eb822dc509045f3921a1921234918b91739db4bf3daab2169b71c7a13", + "input": { + "text": "This is a longer piece of text to test the text-to-speech capabilities.", + "voice": "af_sky" + }, + "logs": "Processing text (71 chars) with 
voice='af_sky' at speed=1.0\n", + "output": "https:\/\/replicate.delivery\/czjl\/WjODbdmaJa7QN1HMb4DCo8lZ7mSef4WTozYnANEp1NHQ1ulVA\/output.wav", + "data_removed": false, + "error": null, + "source": "api", + "status": "succeeded", + "created_at": "2025-11-03T23:31:28.024Z", + "started_at": "2025-11-03T23:31:28.074385217Z", + "completed_at": "2025-11-03T23:31:28.685849357Z", + "urls": { + "cancel": "https:\/\/api.replicate.com\/v1\/predictions\/prv1025gk1rga0ct9g7rmktdaw\/cancel", + "get": "https:\/\/api.replicate.com\/v1\/predictions\/prv1025gk1rga0ct9g7rmktdaw", + "stream": "https:\/\/stream.replicate.com\/v1\/files\/fddq-amvwplfdc2znriou7sgcqus3zurxfosqod4i7hh75p4bv6zcbxqq", + "web": "https:\/\/replicate.com\/p\/prv1025gk1rga0ct9g7rmktdaw" + }, + "metrics": { + "predict_time": 0.611464158, + "total_time": 0.661849357 + } +} \ No newline at end of file diff --git a/tests/Pest.php b/tests/Pest.php index a6a563f60..a6e71a95c 100644 --- a/tests/Pest.php +++ b/tests/Pest.php @@ -20,6 +20,7 @@ uses()->group('mistral')->in('Providers/Mistral'); uses()->group('ollama')->in('Providers/Ollama'); uses()->group('openai')->in('Providers/OpenAI'); +uses()->group('replicate')->in('Providers/Replicate'); uses()->group('xai')->in('Providers/XAI'); /* diff --git a/tests/Providers/Replicate/RecordReplicateFixtures.php b/tests/Providers/Replicate/RecordReplicateFixtures.php new file mode 100644 index 000000000..5f65e13bd --- /dev/null +++ b/tests/Providers/Replicate/RecordReplicateFixtures.php @@ -0,0 +1,56 @@ +markTestSkipped('Real REPLICATE_API_KEY not configured'); + } + config()->set('prism.providers.replicate.api_key', $apiKey); + config()->set('prism.providers.replicate.polling_interval', 1000); // 1 second + config()->set('prism.providers.replicate.max_wait_time', 120); // 2 minutes +}); + +describe('Record Replicate Fixtures', function (): void { + it('records a real text generation response', function (): void { + $response = Prism::text() + ->using('replicate', 
'meta/meta-llama-3.1-8b-instruct') + ->withPrompt('Hello, world!') + ->withMaxTokens(50) + ->generate(); + + expect($response->text)->not->toBeEmpty() + ->and($response->steps)->toHaveCount(1) + ->and($response->steps[0]->meta->id)->not->toBeEmpty(); + + // Output for manual verification + dump([ + 'text' => $response->text, + 'meta' => [ + 'id' => $response->steps[0]->meta->id, + 'model' => $response->steps[0]->meta->model, + ], + 'metrics' => $response->steps[0]->additionalContent['metrics'] ?? null, + ]); + })->skip('Run manually with: ./vendor/bin/pest tests/Providers/Replicate/RecordReplicateFixtures.php'); + + it('records text generation with system prompt', function (): void { + $response = Prism::text() + ->using('replicate', 'meta/meta-llama-3.1-8b-instruct') + ->withSystemPrompt('You are a helpful assistant.') + ->withPrompt('Who are you?') + ->withMaxTokens(50) + ->generate(); + + expect($response->text)->not->toBeEmpty(); + + dump([ + 'text' => $response->text, + ]); + })->skip('Run manually with: ./vendor/bin/pest tests/Providers/Replicate/RecordReplicateFixtures.php'); +}); diff --git a/tests/Providers/Replicate/ReplicateAudioTest.php b/tests/Providers/Replicate/ReplicateAudioTest.php new file mode 100644 index 000000000..7003b6a94 --- /dev/null +++ b/tests/Providers/Replicate/ReplicateAudioTest.php @@ -0,0 +1,134 @@ +set('prism.providers.replicate.api_key', env('REPLICATE_API_KEY', 'r8_test1234')); + config()->set('prism.providers.replicate.polling_interval', 10); + config()->set('prism.providers.replicate.max_wait_time', 10); +}); + +describe('Text-to-Speech for Replicate', function (): void { + it('can generate audio from text', function (): void { + $createResponse = json_decode(file_get_contents(__DIR__.'/../../Fixtures/replicate/text-to-speech-basic-1.json'), true); + $completedResponse = json_decode(file_get_contents(__DIR__.'/../../Fixtures/replicate/text-to-speech-basic-2.json'), true); + $predictionId = $createResponse['id']; + $audioUrl = 
$completedResponse['output']; + + Http::fake([ + 'https://api.replicate.com/v1/predictions' => Http::response($createResponse, 201), + "https://api.replicate.com/v1/predictions/{$predictionId}" => Http::response($completedResponse, 200), + $audioUrl => Http::response('fake-audio-content', 200), + ]); + + $response = Prism::audio() + ->using('replicate', 'jaaari/kokoro-82m') + ->withInput('Hello world!') + ->withVoice('af_alloy') + ->asAudio(); + + expect($response->audio)->not->toBeNull() + ->and($response->audio->hasBase64())->toBeTrue() + ->and($response->audio->base64)->not->toBeEmpty(); + }); + + it('can generate audio with different voice', function (): void { + $createResponse = json_decode(file_get_contents(__DIR__.'/../../Fixtures/replicate/text-to-speech-different-voice-1.json'), true); + $completedResponse = json_decode(file_get_contents(__DIR__.'/../../Fixtures/replicate/text-to-speech-different-voice-2.json'), true); + $predictionId = $createResponse['id']; + $audioUrl = $completedResponse['output']; + + Http::fake([ + 'https://api.replicate.com/v1/predictions' => Http::response($createResponse, 201), + "https://api.replicate.com/v1/predictions/{$predictionId}" => Http::response($completedResponse, 200), + $audioUrl => Http::response('fake-audio-content', 200), + ]); + + $response = Prism::audio() + ->using('replicate', 'jaaari/kokoro-82m') + ->withInput('Testing echo voice') + ->withVoice('af_bella') + ->asAudio(); + + expect($response->audio)->not->toBeNull() + ->and($response->audio->hasBase64())->toBeTrue(); + }); + + it('can generate audio with provider options', function (): void { + $createResponse = json_decode(file_get_contents(__DIR__.'/../../Fixtures/replicate/text-to-speech-long-text-1.json'), true); + $completedResponse = json_decode(file_get_contents(__DIR__.'/../../Fixtures/replicate/text-to-speech-long-text-2.json'), true); + $predictionId = $createResponse['id']; + $audioUrl = $completedResponse['output']; + + Http::fake([ + 
'https://api.replicate.com/v1/predictions' => Http::response($createResponse, 201), + "https://api.replicate.com/v1/predictions/{$predictionId}" => Http::response($completedResponse, 200), + $audioUrl => Http::response('fake-audio-content', 200), + ]); + + Prism::audio() + ->using('replicate', 'jaaari/kokoro-82m') + ->withInput('This is a longer piece of text to test the text-to-speech capabilities.') + ->withVoice('af_sky') + ->withProviderOptions([ + 'voice' => 'af_sky', + 'speed' => 1.0, + ]) + ->asAudio(); + + Http::assertSent(function ($request): bool { + if (! str_contains((string) $request->url(), 'predictions')) { + return false; + } + + $body = json_decode((string) $request->body(), true); + + return isset($body['input']['text']) + && isset($body['input']['voice']) + && $body['input']['voice'] === 'af_sky'; + }); + }); +}); + +describe('Speech-to-Text for Replicate', function (): void { + it('can transcribe WAV audio file from data URL', function (): void { + FixtureResponse::fakeResponseSequence('*', 'replicate/speech-to-text-wav'); + + $audioPath = __DIR__.'/../../Fixtures/sample-audio.wav'; + $audioContent = file_get_contents($audioPath); + $base64Audio = base64_encode($audioContent); + $dataUrl = 'data:audio/wav;base64,'.$base64Audio; + + $response = Prism::audio() + ->using('replicate', 'vaibhavs10/incredibly-fast-whisper') + ->withInput(new Audio(url: $dataUrl)) + ->asText(); + + expect($response->text)->toContain('Kids') + ->and($response->additionalContent['metrics']['predict_time'])->toBeGreaterThan(0); + }); + + it('can transcribe MP3 audio file from data URL', function (): void { + FixtureResponse::fakeResponseSequence('*', 'replicate/speech-to-text-mp3'); + + $audioPath = __DIR__.'/../../Fixtures/slightly-caffeinated-36.mp3'; + $audioContent = file_get_contents($audioPath); + $base64Audio = base64_encode($audioContent); + $dataUrl = 'data:audio/mpeg;base64,'.$base64Audio; + + $response = Prism::audio() + ->using('replicate', 
'vaibhavs10/incredibly-fast-whisper') + ->withInput(new Audio(url: $dataUrl)) + ->asText(); + + expect($response->text)->not->toBeEmpty() + ->and($response->additionalContent['metrics']['predict_time'])->toBeGreaterThan(0); + }); +}); diff --git a/tests/Providers/Replicate/ReplicateEmbeddingsTest.php b/tests/Providers/Replicate/ReplicateEmbeddingsTest.php new file mode 100644 index 000000000..6934bf352 --- /dev/null +++ b/tests/Providers/Replicate/ReplicateEmbeddingsTest.php @@ -0,0 +1,62 @@ +set('prism.providers.replicate.api_key', env('REPLICATE_API_KEY', 'r8_test1234')); + config()->set('prism.providers.replicate.polling_interval', 10); + config()->set('prism.providers.replicate.max_wait_time', 10); +}); + +describe('Embeddings for Replicate', function (): void { + it('returns embeddings from input', function (): void { + FixtureResponse::fakeResponseSequence('*', 'replicate/embeddings-single-input'); + + $response = Prism::embeddings() + ->using('replicate', 'mark3labs/embeddings-gte-base') + ->fromInput('The food was delicious and the waiter...') + ->asEmbeddings(); + + expect($response->embeddings)->toBeArray() + ->and($response->embeddings)->toHaveCount(1) + ->and($response->embeddings[0])->toBeInstanceOf(Embedding::class) + ->and($response->embeddings[0]->embedding)->toBeArray() + ->and($response->embeddings[0]->embedding)->not->toBeEmpty() + ->and($response->usage->tokens)->toBeGreaterThan(0); + }); + + it('works with multiple embeddings', function (): void { + FixtureResponse::fakeResponseSequence('*', 'replicate/embeddings-multiple-inputs'); + + $response = Prism::embeddings() + ->using('replicate', 'mark3labs/embeddings-gte-base') + ->fromArray([ + 'The food was delicious.', + 'The drinks were not so good', + ]) + ->asEmbeddings(); + + expect($response->embeddings)->toBeArray() + ->and($response->embeddings)->toHaveCount(2) + ->and($response->embeddings[0]->embedding)->toBeArray() + ->and($response->embeddings[1]->embedding)->toBeArray() + 
->and($response->usage->tokens)->toBeGreaterThan(0); + }); + + it('includes model information in meta', function (): void { + FixtureResponse::fakeResponseSequence('*', 'replicate/embeddings-single-input'); + + $response = Prism::embeddings() + ->using('replicate', 'mark3labs/embeddings-gte-base') + ->fromInput('Test input') + ->asEmbeddings(); + + expect($response->meta->model)->toBe('mark3labs/embeddings-gte-base'); + }); +}); diff --git a/tests/Providers/Replicate/ReplicateImagesTest.php b/tests/Providers/Replicate/ReplicateImagesTest.php new file mode 100644 index 000000000..f8d473807 --- /dev/null +++ b/tests/Providers/Replicate/ReplicateImagesTest.php @@ -0,0 +1,89 @@ +set('prism.providers.replicate.api_key', env('REPLICATE_API_KEY', 'r8_test1234')); + config()->set('prism.providers.replicate.polling_interval', 10); + config()->set('prism.providers.replicate.max_wait_time', 10); +}); + +describe('Image Generation for Replicate', function (): void { + it('can generate an image with flux-schnell', function (): void { + $createResponse = json_decode(file_get_contents(__DIR__.'/../../Fixtures/replicate/generate-image-basic-1.json'), true); + $completedResponse = json_decode(file_get_contents(__DIR__.'/../../Fixtures/replicate/generate-image-basic-2.json'), true); + $predictionId = $createResponse['id']; + $imageUrl = $completedResponse['output'][0]; + + Http::fake([ + 'https://api.replicate.com/v1/predictions' => Http::response($createResponse, 201), + "https://api.replicate.com/v1/predictions/{$predictionId}" => Http::response($completedResponse, 200), + $imageUrl => Http::response('fake-image-content', 200), + ]); + + $response = Prism::image() + ->using('replicate', 'black-forest-labs/flux-schnell') + ->withPrompt('A cute baby sea otter floating on its back in calm blue water') + ->generate(); + + expect($response->firstImage())->not->toBeNull() + ->and($response->firstImage()->hasUrl())->toBeTrue() + ->and($response->firstImage()->url)->not->toBeEmpty() + 
->and($response->firstImage()->hasBase64())->toBeTrue() + ->and($response->firstImage()->base64)->not->toBeEmpty() + ->and($response->imageCount())->toBe(1); + }); + + it('can generate an image with provider options', function (): void { + FixtureResponse::fakeResponseSequence('*', 'replicate/generate-image-with-options'); + + Prism::image() + ->using('replicate', 'black-forest-labs/flux-schnell') + ->withPrompt('A mountain landscape at sunset') + ->withProviderOptions([ + 'aspect_ratio' => '16:9', + 'output_format' => 'png', + ]) + ->generate(); + + Http::assertSent(function ($request): bool { + if (! str_contains((string) $request->url(), 'predictions')) { + return false; + } + + $body = json_decode((string) $request->body(), true); + + return isset($body['input']['prompt']) + && $body['input']['prompt'] === 'A mountain landscape at sunset' + && isset($body['input']['aspect_ratio']) + && $body['input']['aspect_ratio'] === '16:9'; + }); + }); + + it('includes meta information in response', function (): void { + $createResponse = json_decode(file_get_contents(__DIR__.'/../../Fixtures/replicate/generate-image-basic-1.json'), true); + $completedResponse = json_decode(file_get_contents(__DIR__.'/../../Fixtures/replicate/generate-image-basic-2.json'), true); + $predictionId = $createResponse['id']; + $imageUrl = $completedResponse['output'][0]; + + Http::fake([ + 'https://api.replicate.com/v1/predictions' => Http::response($createResponse, 201), + "https://api.replicate.com/v1/predictions/{$predictionId}" => Http::response($completedResponse, 200), + $imageUrl => Http::response('fake-image-content', 200), + ]); + + $response = Prism::image() + ->using('replicate', 'black-forest-labs/flux-schnell') + ->withPrompt('A cute baby sea otter floating on its back in calm blue water') + ->generate(); + + expect($response->meta->id)->not->toBeEmpty() + ->and($response->meta->model)->toBe('black-forest-labs/flux-schnell'); + }); +}); diff --git 
a/tests/Providers/Replicate/ReplicateSSEStreamTest.php b/tests/Providers/Replicate/ReplicateSSEStreamTest.php new file mode 100644 index 000000000..73399f37a --- /dev/null +++ b/tests/Providers/Replicate/ReplicateSSEStreamTest.php @@ -0,0 +1,138 @@ +set('prism.providers.replicate.api_key', env('REPLICATE_API_KEY')); + config()->set('prism.providers.replicate.polling_interval', 1000); + config()->set('prism.providers.replicate.max_wait_time', 120); +}); + +describe('Real-time SSE Streaming for Replicate', function (): void { + it('can stream text in real-time using SSE', function (): void { + // This test requires a real Replicate API key and makes real API calls + if (! env('REPLICATE_API_KEY') || env('REPLICATE_API_KEY') === 'r8_test1234') { + $this->markTestSkipped('Requires real REPLICATE_API_KEY environment variable'); + } + + $response = Prism::text() + ->using('replicate', 'meta/meta-llama-3-8b-instruct') + ->withPrompt('Say hello in 5 words or less') + ->withMaxTokens(20) + ->asStream(); + + $text = ''; + $events = []; + $deltaCount = 0; + $receivedStreamStart = false; + $receivedTextStart = false; + $receivedTextDelta = false; + $receivedTextComplete = false; + $receivedStreamEnd = false; + + foreach ($response as $event) { + $events[] = $event; + + if ($event instanceof StreamStartEvent) { + $receivedStreamStart = true; + } + + if ($event instanceof TextStartEvent) { + $receivedTextStart = true; + } + + if ($event instanceof TextDeltaEvent) { + $text .= $event->delta; + $deltaCount++; + $receivedTextDelta = true; + } + + if ($event instanceof TextCompleteEvent) { + $receivedTextComplete = true; + } + + if ($event instanceof StreamEndEvent) { + $receivedStreamEnd = true; + } + } + + // Verify all events were received + expect($receivedStreamStart)->toBeTrue('Should receive StreamStartEvent') + ->and($receivedTextStart)->toBeTrue('Should receive TextStartEvent') + ->and($receivedTextDelta)->toBeTrue('Should receive at least one TextDeltaEvent') + 
->and($receivedTextComplete)->toBeTrue('Should receive TextCompleteEvent') + ->and($receivedStreamEnd)->toBeTrue('Should receive StreamEndEvent'); + + // Verify text was generated + expect($text)->not->toBeEmpty('Should have generated some text') + ->and($deltaCount)->toBeGreaterThan(0, 'Should have received multiple deltas'); + })->group('integration', 'sse', 'slow'); + + it('receives tokens in real-time without waiting for completion', function (): void { + // This test verifies that tokens arrive progressively, not all at once + if (! env('REPLICATE_API_KEY') || env('REPLICATE_API_KEY') === 'r8_test1234') { + $this->markTestSkipped('Requires real REPLICATE_API_KEY environment variable'); + } + + $response = Prism::text() + ->using('replicate', 'meta/meta-llama-3-8b-instruct') + ->withPrompt('Count from 1 to 10') + ->withMaxTokens(50) + ->asStream(); + + $timestamps = []; + $firstDeltaTime = null; + $lastDeltaTime = null; + + foreach ($response as $event) { + if ($event instanceof TextDeltaEvent) { + $currentTime = microtime(true); + $timestamps[] = $currentTime; + + if ($firstDeltaTime === null) { + $firstDeltaTime = $currentTime; + } + + $lastDeltaTime = $currentTime; + } + } + + // Verify we received multiple delta events (proof of streaming, not batch) + expect($timestamps)->toHaveCount(count($timestamps)) + ->and(count($timestamps))->toBeGreaterThan(1, 'Should receive multiple token deltas for streaming'); + + // If tokens were buffered and sent all at once (simulated streaming), + // we would likely get very few deltas. Real SSE typically sends many small chunks. + // For a "count from 1 to 10" prompt, we should get multiple deltas. + expect(count($timestamps))->toBeGreaterThan(5, 'Real SSE should produce many small token chunks'); + })->group('integration', 'sse', 'slow'); + + it('handles SSE stream errors gracefully', function (): void { + // Test that errors in the stream are properly handled + if (! 
env('REPLICATE_API_KEY') || env('REPLICATE_API_KEY') === 'r8_test1234') { + $this->markTestSkipped('Requires real REPLICATE_API_KEY environment variable'); + } + + // Use an invalid model to trigger an error + expect(function (): void { + $response = Prism::text() + ->using('replicate', 'invalid/model-does-not-exist') + ->withPrompt('This should fail') + ->asStream(); + + // Try to consume the stream + foreach ($response as $event) { + // Should throw before we get here + } + })->toThrow(\Prism\Prism\Exceptions\PrismException::class); + })->group('integration', 'sse', 'slow'); +})->group('integration'); diff --git a/tests/Providers/Replicate/ReplicateStreamTest.php b/tests/Providers/Replicate/ReplicateStreamTest.php new file mode 100644 index 000000000..2a9cc6ce6 --- /dev/null +++ b/tests/Providers/Replicate/ReplicateStreamTest.php @@ -0,0 +1,127 @@ +set('prism.providers.replicate.api_key', env('REPLICATE_API_KEY', 'r8_test1234')); + config()->set('prism.providers.replicate.polling_interval', 10); + config()->set('prism.providers.replicate.max_wait_time', 10); +}); + +describe('Streaming for Replicate', function (): void { + it('can generate text with a basic stream', function (): void { + FixtureResponse::fakeResponseSequence('*', 'replicate/generate-text-with-a-prompt'); + + $response = Prism::text() + ->using('replicate', 'meta/meta-llama-3.1-405b-instruct') + ->withPrompt('Hello, world!') + ->asStream(); + + $text = ''; + $events = []; + $deltaCount = 0; + + foreach ($response as $event) { + $events[] = $event; + + if ($event instanceof TextDeltaEvent) { + $text .= $event->delta; + $deltaCount++; + } + } + + expect($events)->not->toBeEmpty() + ->and($text)->not->toBeEmpty() + ->and($deltaCount)->toBeGreaterThan(0); + }); + + it('emits all expected stream events', function (): void { + FixtureResponse::fakeResponseSequence('*', 'replicate/generate-text-with-a-prompt'); + + $response = Prism::text() + ->using('replicate', 'meta/meta-llama-3.1-405b-instruct') + 
->withPrompt('Hello, world!') + ->asStream(); + + $hasStreamStart = false; + $hasTextStart = false; + $hasTextDelta = false; + $hasTextComplete = false; + $hasStreamEnd = false; + + foreach ($response as $event) { + if ($event instanceof StreamStartEvent) { + $hasStreamStart = true; + } + if ($event instanceof TextStartEvent) { + $hasTextStart = true; + } + if ($event instanceof TextDeltaEvent) { + $hasTextDelta = true; + } + if ($event instanceof TextCompleteEvent) { + $hasTextComplete = true; + } + if ($event instanceof StreamEndEvent) { + $hasStreamEnd = true; + } + } + + expect($hasStreamStart)->toBeTrue() + ->and($hasTextStart)->toBeTrue() + ->and($hasTextDelta)->toBeTrue() + ->and($hasTextComplete)->toBeTrue() + ->and($hasStreamEnd)->toBeTrue(); + }); + + it('includes usage information in stream end event', function (): void { + FixtureResponse::fakeResponseSequence('*', 'replicate/generate-text-with-a-prompt'); + + $response = Prism::text() + ->using('replicate', 'meta/meta-llama-3.1-405b-instruct') + ->withPrompt('Hello, world!') + ->asStream(); + + $streamEndEvent = null; + + foreach ($response as $event) { + if ($event instanceof StreamEndEvent) { + $streamEndEvent = $event; + } + } + + expect($streamEndEvent)->not->toBeNull() + ->and($streamEndEvent->usage->promptTokens)->toBeGreaterThan(0) + ->and($streamEndEvent->usage->completionTokens)->toBeGreaterThan(0); + }); + + it('reconstructs full text from deltas', function (): void { + FixtureResponse::fakeResponseSequence('*', 'replicate/generate-text-with-system-prompt'); + + $response = Prism::text() + ->using('replicate', 'meta/meta-llama-3.1-405b-instruct') + ->withSystemPrompt('You are a helpful assistant') + ->withPrompt('Say hello') + ->asStream(); + + $streamedText = ''; + + foreach ($response as $event) { + if ($event instanceof TextDeltaEvent) { + $streamedText .= $event->delta; + } + } + + expect($streamedText)->not->toBeEmpty(); + }); +}); diff --git 
a/tests/Providers/Replicate/ReplicateStructuredTest.php b/tests/Providers/Replicate/ReplicateStructuredTest.php new file mode 100644 index 000000000..ea7703a0b --- /dev/null +++ b/tests/Providers/Replicate/ReplicateStructuredTest.php @@ -0,0 +1,95 @@ +set('prism.providers.replicate.api_key', env('REPLICATE_API_KEY', 'r8_test1234')); + config()->set('prism.providers.replicate.polling_interval', 10); + config()->set('prism.providers.replicate.max_wait_time', 10); +}); + +describe('Structured Output for Replicate', function (): void { + it('returns structured output with JSON mode', function (): void { + FixtureResponse::fakeResponseSequence('*', 'replicate/structured-json-mode'); + + $schema = new ObjectSchema( + 'weather_info', + 'weather and game information', + [ + new StringSchema('weather', 'The weather forecast'), + new StringSchema('game_time', 'The game time'), + new BooleanSchema('coat_required', 'whether a coat is required'), + ], + ['weather', 'game_time', 'coat_required'] + ); + + $response = Prism::structured() + ->withSchema($schema) + ->using('replicate', 'meta/meta-llama-3.1-405b-instruct') + ->withPrompt('What time is the tigers game today and should I wear a coat?') + ->asStructured(); + + expect($response->structured)->toBeArray() + ->and($response->structured)->toHaveKeys([ + 'weather', + 'game_time', + 'coat_required', + ]) + ->and($response->structured['weather'])->toBeString() + ->and($response->structured['game_time'])->toBeString() + ->and($response->structured['coat_required'])->toBeBool(); + }); + + it('can handle simple structured output', function (): void { + FixtureResponse::fakeResponseSequence('*', 'replicate/structured-simple'); + + $schema = new ObjectSchema( + 'person', + 'person information', + [ + new StringSchema('name', 'The person name'), + new StringSchema('role', 'The person role'), + ], + ['name', 'role'] + ); + + $response = Prism::structured() + ->withSchema($schema) + ->using('replicate', 
'meta/meta-llama-3.1-405b-instruct') + ->withPrompt('Tell me about Albert Einstein') + ->asStructured(); + + expect($response->structured)->toBeArray() + ->and($response->structured)->toHaveKeys(['name', 'role']); + }); + + it('includes usage information in response', function (): void { + FixtureResponse::fakeResponseSequence('*', 'replicate/structured-json-mode'); + + $schema = new ObjectSchema( + 'output', + 'output object', + [ + new StringSchema('result', 'The result'), + ], + ['result'] + ); + + $response = Prism::structured() + ->withSchema($schema) + ->using('replicate', 'meta/meta-llama-3.1-405b-instruct') + ->withPrompt('Say hello') + ->asStructured(); + + expect($response->usage->promptTokens)->toBeGreaterThan(0) + ->and($response->usage->completionTokens)->toBeGreaterThan(0); + }); +}); diff --git a/tests/Providers/Replicate/ReplicateTextTest.php b/tests/Providers/Replicate/ReplicateTextTest.php new file mode 100644 index 000000000..1f54725cd --- /dev/null +++ b/tests/Providers/Replicate/ReplicateTextTest.php @@ -0,0 +1,53 @@ +set('prism.providers.replicate.api_key', env('REPLICATE_API_KEY', 'r8_test1234')); + config()->set('prism.providers.replicate.polling_interval', 10); // Fast polling for tests + config()->set('prism.providers.replicate.max_wait_time', 10); +}); + +describe('Text generation for Replicate', function (): void { + it('can generate text with a prompt', function (): void { + FixtureResponse::fakeResponseSequence('*', 'replicate/generate-text-with-a-prompt'); + + $response = Prism::text() + ->using('replicate', 'meta/meta-llama-3.1-405b-instruct') + ->withPrompt('Hello, world!') + ->withMaxTokens(100) + ->generate(); + + expect($response->text)->toContain('Hello') + ->and($response->steps[0]->additionalContent['metrics']['predict_time'])->toBeGreaterThan(0) + ->and($response->steps[0]->additionalContent['metrics']['total_time'])->toBeGreaterThan(0); + }); + + it('can generate text with system prompt', function (): void { + 
FixtureResponse::fakeResponseSequence('*', 'replicate/generate-text-with-system-prompt'); + + $response = Prism::text() + ->using('replicate', 'meta/meta-llama-3.1-405b-instruct') + ->withSystemPrompt('You are a helpful assistant.') + ->withPrompt('Who are you?') + ->generate(); + + expect($response->text)->toContain('Hello'); + }); + + it('handles model with version specified', function (): void { + FixtureResponse::fakeResponseSequence('*', 'replicate/generate-text-with-version'); + + $response = Prism::text() + ->using('replicate', 'meta/meta-llama-3.1-405b-instruct:v1') + ->withPrompt('Hello!') + ->generate(); + + expect($response->text)->toContain('Hello'); + }); +});