Add Image Support to Embeddings API (#789)

cyppe · sixlive · web-flow · commit 267ca1957c31 · 2025-12-22T10:26:44.000-05:00
Co-authored-by: TJ Miller <tj@themillers.co>
diff --git a/.gitignore b/.gitignore
@@ -12,3 +12,4 @@ phpunit.xml
 ray.php
 CLAUDE.md
 .phpunit.result.cache
+.ddev/
diff --git a/composer.json b/composer.json
@@ -38,7 +38,7 @@
     "pestphp/pest-plugin-laravel": "^3.0",
     "phpstan/extension-installer": "^1.3",
     "phpstan/phpstan-deprecation-rules": "^2.0",
-    "rector/rector": "2.2.7",
+    "rector/rector": "2.2.14",
     "projektgopher/whisky": "^0.7.0",
     "orchestra/testbench": "^10",
     "mockery/mockery": "^1.6",
diff --git a/docs/core-concepts/embeddings.md b/docs/core-concepts/embeddings.md
@@ -1,6 +1,6 @@
 # Embeddings
 
-Transform your text into powerful vector representations! Embeddings let you add semantic search, recommendation systems, and other advanced natural language features to your applications.
+Transform your content into powerful vector representations! Embeddings let you add semantic search, recommendation systems, and other advanced features to your applications - whether you're working with text or images.
 
 ## Quick Start
 
@@ -86,6 +86,71 @@ $response = Prism::embeddings()
 > [!NOTE]
 > Make sure your file exists and is readable. The generator will throw a helpful `PrismException` if there's any issue accessing the file.
 
+## Image Embeddings
+
+Some providers support image embeddings, enabling powerful use cases like visual similarity search, cross-modal retrieval, and multimodal applications. Prism makes it easy to generate embeddings from images using the same fluent API.
+
+> [!IMPORTANT]
+> Image embeddings require a provider and model that support image input (such as CLIP-based models or multimodal embedding models like BGE-VL). Check your provider's documentation to confirm image embedding support.
+
+### Single Image
+
+Generate an embedding from a single image:
+
+```php
+use Prism\Prism\Facades\Prism;
+use Prism\Prism\ValueObjects\Media\Image;
+
+$response = Prism::embeddings()
+    ->using('provider', 'model')
+    ->fromImage(Image::fromLocalPath('/path/to/product.jpg'))
+    ->asEmbeddings();
+
+$embedding = $response->embeddings[0]->embedding;
+```
+
+### Multiple Images
+
+Process multiple images in a single request:
+
+```php
+use Prism\Prism\Facades\Prism;
+use Prism\Prism\ValueObjects\Media\Image;
+
+$response = Prism::embeddings()
+    ->using('provider', 'model')
+    ->fromImages([
+        Image::fromLocalPath('/path/to/image1.jpg'),
+        Image::fromUrl('https://example.com/image2.png'),
+    ])
+    ->asEmbeddings();
+
+foreach ($response->embeddings as $embedding) {
+    // Process each image embedding
+    $vector = $embedding->embedding;
+}
+```
+
+### Multimodal: Text + Image
+
+Combine text and images for cross-modal search scenarios. This is particularly useful for applications like "find products similar to this image that match this description":
+
+```php
+use Prism\Prism\Facades\Prism;
+use Prism\Prism\ValueObjects\Media\Image;
+
+$response = Prism::embeddings()
+    ->using('provider', 'model')
+    ->fromInput('Find similar products in red')
+    ->fromImage(Image::fromBase64($productImage, 'image/png'))
+    ->asEmbeddings();
+```
+
+You can chain `fromImage()` and `fromInput()` in any order - Prism handles both gracefully.
+
+> [!TIP]
+> The `Image` class supports multiple input sources: `fromLocalPath()`, `fromUrl()`, `fromBase64()`, `fromStoragePath()`, and `fromRawContent()`. See the [Images documentation](/input-modalities/images.html) for details.
+
 ## Common Settings
 
 Just like with text generation, you can fine-tune your embeddings requests:
@@ -146,7 +211,7 @@ try {
 }
 ```
 
-## Pro Tips 🌟
+## Pro Tips
 
 **Vector Storage**: Consider using a vector database like Milvus, Qdrant, or pgvector to store and query your embeddings efficiently.
 
diff --git a/src/Embeddings/PendingRequest.php b/src/Embeddings/PendingRequest.php
@@ -9,6 +9,7 @@
 use Prism\Prism\Concerns\ConfiguresProviders;
 use Prism\Prism\Concerns\HasProviderOptions;
 use Prism\Prism\Exceptions\PrismException;
+use Prism\Prism\ValueObjects\Media\Image;
 
 class PendingRequest
 {
@@ -19,6 +20,9 @@ class PendingRequest
     /** @var array<string> */
     protected array $inputs = [];
 
+    /** @var array<Image> */
+    protected array $images = [];
+
     public function fromInput(string $input): self
     {
         $this->inputs[] = $input;
@@ -53,6 +57,33 @@ public function fromFile(string $path): self
         return $this;
     }
 
+    /**
+     * Add an image for embedding generation.
+     *
+     * Note: Not all providers support image embeddings. Check the provider's
+     * documentation to ensure the model you're using supports image input.
+     * Common providers that support image embeddings include CLIP-based models
+     * and multimodal embedding models like BGE-VL.
+     */
+    public function fromImage(Image $image): self
+    {
+        $this->images[] = $image;
+
+        return $this;
+    }
+
+    /**
+     * Add multiple images for embedding generation.
+     *
+     * @param  array<Image>  $images
+     */
+    public function fromImages(array $images): self
+    {
+        $this->images = array_merge($this->images, $images);
+
+        return $this;
+    }
+
     /**
      * @deprecated Use `asEmbeddings` instead.
      */
@@ -63,8 +94,8 @@ public function generate(): Response
 
     public function asEmbeddings(): Response
     {
-        if ($this->inputs === []) {
-            throw new PrismException('Embeddings input is required');
+        if ($this->inputs === [] && $this->images === []) {
+            throw new PrismException('Embeddings input is required (text or images)');
         }
 
         $request = $this->toRequest();
@@ -82,6 +113,7 @@ protected function toRequest(): Request
             model: $this->model,
             providerKey: $this->providerKey(),
             inputs: $this->inputs,
+            images: $this->images,
             clientOptions: $this->clientOptions,
             clientRetry: $this->clientRetry,
             providerOptions: $this->providerOptions
diff --git a/src/Embeddings/Request.php b/src/Embeddings/Request.php
@@ -8,13 +8,15 @@
 use Prism\Prism\Concerns\ChecksSelf;
 use Prism\Prism\Concerns\HasProviderOptions;
 use Prism\Prism\Contracts\PrismRequest;
+use Prism\Prism\ValueObjects\Media\Image;
 
 class Request implements PrismRequest
 {
     use ChecksSelf, HasProviderOptions;
 
     /**
      * @param  array<string>  $inputs
+     * @param  array<Image>  $images
      * @param  array<string, mixed>  $clientOptions
      * @param  array{0: array<int, int>|int, 1?: Closure|int, 2?: ?callable, 3?: bool}  $clientRetry
      * @param  array<string, mixed>  $providerOptions
@@ -23,6 +25,7 @@ public function __construct(
         protected string $model,
         protected string $providerKey,
         protected array $inputs,
+        protected array $images,
         protected array $clientOptions,
         protected array $clientRetry,
         array $providerOptions = [],
@@ -54,6 +57,32 @@ public function inputs(): array
         return $this->inputs;
     }
 
+    /**
+     * Get image inputs for embedding generation.
+     *
+     * @return array<Image>
+     */
+    public function images(): array
+    {
+        return $this->images;
+    }
+
+    /**
+     * Check if the request contains image inputs.
+     */
+    public function hasImages(): bool
+    {
+        return $this->images !== [];
+    }
+
+    /**
+     * Check if the request contains text inputs.
+     */
+    public function hasInputs(): bool
+    {
+        return $this->inputs !== [];
+    }
+
     #[\Override]
     public function model(): string
     {
diff --git a/src/Facades/Prism.php b/src/Facades/Prism.php
@@ -48,7 +48,7 @@ public function __construct(
                 private readonly PrismFake $fake
             ) {}
 
-            public function resolve(ProviderEnum|string $name, array $providerConfig = []): Provider
+            public function resolve(ProviderEnum|string $name, array $providerConfig = []): PrismFake
             {
                 $this->fake->setProviderConfig($providerConfig);
 
diff --git a/src/ValueObjects/Media/Media.php b/src/ValueObjects/Media/Media.php
@@ -256,8 +256,6 @@ public function mimeType(): ?string
     }
 
     /**
-     * Get a file resource suitable for HTTP multipart uploads
-     *
      * @return resource
      */
     public function resource()
@@ -290,7 +288,6 @@ public function fetchUrlContent(): void
             return;
         }
 
-        /** @var \Illuminate\Http\Client\Response $response */
         $response = Http::get($this->url);
         $content = $response->body();
 
diff --git a/tests/Embeddings/ImageEmbeddingsTest.php b/tests/Embeddings/ImageEmbeddingsTest.php

Original file line number	Diff line number	Diff line change
`@@ -48,7 +48,7 @@ public function __construct(`
`48`	`48`	`private readonly PrismFake $fake`
`49`	`49`	`) {}`
`50`	`50`
`51`		`- public function resolve(ProviderEnum\|string $name, array $providerConfig = []): Provider`
	`51`	`+ public function resolve(ProviderEnum\|string $name, array $providerConfig = []): PrismFake`
`52`	`52`	`{`
`53`	`53`	`$this->fake->setProviderConfig($providerConfig);`
`54`	`54`
Original file line number	Diff line number	Diff line change
`@@ -256,8 +256,6 @@ public function mimeType(): ?string`
`256`	`256`	`}`
`257`	`257`
`258`	`258`	`/**`
`259`		`- * Get a file resource suitable for HTTP multipart uploads`
`260`		`- *`
`261`	`259`	`* @return resource`
`262`	`260`	`*/`
`263`	`261`	`public function resource()`
`@@ -290,7 +288,6 @@ public function fetchUrlContent(): void`
`290`	`288`	`return;`
`291`	`289`	`}`
`292`	`290`
`293`		`- /** @var \Illuminate\Http\Client\Response $response */`
`294`	`291`	`$response = Http::get($this->url);`
`295`	`292`	`$content = $response->body();`
`296`	`293`