Skip to content

Commit a44e3f7

Browse files
committed
feature #46 feat: add Gemini Embeddings (valtzu)
This PR was merged into the main branch. Discussion ---------- feat: add Gemini Embeddings | Q | A | ------------- | --- | Bug fix? | no | New feature? | yes | Docs? | | Issues | | License | MIT Cherry picking php-llm/llm-chain#347 > Add Embeddings support for Google Gemini. > > Only batch embedding implemented. > > Related to #28 Commits ------- 17e25d1 feat: add Gemini Embeddings (#347)
2 parents bc2185e + 17e25d1 commit a44e3f7

File tree

9 files changed

+381
-3
lines changed

9 files changed

+381
-3
lines changed

examples/google/embeddings.php

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Platform\Bridge\Google\Embeddings;
13+
use Symfony\AI\Platform\Bridge\Google\PlatformFactory;
14+
use Symfony\AI\Platform\Response\VectorResponse;
15+
use Symfony\Component\Dotenv\Dotenv;
16+
17+
// Bootstrap: Composer autoloader plus environment variables from the .env file
// located one directory above this script's directory.
$baseDir = dirname(__DIR__);
require_once $baseDir.'/vendor/autoload.php';
(new Dotenv())->loadEnv($baseDir.'/.env');

// Abort early when no API key is configured.
$apiKey = $_ENV['GOOGLE_API_KEY'] ?? null;
if (empty($apiKey)) {
    echo 'Please set the GOOGLE_API_KEY environment variable.'.\PHP_EOL;
    exit(1);
}

// Text that will be turned into an embedding vector.
$text = <<<TEXT
Once upon a time, there was a country called Japan. It was a beautiful country with a lot of mountains and rivers.
The people of Japan were very kind and hardworking. They loved their country very much and took care of it. The
country was very peaceful and prosperous. The people lived happily ever after.
TEXT;

// Request the embedding from the Google platform using the default Embeddings model.
$platform = PlatformFactory::create($apiKey);
$embeddings = new Embeddings();
$response = $platform->request($embeddings, $text);

assert($response instanceof VectorResponse);

// Print the dimension count of the first returned vector.
$vectors = $response->getContent();
echo 'Dimensions: '.$vectors[0]->getDimensions().\PHP_EOL;
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Doctrine\DBAL\DriverManager;
13+
use Doctrine\DBAL\Tools\DsnParser;
14+
use PhpLlm\LlmChain\Chain\Chain;
15+
use PhpLlm\LlmChain\Chain\Toolbox\ChainProcessor;
16+
use PhpLlm\LlmChain\Chain\Toolbox\Tool\SimilaritySearch;
17+
use PhpLlm\LlmChain\Chain\Toolbox\Toolbox;
18+
use PhpLlm\LlmChain\Platform\Bridge\Google\Embeddings;
19+
use PhpLlm\LlmChain\Platform\Bridge\Google\Embeddings\TaskType;
20+
use PhpLlm\LlmChain\Platform\Bridge\Google\Gemini;
21+
use PhpLlm\LlmChain\Platform\Bridge\Google\PlatformFactory;
22+
use PhpLlm\LlmChain\Platform\Message\Message;
23+
use PhpLlm\LlmChain\Platform\Message\MessageBag;
24+
use PhpLlm\LlmChain\Store\Bridge\MariaDB\Store;
25+
use PhpLlm\LlmChain\Store\Document\Metadata;
26+
use PhpLlm\LlmChain\Store\Document\TextDocument;
27+
use PhpLlm\LlmChain\Store\Indexer;
28+
use Symfony\Component\Dotenv\Dotenv;
29+
use Symfony\Component\Uid\Uuid;
30+
31+
// Bootstrap: Composer autoloader plus environment variables from the .env file
// located two directories above this script's directory.
require_once dirname(__DIR__, 2).'/vendor/autoload.php';
(new Dotenv())->loadEnv(dirname(__DIR__, 2).'/.env');

if (empty($_ENV['GOOGLE_API_KEY']) || empty($_ENV['MARIADB_URI'])) {
    echo 'Please set GOOGLE_API_KEY and MARIADB_URI environment variables.'.\PHP_EOL;
    exit(1);
}

// initialize the store
$store = Store::fromDbal(
    connection: DriverManager::getConnection((new DsnParser())->parse($_ENV['MARIADB_URI'])),
    tableName: 'my_table',
    indexName: 'my_index',
    vectorFieldName: 'embedding',
);

// our data
$movies = [
    ['title' => 'Inception', 'description' => 'A skilled thief is given a chance at redemption if he can successfully perform inception, the act of planting an idea in someone\'s subconscious.', 'director' => 'Christopher Nolan'],
    ['title' => 'The Matrix', 'description' => 'A hacker discovers the world he lives in is a simulated reality and joins a rebellion to overthrow its controllers.', 'director' => 'The Wachowskis'],
    ['title' => 'The Godfather', 'description' => 'The aging patriarch of an organized crime dynasty transfers control of his empire to his reluctant son.', 'director' => 'Francis Ford Coppola'],
];

// create one text document per movie; the list is initialized up front so it
// is always defined, even if the data set above is emptied
$documents = [];
foreach ($movies as $movie) {
    $documents[] = new TextDocument(
        id: Uuid::v4(),
        content: 'Title: '.$movie['title'].\PHP_EOL.'Director: '.$movie['director'].\PHP_EOL.'Description: '.$movie['description'],
        metadata: new Metadata($movie),
    );
}

// initialize the table; the dimension count matches the 'dimensions' option
// passed to the Embeddings model below
$store->initialize(['dimensions' => 768]);

// create embeddings for documents
$platform = PlatformFactory::create($_ENV['GOOGLE_API_KEY']);
$embeddings = new Embeddings(options: ['dimensions' => 768, 'task_type' => TaskType::SemanticSimilarity]);
$indexer = new Indexer($platform, $embeddings, $store);
$indexer->index($documents);

$model = new Gemini(Gemini::GEMINI_2_FLASH_LITE);

// expose the similarity search over the store as a tool of the chain
$similaritySearch = new SimilaritySearch($platform, $embeddings, $store);
$toolbox = Toolbox::create($similaritySearch);
$processor = new ChainProcessor($toolbox);
$chain = new Chain($platform, $model, [$processor], [$processor]);

$messages = new MessageBag(
    Message::forSystem('Please answer all user questions only using SimilaritySearch function.'),
    Message::ofUser('Which movie fits the theme of the mafia?')
);
$response = $chain->call($messages);

echo $response->getContent().\PHP_EOL;

src/platform/doc/index.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ usually defined by the specific models and their documentation.
7878
* `Amazon's Nova`_ with `AWS Bedrock`_ as Platform
7979
* `Mistral's Mistral`_ with `Mistral`_ as Platform
8080
* **Embeddings Models**
81+
* `Google's Text Embeddings`_ with `Google`_ as Platform
8182
* `OpenAI's Text Embeddings`_ with `OpenAI`_ and `Azure`_ as Platform
8283
* `Voyage's Embeddings`_ with `Voyage`_ as Platform
8384
* `Mistral Embed`_ with `Mistral`_ as Platform
@@ -274,6 +275,7 @@ which can be useful to speed up the processing::
274275
.. _`Amazon's Nova`: https://nova.amazon.com
275276
.. _`Mistral's Mistral`: https://www.mistral.ai/
276277
.. _`Mistral`: https://www.mistral.ai/
278+
.. _`Google's Text Embeddings`: https://ai.google.dev/gemini-api/docs/embeddings
277279
.. _`OpenAI's Text Embeddings`: https://platform.openai.com/docs/guides/embeddings/embedding-models
278280
.. _`Voyage's Embeddings`: https://docs.voyageai.com/docs/embeddings
279281
.. _`Voyage`: https://www.voyageai.com/
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Platform\Bridge\Google;
13+
14+
use Symfony\AI\Platform\Bridge\Google\Embeddings\TaskType;
15+
use Symfony\AI\Platform\Capability;
16+
use Symfony\AI\Platform\Model;
17+
18+
/**
 * Model definition for Google's text embedding models.
 *
 * The model name defaults to GEMINI_EMBEDDING_EXP_03_07 and the model is always
 * registered with the INPUT_MULTIPLE capability.
 *
 * @author Valtteri R <[email protected]>
 */
class Embeddings extends Model
{
    /** Supported dimensions: 3072, 1536, or 768 */
    public const GEMINI_EMBEDDING_EXP_03_07 = 'gemini-embedding-exp-03-07';

    /** Fixed 768 dimensions */
    public const TEXT_EMBEDDING_004 = 'text-embedding-004';

    /** Fixed 768 dimensions */
    public const EMBEDDING_001 = 'embedding-001';

    /**
     * @param string                                                   $name    Model identifier, one of the constants above by default
     * @param array{dimensions?: int, task_type?: TaskType|string} $options Model options handed to the parent Model unchanged
     */
    public function __construct(string $name = self::GEMINI_EMBEDDING_EXP_03_07, array $options = [])
    {
        $capabilities = [Capability::INPUT_MULTIPLE];

        parent::__construct($name, $capabilities, $options);
    }
}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Platform\Bridge\Google\Embeddings;
13+
14+
use Symfony\AI\Platform\Bridge\Google\Embeddings;
15+
use Symfony\AI\Platform\Exception\RuntimeException;
16+
use Symfony\AI\Platform\Model;
17+
use Symfony\AI\Platform\ModelClientInterface;
18+
use Symfony\AI\Platform\Response\VectorResponse;
19+
use Symfony\AI\Platform\ResponseConverterInterface;
20+
use Symfony\AI\Platform\Vector\Vector;
21+
use Symfony\Contracts\HttpClient\HttpClientInterface;
22+
use Symfony\Contracts\HttpClient\ResponseInterface;
23+
24+
/**
 * Model client and response converter for Google embedding models.
 *
 * Posts one or more texts to the "batchEmbedContents" endpoint of the
 * Generative Language API and converts the returned embeddings into a
 * VectorResponse.
 *
 * @author Valtteri R <[email protected]>
 */
final readonly class ModelClient implements ModelClientInterface, ResponseConverterInterface
{
    /**
     * @param HttpClientInterface $httpClient HTTP client used to send the request
     * @param string              $apiKey     API key sent in the "x-goog-api-key" header
     */
    public function __construct(
        private HttpClientInterface $httpClient,
        #[\SensitiveParameter]
        private string $apiKey,
    ) {
    }

    /**
     * Only instances of the Google Embeddings model are handled by this client.
     */
    public function supports(Model $model): bool
    {
        return $model instanceof Embeddings;
    }

    /**
     * Sends a batch embedding request containing one entry per input text.
     *
     * @param Model                    $model   Model whose name and options ("dimensions", "task_type") shape the request
     * @param array<string|int, mixed>|string $payload A single text or a collection of texts; array keys are ignored
     * @param array<string, mixed>     $options Per-call options; only "title" is read here
     */
    public function request(Model $model, array|string $payload, array $options = []): ResponseInterface
    {
        $url = \sprintf('https://generativelanguage.googleapis.com/v1beta/models/%s:%s', $model->getName(), 'batchEmbedContents');
        $modelOptions = $model->getOptions();

        // array_map() keeps the keys of its input when given a single array. Re-index
        // the texts so "requests" is always serialized as a JSON array, even when the
        // payload is keyed by strings or has gaps in its integer keys.
        $texts = \is_array($payload) ? array_values($payload) : [$payload];

        return $this->httpClient->request('POST', $url, [
            'headers' => [
                'x-goog-api-key' => $this->apiKey,
            ],
            'json' => [
                'requests' => array_map(
                    // array_filter() without a callback drops every falsy entry, so
                    // options that were not provided (null) are left out of the request.
                    static fn (string $text) => array_filter([
                        'model' => 'models/'.$model->getName(),
                        'content' => ['parts' => [['text' => $text]]],
                        'outputDimensionality' => $modelOptions['dimensions'] ?? null,
                        'taskType' => $modelOptions['task_type'] ?? null,
                        'title' => $options['title'] ?? null,
                    ]),
                    $texts,
                ),
            ],
        ]);
    }

    /**
     * Builds a VectorResponse holding one Vector per entry of the "embeddings" list.
     *
     * @param array<string, mixed> $options Unused by this converter
     *
     * @throws RuntimeException When the decoded response has no "embeddings" entry
     */
    public function convert(ResponseInterface $response, array $options = []): VectorResponse
    {
        $data = $response->toArray();

        if (!isset($data['embeddings'])) {
            throw new RuntimeException('Response does not contain data');
        }

        return new VectorResponse(
            ...array_map(
                static fn (array $item): Vector => new Vector($item['values']),
                $data['embeddings'],
            ),
        );
    }
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Platform\Bridge\Google\Embeddings;
13+
14+
/**
 * Task type identifiers accepted by the embeddings request ("taskType" field).
 *
 * NOTE(review): the members below are declared as constants, not as enum cases,
 * so each TaskType::* expression evaluates to a plain string and the enum itself
 * has no cases.
 */
enum TaskType: string
{
    /**
     * Unset value, which will default to one of the other enum values.
     */
    public const TaskTypeUnspecified = 'TASK_TYPE_UNSPECIFIED';

    /**
     * The given text is a query in a search/retrieval setting.
     */
    public const RetrievalQuery = 'RETRIEVAL_QUERY';

    /**
     * The given text is a document from the corpus being searched.
     */
    public const RetrievalDocument = 'RETRIEVAL_DOCUMENT';

    /**
     * The given text will be used for STS.
     */
    public const SemanticSimilarity = 'SEMANTIC_SIMILARITY';

    /**
     * The given text will be classified.
     */
    public const Classification = 'CLASSIFICATION';

    /**
     * The embeddings will be used for clustering.
     */
    public const Clustering = 'CLUSTERING';

    /**
     * The given text will be used for question answering.
     */
    public const QuestionAnswering = 'QUESTION_ANSWERING';

    /**
     * The given text will be used for fact verification.
     */
    public const FactVerification = 'FACT_VERIFICATION';

    /**
     * The given text will be used for code retrieval.
     */
    public const CodeRetrievalQuery = 'CODE_RETRIEVAL_QUERY';
}

src/platform/src/Bridge/Google/PlatformFactory.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
use Symfony\AI\Platform\Bridge\Google\Contract\ToolCallMessageNormalizer;
1717
use Symfony\AI\Platform\Bridge\Google\Contract\ToolNormalizer;
1818
use Symfony\AI\Platform\Bridge\Google\Contract\UserMessageNormalizer;
19+
use Symfony\AI\Platform\Bridge\Google\Embeddings\ModelClient;
1920
use Symfony\AI\Platform\Contract;
2021
use Symfony\AI\Platform\Platform;
2122
use Symfony\Component\HttpClient\EventSourceHttpClient;
@@ -33,8 +34,9 @@ public static function create(
3334
): Platform {
3435
$httpClient = $httpClient instanceof EventSourceHttpClient ? $httpClient : new EventSourceHttpClient($httpClient);
3536
$responseHandler = new ModelHandler($httpClient, $apiKey);
37+
$embeddings = new ModelClient($httpClient, $apiKey);
3638

37-
return new Platform([$responseHandler], [$responseHandler], Contract::create(
39+
return new Platform([$responseHandler, $embeddings], [$responseHandler, $embeddings], Contract::create(
3840
new AssistantMessageNormalizer(),
3941
new MessageBagNormalizer(),
4042
new ToolNormalizer(),

src/platform/src/ModelClientInterface.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ interface ModelClientInterface
2121
public function supports(Model $model): bool;
2222

2323
/**
24-
* @param array<string, mixed> $payload
25-
* @param array<string, mixed> $options
24+
* @param array<string|int, mixed> $payload
25+
* @param array<string, mixed> $options
2626
*/
2727
public function request(Model $model, array|string $payload, array $options = []): ResponseInterface;
2828
}

0 commit comments

Comments
 (0)