Skip to content

Commit 66731d0

Browse files
valtzu, chr-hertel
authored and committed
feat: add MariaDB store (#342)
Related to #28
1 parent 3e0e872 commit 66731d0

File tree

6 files changed

+264
-0
lines changed

6 files changed

+264
-0
lines changed

compose.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
services:
2+
mariadb:
3+
image: mariadb:11.7
4+
environment:
5+
MARIADB_ALLOW_EMPTY_ROOT_PASSWORD: 1
6+
MARIADB_DATABASE: my_database
7+
ports:
8+
- "3309:3306"

examples/.env

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,6 @@ RUN_EXPENSIVE_EXAMPLES=false
6464

6565
# For using Gemini
6666
GOOGLE_API_KEY=
67+
68+
# For MariaDB store. Server defined in compose.yaml
69+
MARIADB_URI=pdo-mysql://root@127.0.0.1:3309/my_database

examples/composer.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@
44
"type": "project",
55
"license": "MIT",
66
"require": {
7+
"ext-pdo": "*",
78
"async-aws/bedrock-runtime": "^1.1",
89
"codewithkyrian/transformers": "^0.5.3",
10+
"doctrine/dbal": "^3.3|^4.0",
911
"probots-io/pinecone-php": "^1.1",
1012
"symfony/ai-agent": "@dev",
1113
"symfony/ai-platform": "@dev",
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Doctrine\DBAL\DriverManager;
13+
use Doctrine\DBAL\Tools\DsnParser;
14+
use Symfony\AI\Agent\Toolbox\Tool\SimilaritySearch;
15+
use Symfony\AI\Agent\Toolbox\Toolbox;
16+
use Symfony\AI\Platform\Bridge\OpenAI\Embeddings;
17+
use Symfony\AI\Platform\Bridge\OpenAI\GPT;
18+
use Symfony\AI\Platform\Bridge\OpenAI\PlatformFactory;
19+
use Symfony\AI\Platform\Message\Message;
20+
use Symfony\AI\Platform\Message\MessageBag;
21+
use Symfony\AI\Store\Bridge\MariaDB\Store;
22+
use Symfony\AI\Store\Document\Metadata;
23+
use Symfony\AI\Store\Document\TextDocument;
24+
use Symfony\AI\Store\Indexer;
25+
use Symfony\Component\Dotenv\Dotenv;
26+
use Symfony\Component\Uid\Uuid;
27+
28+
require_once dirname(__DIR__, 2).'/vendor/autoload.php';
(new Dotenv())->loadEnv(dirname(__DIR__, 2).'/.env');

// Both values are read below: the API key by PlatformFactory, the URI by the DSN parser.
if (empty($_ENV['OPENAI_API_KEY']) || empty($_ENV['MARIADB_URI'])) {
    echo 'Please set OPENAI_API_KEY and MARIADB_URI environment variables.'.\PHP_EOL;
    exit(1);
}

// initialize the store
$store = Store::fromDbal(
    connection: DriverManager::getConnection((new DsnParser())->parse($_ENV['MARIADB_URI'])),
    tableName: 'my_table',
    indexName: 'my_index',
    vectorFieldName: 'embedding',
);

// our data
$movies = [
    ['title' => 'Inception', 'description' => 'A skilled thief is given a chance at redemption if he can successfully perform inception, the act of planting an idea in someone\'s subconscious.', 'director' => 'Christopher Nolan'],
    ['title' => 'The Matrix', 'description' => 'A hacker discovers the world he lives in is a simulated reality and joins a rebellion to overthrow its controllers.', 'director' => 'The Wachowskis'],
    ['title' => 'The Godfather', 'description' => 'The aging patriarch of an organized crime dynasty transfers control of his empire to his reluctant son.', 'director' => 'Francis Ford Coppola'],
];

// create one text document per movie (embeddings are created later by the indexer);
// the list is initialised explicitly so it is always defined, even for an empty data set
$documents = [];
foreach ($movies as $movie) {
    $documents[] = new TextDocument(
        id: Uuid::v4(),
        content: 'Title: '.$movie['title'].\PHP_EOL.'Director: '.$movie['director'].\PHP_EOL.'Description: '.$movie['description'],
        metadata: new Metadata($movie),
    );
}

// initialize the table
$store->initialize();

// create embeddings for documents
$platform = PlatformFactory::create($_ENV['OPENAI_API_KEY']);
$indexer = new Indexer($platform, $embeddings = new Embeddings(), $store);
$indexer->index($documents);

$model = new GPT(GPT::GPT_4O_MINI);

$similaritySearch = new SimilaritySearch($platform, $embeddings, $store);
$toolbox = Toolbox::create($similaritySearch);
// NOTE(review): ChainProcessor and Chain are not imported by any `use` statement in this
// file and the script declares no namespace, so both resolve to global classes that are
// unlikely to exist. Confirm the intended classes in the Symfony\AI\Agent namespace and
// import them.
$processor = new ChainProcessor($toolbox);
$chain = new Chain($platform, $model, [$processor], [$processor]);

$messages = new MessageBag(
    Message::forSystem('Please answer all user questions only using SimilaritySearch function.'),
    Message::ofUser('Which movie fits the theme of the mafia?')
);
$response = $chain->call($messages);

echo $response->getContent().\PHP_EOL;

src/store/composer.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,15 +30,19 @@
3030
"webmozart/assert": "^1.11"
3131
},
3232
"require-dev": {
33+
"ext-pdo": "*",
3334
"codewithkyrian/chromadb-php": "^0.2.1 || ^0.3 || ^0.4",
35+
"doctrine/dbal": "^3.3 || ^4.0",
3436
"mongodb/mongodb": "^1.21 || ^2.0",
3537
"phpstan/phpstan": "^2.0",
3638
"phpstan/phpstan-webmozart-assert": "^2.0",
3739
"phpunit/phpunit": "^11.5",
3840
"probots-io/pinecone-php": "^1.0"
3941
},
4042
"suggest": {
43+
"ext-pdo": "For using MariaDB as retrieval vector store.",
4144
"codewithkyrian/chromadb-php": "For using the ChromaDB as retrieval vector store.",
45+
"doctrine/dbal": "For using MariaDB via Doctrine as retrieval vector store.",
4246
"mongodb/mongodb": "For using MongoDB Atlas as retrieval vector store.",
4347
"probots-io/pinecone-php": "For using the Pinecone as retrieval vector store."
4448
},
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Store\Bridge\MariaDB;
13+
14+
use Doctrine\DBAL\Connection;
15+
use Doctrine\DBAL\Exception as DBALException;
16+
use Symfony\AI\Platform\Vector\Vector;
17+
use Symfony\AI\Store\Document\Metadata;
18+
use Symfony\AI\Store\Document\VectorDocument;
19+
use Symfony\AI\Store\Exception\InvalidArgumentException;
20+
use Symfony\AI\Store\InitializableStoreInterface;
21+
use Symfony\AI\Store\VectorStoreInterface;
22+
use Symfony\Component\Uid\Uuid;
23+
24+
/**
25+
* Requires MariaDB >=11.7.
26+
*
27+
* @see https://mariadb.org/rag-with-mariadb-vector/
28+
*
29+
* @author Valtteri R <[email protected]>
30+
*/
31+
final readonly class Store implements VectorStoreInterface, InitializableStoreInterface
{
    /**
     * Vector column size used by initialize() when no "dimensions" option is given.
     */
    private const DEFAULT_DIMENSIONS = 1536;

    /**
     * @param \PDO   $connection      The PDO connection all statements are executed on
     * @param string $tableName       The name of the table
     * @param string $indexName       The name of the vector search index
     * @param string $vectorFieldName The name of the field in the index that contains the vector
     */
    public function __construct(
        private \PDO $connection,
        private string $tableName,
        private string $indexName,
        private string $vectorFieldName,
    ) {
    }

    /**
     * Creates a store on top of an existing PDO connection.
     */
    public static function fromPdo(\PDO $connection, string $tableName, string $indexName = 'embedding', string $vectorFieldName = 'embedding'): self
    {
        return new self($connection, $tableName, $indexName, $vectorFieldName);
    }

    /**
     * Creates a store from a Doctrine DBAL connection by unwrapping its native PDO handle.
     *
     * @throws DBALException
     * @throws InvalidArgumentException When the DBAL connection is not backed by PDO
     */
    public static function fromDbal(Connection $connection, string $tableName, string $indexName = 'embedding', string $vectorFieldName = 'embedding'): self
    {
        $pdo = $connection->getNativeConnection();

        if (!$pdo instanceof \PDO) {
            throw new InvalidArgumentException('Only DBAL connections using PDO driver are supported.');
        }

        return self::fromPdo($pdo, $tableName, $indexName, $vectorFieldName);
    }

    /**
     * Inserts the given documents, overwriting metadata and vector of rows whose id already exists.
     *
     * @throws \JsonException When metadata or vector data cannot be encoded as JSON
     */
    public function add(VectorDocument ...$documents): void
    {
        // The UPDATE part refers to the inserted values through VALUES() instead of
        // repeating :metadata / :vector: PDO only allows a named placeholder to appear
        // more than once when emulated prepares are enabled.
        $statement = $this->connection->prepare(
            \sprintf(
                <<<'SQL'
                    INSERT INTO %1$s (id, metadata, %2$s)
                    VALUES (:id, :metadata, VEC_FromText(:vector))
                    ON DUPLICATE KEY UPDATE metadata = VALUES(metadata), %2$s = VALUES(%2$s)
                    SQL,
                $this->tableName,
                $this->vectorFieldName,
            ),
        );

        foreach ($documents as $document) {
            $operation = [
                'id' => $document->id->toBinary(),
                'metadata' => json_encode($document->metadata->getArrayCopy(), \JSON_THROW_ON_ERROR),
                'vector' => json_encode($document->vector->getData(), \JSON_THROW_ON_ERROR),
            ];

            $statement->execute($operation);
        }
    }

    /**
     * Returns the stored documents ordered by ascending Euclidean distance to $vector.
     *
     * When $minScore is given, only rows whose distance is >= $minScore are returned.
     * NOTE(review): the score is a distance (smaller means closer), so a lower bound
     * filters out the closest matches - confirm this is the intended semantics.
     *
     * @param array{
     *     limit?: positive-int,
     * } $options "limit" caps the number of returned documents (default 5)
     *
     * @return VectorDocument[]
     *
     * @throws \JsonException When the vector cannot be encoded or a stored value cannot be decoded
     */
    public function query(Vector $vector, array $options = [], ?float $minScore = null): array
    {
        // Build the optional filter with its own sprintf() call: passing a string that
        // contains "%1$s" as an *argument* of the outer sprintf() would leave the
        // conversion unexpanded and produce invalid SQL.
        $where = '';
        if (null !== $minScore) {
            $where = \sprintf(
                'WHERE VEC_DISTANCE_EUCLIDEAN(%1$s, VEC_FromText(:filterEmbedding)) >= :minScore',
                $this->vectorFieldName,
            );
        }

        $statement = $this->connection->prepare(
            \sprintf(
                <<<'SQL'
                    SELECT id, VEC_ToText(%1$s) embedding, metadata, VEC_DISTANCE_EUCLIDEAN(%1$s, VEC_FromText(:embedding)) AS score
                    FROM %2$s
                    %3$s
                    ORDER BY score ASC
                    LIMIT %4$d
                    SQL,
                $this->vectorFieldName,
                $this->tableName,
                $where,
                $options['limit'] ?? 5,
            ),
        );

        $encodedVector = json_encode($vector->getData(), \JSON_THROW_ON_ERROR);
        $params = ['embedding' => $encodedVector];

        if (null !== $minScore) {
            // Bound under a distinct name so the statement also works with native
            // (non-emulated) prepares, which reject reused named placeholders.
            $params['filterEmbedding'] = $encodedVector;
            $params['minScore'] = $minScore;
        }

        $documents = [];

        $statement->execute($params);

        foreach ($statement->fetchAll(\PDO::FETCH_ASSOC) as $result) {
            $documents[] = new VectorDocument(
                id: Uuid::fromBinary($result['id']),
                vector: new Vector(json_decode((string) $result['embedding'], true, flags: \JSON_THROW_ON_ERROR)),
                metadata: new Metadata(json_decode($result['metadata'] ?? '{}', true, flags: \JSON_THROW_ON_ERROR)),
                score: $result['score'],
            );
        }

        return $documents;
    }

    /**
     * Creates the table and its vector index if they do not exist yet.
     *
     * @param array{dimensions?: positive-int} $options "dimensions" sets the size of the vector column (default 1536)
     *
     * @throws InvalidArgumentException When an unsupported option is passed, "dimensions" is not a
     *                                  positive integer, or the server is not MariaDB >=11.7
     */
    public function initialize(array $options = []): void
    {
        if ([] !== array_diff_key($options, ['dimensions' => true])) {
            throw new InvalidArgumentException('The only supported option is "dimensions".');
        }

        $dimensions = $options['dimensions'] ?? self::DEFAULT_DIMENSIONS;

        if (!\is_int($dimensions) || $dimensions < 1) {
            throw new InvalidArgumentException('The "dimensions" option must be a positive integer.');
        }

        // Cast once so both checks below operate on a string.
        $serverVersion = (string) $this->connection->getAttribute(\PDO::ATTR_SERVER_VERSION);

        if (!str_contains($serverVersion, 'MariaDB') || version_compare($serverVersion, '11.7.0') < 0) {
            throw new InvalidArgumentException('You need MariaDB >=11.7 to use this feature');
        }

        $this->connection->exec(
            \sprintf(
                <<<'SQL'
                    CREATE TABLE IF NOT EXISTS %1$s (
                        id BINARY(16) NOT NULL PRIMARY KEY,
                        metadata JSON,
                        %2$s VECTOR(%4$d) NOT NULL,
                        VECTOR INDEX %3$s (%2$s)
                    )
                    SQL,
                $this->tableName,
                $this->vectorFieldName,
                $this->indexName,
                $dimensions,
            ),
        );
    }
}

0 commit comments

Comments
 (0)