Skip to content

Commit 97a4373

Browse files
committed
feat: pgvector
1 parent 937c515 commit 97a4373

File tree

13 files changed

+293
-28
lines changed

13 files changed

+293
-28
lines changed

.github/workflows/phpunit.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ jobs:
4949

5050
services:
5151
postgres:
52-
image: postgres
52+
image: ankane/pgvector
5353
env:
5454
POSTGRES_PASSWORD: postgres
5555
ports:

README.md

Lines changed: 57 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ composer require tpetry/laravel-postgresql-enhanced
4141
- [Compression](#compression)
4242
- [Initial](#initial)
4343
- [Column Types](#column-types)
44+
- [Arrays](#arrays)
45+
- [Ranges](#ranges)
4446
- [Bit Strings](#bit-strings)
4547
- [Case Insensitive Text](#case-insensitive-text)
4648
- [Full Text Search](#full-text-search)
@@ -49,8 +51,7 @@ composer require tpetry/laravel-postgresql-enhanced
4951
- [IP Networks](#ip-networks)
5052
- [International Product Numbers](#international-product-numbers)
5153
- [Label Tree](#label-tree)
52-
- [Ranges](#ranges)
53-
- [Arrays](#arrays)
54+
- [Vector](#vector)
5455
- [XML](#xml)
5556
- [Query](#query)
5657
- [Explain](#explain)
@@ -60,6 +61,7 @@ composer require tpetry/laravel-postgresql-enhanced
6061
- [Common Table Expressions (CTE)](#common-table-expressions-cte)
6162
- [Lazy By Cursor](#lazy-by-cursor)
6263
- [Where Clauses](#where-clauses)
64+
- [Order By](#order-by)
6365
- [Eloquent](#eloquent)
6466
- [Casts](#casts)
6567
- [Refresh Data on Save](#refresh-data-on-save)
@@ -644,6 +646,36 @@ Schema::table('users', function (Blueprint $table): void {
644646

645647
### Column Types
646648

649+
#### Arrays
650+
The array data types store multiple values in one single column. They can be used e.g. to store multiple tag ids of categories a product belongs to.
651+
```php
652+
// @see https://www.postgresql.org/docs/current/arrays.html
653+
$table->integerArray(string $column);
654+
```
655+
656+
> **Note**
657+
> While PostgreSQL array types are powerful, only the integer array is supported.
658+
> It is the sole array type with additional PostgreSQL enhancements for manipulation and querying compared to JSON columns.
659+
> The [intarray](https://www.postgresql.org/docs/current/intarray.html) extension provides extensive features that can be used to e.g. [store and query tags](https://tapoueh.org/blog/2013/10/denormalizing-tags/) with advanced boolean logic.
660+
661+
#### Ranges
662+
The range data types store a range of values with optional start and end values. They can be used e.g. to describe the duration a meeting room is booked.
663+
```php
664+
// @see https://www.postgresql.org/docs/current/rangetypes.html
665+
$table->bigIntegerRange(string $column);
666+
$table->bigIntegerMultiRange(string $column);
667+
$table->dateRange(string $column);
668+
$table->dateMultiRange(string $column);
669+
$table->decimalRange(string $column);
670+
$table->decimalMultiRange(string $column);
671+
$table->integerRange(string $column);
672+
$table->integerMultiRange(string $column);
673+
$table->timestampRange(string $column);
674+
$table->timestampMultiRange(string $column);
675+
$table->timestampTzRange(string $column);
676+
$table->timestampTzMultiRange(string $column);
677+
```
678+
647679
#### Bit Strings
648680
The bit string data types store strings of 0s and 1s. They can be used to e.g. store bitmaps.
649681
```php
@@ -721,35 +753,19 @@ $table->labelTree(string $column);
721753
> **Note**
722754
> You need to enable the `ltree` extension with `Schema::createExtensionIfNotExists('ltree')` or `Schema::createExtension('ltree')` before.
723755
724-
#### Ranges
725-
The range data types store a range of values with optional start and end values. They can be used e.g. to describe the duration a meeting room is booked.
756+
#### Vector
757+
The vector type can be used to store and search for embeddings created by AI providers like OpenAI.
726758
```php
727-
// @see https://www.postgresql.org/docs/current/rangetypes.html
728-
$table->bigIntegerRange(string $column);
729-
$table->bigIntegerMultiRange(string $column);
730-
$table->dateRange(string $column);
731-
$table->dateMultiRange(string $column);
732-
$table->decimalRange(string $column);
733-
$table->decimalMultiRange(string $column);
734-
$table->integerRange(string $column);
735-
$table->integerMultiRange(string $column);
736-
$table->timestampRange(string $column);
737-
$table->timestampMultiRange(string $column);
738-
$table->timestampTzRange(string $column);
739-
$table->timestampTzMultiRange(string $column);
759+
// @see https://github.com/pgvector/pgvector
760+
$table->vector(string $column, int $dimensions = 1536);
740761
```
741762

742-
#### Arrays
743-
The array data types store multiple values in one single column. They can be used e.g. to store multiple tag ids of categories a product belongs to.
744-
```php
745-
// @see https://www.postgresql.org/docs/current/arrays.html
746-
$table->integerArray(string $column);
747-
```
763+
> **Note**
764+
> You need to enable the `vector` extension with `Schema::createExtensionIfNotExists('vector')` or `Schema::createExtension('vector')` before.
748765
749-
> **Note**
750-
> While PostgreSQL array types are powerful, only the integer array is supported.
751-
> It is the sole array type with additional PostgreSQL enhancements for manipulation and querying compared to JSON columns.
752-
> The [intarray](https://www.postgresql.org/docs/current/intarray.html) extensions provides extensive features that can be used to e.g. [store and query tags](https://tapoueh.org/blog/2013/10/denormalizing-tags/) with advanced boolean logic.
766+
> **Note**
767+
> The `vector` extension is not a standard PostgreSQL extension but is available with most PostgreSQL cloud services.
768+
> You can check for support with the following query: `SELECT * FROM pg_available_extensions WHERE name = 'vector'`
753769
754770
#### XML
755771
The xml data type can be used to store an xml document.
@@ -1059,6 +1075,19 @@ $query->orWhereIntegerArrayMatches($column, string $query);
10591075
$query->whereIntegerArrayMatches('tags', '3&4&(5|6)&!7');
10601076
```
10611077

1078+
### Order By
1079+
1080+
#### Vector Similarity
1081+
1082+
With the `orderByVectorSimilarity` method you can compare a column storing embeddings to other embeddings.
1083+
1084+
```php
1085+
$query->orderByVectorSimilarity($column, $vector, string $distance = 'cosine'|'l2');
1086+
1087+
// The five rows with the highest similarity to the provided embeddings.
1088+
$query->orderByVectorSimilarity('embeddings', [0.9569, 0.1113, 0.0107])->limit(5);
1089+
```
1090+
10621091
## Eloquent
10631092

10641093
### Casts
@@ -1069,6 +1098,7 @@ To make those types usable, these casts can be used with your eloquent models:
10691098
| Type | Cast |
10701099
|----------------|-------------------------------------------------------------|
10711100
| `integerArray` | `Tpetry\PostgresqlEnhanced\Eloquent\Casts\IntegerArrayCast` |
1101+
| `vector` | `Tpetry\PostgresqlEnhanced\Eloquent\Casts\VectorArray` |
10721102

10731103
### Refresh Data on Save
10741104

src/Eloquent/Casts/VectorArray.php

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Tpetry\PostgresqlEnhanced\Eloquent\Casts;
6+
7+
use Illuminate\Contracts\Database\Eloquent\CastsAttributes;
8+
9+
class VectorArray implements CastsAttributes
{
    /**
     * Transform the attribute from the underlying model values.
     *
     * The stored text is parsed with json_decode(); a null column value is
     * passed through unchanged.
     *
     * @param \Illuminate\Database\Eloquent\Model $model
     * @param ?string $value
     *
     * @return ?array<int, float>
     *
     * @throws \JsonException when the stored value is not valid JSON
     */
    public function get($model, string $key, mixed $value, array $attributes): ?array
    {
        if (null === $value) {
            return null;
        }

        return json_decode($value, flags: \JSON_THROW_ON_ERROR);
    }

    /**
     * Transform the attribute to its underlying model values.
     *
     * The value is serialized as a JSON list (e.g. "[0.1,0.2,0.3]"); a null
     * value is passed through unchanged.
     *
     * @param \Illuminate\Database\Eloquent\Model $model
     * @param array<int, float>|\Illuminate\Support\Collection<int, float>|null $value
     *
     * @throws \JsonException when the value cannot be encoded (e.g. NAN or INF)
     */
    public function set($model, string $key, mixed $value, array $attributes): ?string
    {
        if (null === $value) {
            return null;
        }

        if ($value instanceof \Illuminate\Contracts\Support\Arrayable) {
            $value = $value->toArray();
        }

        // Re-index the values: an array with gaps or string keys (e.g. the
        // result of filter()) would otherwise be encoded as a JSON object
        // instead of a list and not form a valid vector literal.
        if (\is_array($value)) {
            $value = array_values($value);
        }

        return json_encode($value, flags: \JSON_THROW_ON_ERROR);
    }
}

src/PostgresqlEnhancedServiceProvider.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
use Tpetry\PostgresqlEnhanced\Types\TsvectorType;
4444
use Tpetry\PostgresqlEnhanced\Types\UpcType;
4545
use Tpetry\PostgresqlEnhanced\Types\VarbitType;
46+
use Tpetry\PostgresqlEnhanced\Types\VectorType;
4647
use Tpetry\PostgresqlEnhanced\Types\XmlType;
4748

4849
class PostgresqlEnhancedServiceProvider extends ServiceProvider
@@ -76,6 +77,7 @@ class PostgresqlEnhancedServiceProvider extends ServiceProvider
7677
TsvectorType::class,
7778
UpcType::class,
7879
VarbitType::class,
80+
VectorType::class,
7981
XmlType::class,
8082
];
8183

src/Query/Builder.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ class Builder extends BaseBuilder
1717
use BuilderExplain;
1818
use BuilderLateralJoin;
1919
use BuilderLazyByCursor;
20+
use BuilderOrder;
2021
use BuilderReturning;
2122
use BuilderWhere;
2223

src/Query/BuilderOrder.php

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Tpetry\PostgresqlEnhanced\Query;
6+
7+
use Illuminate\Contracts\Support\Arrayable;
8+
use Illuminate\Database\Query\Expression;
9+
use InvalidArgumentException;
10+
11+
trait BuilderOrder
{
    /**
     * Add a vector-similarity "order by" clause to the query.
     *
     * The rows are sorted ascending by the distance between the column and the
     * given vector. The vector is bound as a JSON list and the clause is added
     * to the union orders when the query has unions.
     *
     * @param \Illuminate\Database\Query\Expression|string $column
     * @param array<int, float>|\Illuminate\Support\Collection<int, float> $vector
     * @param string $distance the distance function: 'cosine' (<=>) or 'l2' (<->)
     *
     * @throws InvalidArgumentException when the distance function is unknown
     * @throws \JsonException when the vector cannot be encoded
     */
    public function orderByVectorSimilarity($column, $vector, string $distance = 'cosine'): static
    {
        $operator = match ($distance) {
            'cosine' => '<=>',
            'l2' => '<->',
            default => throw new InvalidArgumentException("Unknown distance function '{$distance}'."),
        };
        $column = new Expression("({$this->getGrammar()->wrap($column)} {$operator} ?)");

        if ($vector instanceof Arrayable) {
            $vector = $vector->toArray();
        }

        // Re-index the values: an array with gaps or string keys (e.g. the
        // result of filter()) would otherwise be encoded as a JSON object
        // instead of a list and not form a valid vector literal.
        if (\is_array($vector)) {
            $vector = array_values($vector);
        }

        $this->addBinding(json_encode($vector, flags: \JSON_THROW_ON_ERROR), $this->unions ? 'unionOrder' : 'order');

        $this->{$this->unions ? 'unionOrders' : 'orders'}[] = [
            'column' => $column,
            'direction' => 'asc',
        ];

        return $this;
    }
}

src/Query/Grammar.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ class Grammar extends PostgresGrammar
2727
'~', '&', '|', '#', '<<', '>>', '<<=', '>>=',
2828
'&&', '@>', '<@', '?', '?|', '?&', '||', '-', '@?', '@@', '#-',
2929
'is distinct from', 'is not distinct from',
30+
'<->', '<=>', '<#>',
3031
];
3132

3233
/**

src/Schema/BlueprintTypes.php

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,14 @@ public function varbit(string $column, ?int $length = null): ColumnDefinition
248248
return $this->addColumn('varbit', $column, compact('length'));
249249
}
250250

251+
/**
 * Add a column of the "vector" type to the table.
 *
 * @param string $column the name of the new column
 * @param int $dimensions the number of dimensions passed on to the column definition
 */
public function vector(string $column, int $dimensions = 1536): ColumnDefinition
{
    $parameters = ['dimensions' => $dimensions];

    return $this->addColumn('vector', $column, $parameters);
}
258+
251259
/**
252260
* Create a new xml column on the table.
253261
*/

src/Schema/Grammars/GrammarTypes.php

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,14 @@ protected function typeVarbit(Fluent $column): string
275275
};
276276
}
277277

278+
/**
 * Build the SQL type for a vector column, e.g. "vector(1536)".
 *
 * The size is taken from the column's "dimensions" attribute.
 */
protected function typeVector(Fluent $column): string
{
    $dimensions = $column['dimensions'];

    return 'vector('.$dimensions.')';
}
285+
278286
/**
279287
* Create the column definition for a xml type.
280288
*/

src/Types/VectorType.php

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Tpetry\PostgresqlEnhanced\Types;
6+
7+
use Doctrine\DBAL\Platforms\AbstractPlatform;
8+
use Doctrine\DBAL\Types\StringType;
9+
10+
class VectorType extends StringType
{
    /**
     * Lists the database types handled by this Doctrine type.
     *
     * Only platforms named "pgsql", "postgres" or "postgresql" are mapped; any
     * other platform gets an empty list.
     */
    public function getMappedDatabaseTypes(AbstractPlatform $platform)
    {
        $isPostgres = \in_array($platform->getName(), ['pgsql', 'postgres', 'postgresql'], true);

        if (!$isPostgres) {
            return [];
        }

        return [$this->getName()];
    }

    /**
     * Returns the name this type is registered with.
     */
    public function getName()
    {
        return 'vector';
    }

    /**
     * Builds the SQL declaration for a column of this type.
     *
     * The size is appended when the column has "dimensions" set; otherwise the
     * plain type name is returned.
     */
    public function getSQLDeclaration(array $column, AbstractPlatform $platform)
    {
        if (isset($column['dimensions'])) {
            return "vector({$column['dimensions']})";
        }

        return 'vector';
    }
}

0 commit comments

Comments
 (0)