From 26035bb97bc1e3b1d2ead9d89d9139de1f314cc8 Mon Sep 17 00:00:00 2001 From: Mitchell Adair Date: Thu, 6 Mar 2025 15:35:46 -0500 Subject: [PATCH 1/5] add vector column/index doc, update singlestore doc to rm mysql-core --- src/content/docs/column-types/singlestore.mdx | 56 +++++++++++++++++++ src/content/docs/indexes-constraints.mdx | 27 ++++++++- 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/src/content/docs/column-types/singlestore.mdx b/src/content/docs/column-types/singlestore.mdx index 766e3f84..b100709f 100644 --- a/src/content/docs/column-types/singlestore.mdx +++ b/src/content/docs/column-types/singlestore.mdx @@ -560,6 +560,62 @@ CREATE TABLE `table` ( ## --- +### vector + +
+```typescript +import { singlestoreTable, vector } from "drizzle-orm/singlestore-core"; + +const table = singlestoreTable('table', { + embedding: vector("embedding", { dimensions: 10 }), +}); +``` + +```sql +CREATE TABLE `table` ( + `embedding` vector(10) +); +``` +
+ +You can specify `elementType` to change the type of the elements the vector +consists of. It can be one of `I8`, `I16`, `I32`, `I64`, `F32` (the default), or `F64`. + +
+```typescript +import { singlestoreTable, vector } from "drizzle-orm/singlestore-core"; + +const table = singlestoreTable('table', { + embedding: vector("embedding", { dimensions: 10, elementType: 'I8' }), +}); +``` + +```sql +CREATE TABLE `table` ( + `embedding` vector(10, 'I8') +); +``` +
+ +###### Helper functions + +There are two helper functions useful for vector search queries in SingleStore: + +
+```typescript +import { dotProduct, euclideanDistance } from 'drizzle-orm/singlestore-core/expressions'; + +euclideanDistance(table.column, [3, 1, 2]); +dotProduct(table.column, [3, 1, 2]); +``` +```sql +table.column <-> '[3, 1, 2]' +table.column <*> '[3, 1, 2]' +``` +
+ +## --- + ### Customizing data type Every column builder has a `.$type()` method, which allows you to customize the data type of the column. This is useful, for example, with unknown or branded types. diff --git a/src/content/docs/indexes-constraints.mdx b/src/content/docs/indexes-constraints.mdx index ac0db753..c073d10b 100644 --- a/src/content/docs/indexes-constraints.mdx +++ b/src/content/docs/indexes-constraints.mdx @@ -1118,7 +1118,7 @@ index('name')
- ```typescript copy {9-10} + ```typescript copy {8-9} import { int, text, index, uniqueIndex, singlestoreTable } from "drizzle-orm/singlestore-core"; export const user = singlestoreTable("user", { @@ -1139,5 +1139,30 @@ index('name') CREATE UNIQUE INDEX `email_idx` ON `user` (`email`); ```
+ + SingleStore also supports indexes on vector columns: + +
+ ```typescript copy {8-9} + import { int, singlestoreTable, vector, vectorIndex } from "drizzle-orm/singlestore-core"; + + export const embeddings = singlestoreTable("embeddings", { + id: int("id").primaryKey().autoincrement(), + embedding: vector("embedding", { dimensions: 10 }).notNull(), + embedding2: vector("embedding2", { dimensions: 10 }).notNull(), + }, (table) => [ + vectorIndex("vIdx1").on(table.embedding), + vectorIndex("vIdx2", "IVF_PQ").on(table.embedding2).metricType("EUCLIDEAN_DISTANCE").nbits(16), + ]); + ``` + ```sql {5-6} + CREATE TABLE `embeddings` ( + ... + ); + + ALTER TABLE `embeddings` ADD VECTOR INDEX `vIdx1` (`embedding`) INDEX_OPTIONS '{"index_type":"AUTO"}'; + ALTER TABLE `embeddings` ADD VECTOR INDEX `vIdx2` (`embedding2`) INDEX_OPTIONS '{"index_type":"IVF_PQ","metric_type":"EUCLIDEAN_DISTANCE","nbits":16}'; + ``` +
From 1cb16c5f3b2e586255f17ae20598a53e767fa989 Mon Sep 17 00:00:00 2001 From: Mitchell Adair Date: Thu, 6 Mar 2025 15:46:26 -0500 Subject: [PATCH 2/5] quote consistency --- src/content/docs/column-types/singlestore.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/content/docs/column-types/singlestore.mdx b/src/content/docs/column-types/singlestore.mdx index b100709f..1543b003 100644 --- a/src/content/docs/column-types/singlestore.mdx +++ b/src/content/docs/column-types/singlestore.mdx @@ -567,7 +567,7 @@ CREATE TABLE `table` ( import { singlestoreTable, vector } from "drizzle-orm/singlestore-core"; const table = singlestoreTable('table', { - embedding: vector("embedding", { dimensions: 10 }), + embedding: vector('embedding', { dimensions: 10 }), }); ``` @@ -586,7 +586,7 @@ consists of. This can be one of `I8`, `I16`, `I32`, `I64`, `F32`, or `F64` import { singlestoreTable, vector } from "drizzle-orm/singlestore-core"; const table = singlestoreTable('table', { - embedding: vector("embedding", { dimensions: 10, elementType: 'I8' }), + embedding: vector('embedding', { dimensions: 10, elementType: 'I8' }), }); ``` From 883c67a8b67ced753504e083c9726cb5f76aa346 Mon Sep 17 00:00:00 2001 From: Mitchell Adair Date: Thu, 6 Mar 2025 16:55:17 -0500 Subject: [PATCH 3/5] fix sql output for default entityType --- src/content/docs/column-types/singlestore.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/content/docs/column-types/singlestore.mdx b/src/content/docs/column-types/singlestore.mdx index 1543b003..fa5204d6 100644 --- a/src/content/docs/column-types/singlestore.mdx +++ b/src/content/docs/column-types/singlestore.mdx @@ -573,7 +573,7 @@ const table = singlestoreTable('table', { ```sql CREATE TABLE `table` ( - `embedding` vector(10) + `embedding` vector(10, 'F32') ); ``` From b33b5a59d1a31cdf010646590201a76562fa9766 Mon Sep 17 00:00:00 2001 From: Mitchell Adair Date: Mon, 10 Mar 2025 16:01:05 -0400 Subject: [PATCH 4/5] 
fix sql output for vector col --- src/content/docs/column-types/singlestore.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/content/docs/column-types/singlestore.mdx b/src/content/docs/column-types/singlestore.mdx index fa5204d6..20093e46 100644 --- a/src/content/docs/column-types/singlestore.mdx +++ b/src/content/docs/column-types/singlestore.mdx @@ -573,7 +573,7 @@ const table = singlestoreTable('table', { ```sql CREATE TABLE `table` ( - `embedding` vector(10, 'F32') + `embedding` vector(10, F32) ); ``` @@ -592,7 +592,7 @@ const table = singlestoreTable('table', { ```sql CREATE TABLE `table` ( - `embedding` vector(10, 'I8') + `embedding` vector(10, I8) ); ``` From 602762f1ca23f404efb954a2a3709c60f8a5dda6 Mon Sep 17 00:00:00 2001 From: Mitchell Adair Date: Thu, 27 Mar 2025 14:57:21 -0400 Subject: [PATCH 5/5] update vector similarity search guide --- src/content/docs/guides/_map.json | 2 +- .../docs/guides/vector-similarity-search.mdx | 420 ++++++++++++------ 2 files changed, 279 insertions(+), 143 deletions(-) diff --git a/src/content/docs/guides/_map.json b/src/content/docs/guides/_map.json index 8c67c0f9..32f440e4 100644 --- a/src/content/docs/guides/_map.json +++ b/src/content/docs/guides/_map.json @@ -13,7 +13,7 @@ ["empty-array-default-value", "Empty array as a default value"], ["update-many-with-different-value", "Update many with different value for each row"], ["unique-case-insensitive-email", "Unique and Case-Insensitive Email Handling"], - ["vector-similarity-search", "Vector similarity search with pgvector extension"], + ["vector-similarity-search", "Vector similarity search"], ["postgresql-full-text-search", "PostgreSQL full-text search"], ["d1-http-with-drizzle-kit", "Cloudflare D1 HTTP API with Drizzle Kit"], ["point-datatype-psql", "Point datatype in PostgreSQL"], diff --git a/src/content/docs/guides/vector-similarity-search.mdx b/src/content/docs/guides/vector-similarity-search.mdx index 4ce29398..30670c97 100644 --- 
a/src/content/docs/guides/vector-similarity-search.mdx +++ b/src/content/docs/guides/vector-similarity-search.mdx @@ -1,8 +1,10 @@ --- -title: Vector similarity search with pgvector extension +title: Vector similarity search slug: vector-similarity-search --- +import Tab from '@mdx/Tab.astro'; +import Tabs from '@mdx/Tabs.astro'; import Section from "@mdx/Section.astro"; import IsSupportedChipGroup from "@mdx/IsSupportedChipGroup.astro"; import Prerequisites from "@mdx/Prerequisites.astro"; @@ -10,145 +12,279 @@ import CodeTabs from '@mdx/CodeTabs.astro'; import CodeTab from '@mdx/CodeTab.astro'; import Npm from "@mdx/Npm.astro"; - -- Get started with [PostgreSQL](/docs/get-started-postgresql) -- [Select statement](/docs/select) -- [Indexes](/docs/indexes-constraints#indexes) -- [sql operator](/docs/sql) -- [pgvector extension](/docs/extensions/pg#pg_vector) -- [Drizzle kit](/docs/kit-overview) -- You should have installed the `openai` [package](https://www.npmjs.com/package/openai) for generating embeddings. - - openai - -- You should have `drizzle-orm@0.31.0` and `drizzle-kit@0.22.0` or higher. - - -To implement vector similarity search in PostgreSQL with Drizzle ORM, you can use the `pgvector` extension. This extension provides a set of functions to work with vectors and perform similarity search. - -As for now, Drizzle doesn't create extension automatically, so you need to create it manually. Create an empty migration file and add SQL query: - -
-```bash -npx drizzle-kit generate --custom -``` - -```sql -CREATE EXTENSION vector; -``` -
- -To perform similarity search, you need to create a table with a vector column and an `HNSW` or `IVFFlat` index on this column for better performance: - - - - ```ts copy {10, 13} - import { index, pgTable, serial, text, vector } from 'drizzle-orm/pg-core'; - - export const guides = pgTable( - 'guides', - { - id: serial('id').primaryKey(), - title: text('title').notNull(), - description: text('description').notNull(), - url: text('url').notNull(), - embedding: vector('embedding', { dimensions: 1536 }), - }, - (table) => [ - index('embeddingIndex').using('hnsw', table.embedding.op('vector_cosine_ops')), + + + + - Get started with [PostgreSQL](/docs/get-started-postgresql) + - [Select statement](/docs/select) + - [Indexes](/docs/indexes-constraints#indexes) + - [sql operator](/docs/sql) + - [pgvector extension](/docs/extensions/pg#pg_vector) + - [Drizzle kit](/docs/kit-overview) + - You should have installed the `openai` [package](https://www.npmjs.com/package/openai) for generating embeddings. + + openai + + - You should have `drizzle-orm@0.31.0` and `drizzle-kit@0.22.0` or higher. + + + To implement vector similarity search in PostgreSQL with Drizzle ORM, you can use the `pgvector` extension. This extension provides a set of functions to work with vectors and perform similarity search. + + As for now, Drizzle doesn't create extension automatically, so you need to create it manually. Create an empty migration file and add SQL query: + +
+ ```bash + npx drizzle-kit generate --custom + ``` + + ```sql + CREATE EXTENSION vector; + ``` +
+ + To perform similarity search, you need to create a table with a vector column and an `HNSW` or `IVFFlat` index on this column for better performance: + + + + ```ts copy {10, 13} + import { index, pgTable, serial, text, vector } from 'drizzle-orm/pg-core'; + + export const guides = pgTable( + 'guides', + { + id: serial('id').primaryKey(), + title: text('title').notNull(), + description: text('description').notNull(), + url: text('url').notNull(), + embedding: vector('embedding', { dimensions: 1536 }), + }, + (table) => [ + index('embeddingIndex').using('hnsw', table.embedding.op('vector_cosine_ops')), + ] + ); + ``` + + ```sql + CREATE TABLE IF NOT EXISTS "guides" ( + "id" serial PRIMARY KEY NOT NULL, + "title" text NOT NULL, + "description" text NOT NULL, + "url" text NOT NULL, + "embedding" vector(1536) + ); + --> statement-breakpoint + CREATE INDEX IF NOT EXISTS "embeddingIndex" ON "guides" USING hnsw (embedding vector_cosine_ops); + ``` + + + The `embedding` column is used to store vector embeddings of the guide descriptions. Vector embedding is just a representation of some data. It converts different types of data into a common format (vectors) that language models can process. This allows us to perform mathematical operations, such as measuring the distance between two vectors, to determine how similar or different two data items are. 
+ + In this example we will use an `OpenAI` model to generate [embeddings](https://platform.openai.com/docs/guides/embeddings) for the description: + ```ts copy + import OpenAI from 'openai'; + + const openai = new OpenAI({ + apiKey: process.env['OPENAI_API_KEY'], + }); + + export const generateEmbedding = async (value: string): Promise<number[]> => { + const input = value.replaceAll('\n', ' '); + + const { data } = await openai.embeddings.create({ + model: 'text-embedding-ada-002', + input, + }); + + return data[0].embedding; + }; + ``` + + To search for similar guides by embedding, you can use `gt` and `sql` operators with the `cosineDistance` function to calculate the similarity between the `embedding` column and the generated embedding: + +
+ ```ts copy {10,15,16} + import { cosineDistance, desc, gt, sql } from 'drizzle-orm'; + import { generateEmbedding } from './embedding'; + import { guides } from './schema'; + + const db = drizzle(...); + + const findSimilarGuides = async (description: string) => { + const embedding = await generateEmbedding(description); + + const similarity = sql`1 - (${cosineDistance(guides.embedding, embedding)})`; + + const similarGuides = await db + .select({ name: guides.title, url: guides.url, similarity }) + .from(guides) + .where(gt(similarity, 0.5)) + .orderBy((t) => desc(t.similarity)) + .limit(4); + + return similarGuides; + }; + ``` + + ```ts + const description = 'Guides on using Drizzle ORM with different platforms'; + + const similarGuides = await findSimilarGuides(description); + ``` + + ```json + [ + { + name: 'Drizzle with Turso', + url: '/docs/tutorials/drizzle-with-turso', + similarity: 0.8642314333984994 + }, + { + name: 'Drizzle with Supabase Database', + url: '/docs/tutorials/drizzle-with-supabase', + similarity: 0.8593631126014918 + }, + { + name: 'Drizzle with Neon Postgres', + url: '/docs/tutorials/drizzle-with-neon', + similarity: 0.8541051184461372 + }, + { + name: 'Drizzle with Vercel Edge Functions', + url: '/docs/tutorials/drizzle-with-vercel-edge-functions', + similarity: 0.8481551084241092 + } + ] + ``` +
+
+ + + - Get started with [SingleStore](/docs/get-started-singlestore) + - [Select statement](/docs/select) + - [Indexes](/docs/indexes-constraints#indexes) + - [sql operator](/docs/sql) + - [Drizzle kit](/docs/kit-overview) + - You should have installed the `openai` [package](https://www.npmjs.com/package/openai) for generating embeddings. + + openai + + - You should have `drizzle-orm@0.31.0` and `drizzle-kit@0.22.0` or higher. + + + To perform similarity search, you need to create a table with a vector column and a vector index on this column for better performance: + + + + ```ts copy {10, 13} + import { serial, singlestoreTable, text, vector, vectorIndex } from 'drizzle-orm/singlestore-core'; + + export const guides = singlestoreTable( + 'guides', + { + id: serial('id').primaryKey(), + title: text('title').notNull(), + description: text('description').notNull(), + url: text('url').notNull(), + embedding: vector('embedding', { dimensions: 1536 }), + }, + (table) => [ + vectorIndex('embeddingIndex').on(table.embedding), + ] + ); + ``` + + ```sql + CREATE TABLE IF NOT EXISTS `guides` ( + `id` serial PRIMARY KEY NOT NULL, + `title` text NOT NULL, + `description` text NOT NULL, + `url` text NOT NULL, + `embedding` vector(1536, F32) + ); + --> statement-breakpoint + ALTER TABLE `guides` ADD VECTOR INDEX `embeddingIndex` (`embedding`) INDEX_OPTIONS '{"index_type":"AUTO"}'; + ``` + + + The `embedding` column is used to store vector embeddings of the guide descriptions. Vector embedding is just a representation of some data. It converts different types of data into a common format (vectors) that language models can process. This allows us to perform mathematical operations, such as measuring the distance between two vectors, to determine how similar or different two data items are. 
+ + In this example we will use an `OpenAI` model to generate [embeddings](https://platform.openai.com/docs/guides/embeddings) for the description: + ```ts copy + import OpenAI from 'openai'; + + const openai = new OpenAI({ + apiKey: process.env['OPENAI_API_KEY'], + }); + + export const generateEmbedding = async (value: string): Promise<number[]> => { + const input = value.replaceAll('\n', ' '); + + const { data } = await openai.embeddings.create({ + model: 'text-embedding-ada-002', + input, + }); + + return data[0].embedding; + }; + ``` + + To search for similar guides by embedding, you can use `gt` and `sql` operators with the `dotProduct` function to calculate the similarity between the `embedding` column and the generated embedding: + +
+ ```ts copy {11,16,17} + import { desc, gt, sql } from 'drizzle-orm'; + import { dotProduct } from 'drizzle-orm/singlestore-core/expressions'; + import { generateEmbedding } from './embedding'; + import { guides } from './schema'; + + const db = drizzle(...); + + const findSimilarGuides = async (description: string) => { + const embedding = await generateEmbedding(description); + + const similarity = dotProduct(guides.embedding, embedding); + + const similarGuides = await db + .select({ name: guides.title, url: guides.url, similarity }) + .from(guides) + .where(gt(similarity, 0.5)) + .orderBy((t) => desc(t.similarity)) + .limit(4); + + return similarGuides; + }; + ``` + + ```ts + const description = 'Guides on using Drizzle ORM with different platforms'; + + const similarGuides = await findSimilarGuides(description); + ``` + + ```json + [ + { + name: 'Drizzle with Turso', + url: '/docs/tutorials/drizzle-with-turso', + similarity: 0.8642314333984994 + }, + { + name: 'Drizzle with Supabase Database', + url: '/docs/tutorials/drizzle-with-supabase', + similarity: 0.8593631126014918 + }, + { + name: 'Drizzle with Neon Postgres', + url: '/docs/tutorials/drizzle-with-neon', + similarity: 0.8541051184461372 + }, + { + name: 'Drizzle with Vercel Edge Functions', + url: '/docs/tutorials/drizzle-with-vercel-edge-functions', + similarity: 0.8481551084241092 + } ] - ); - ``` - - ```sql - CREATE TABLE IF NOT EXISTS "guides" ( - "id" serial PRIMARY KEY NOT NULL, - "title" text NOT NULL, - "description" text NOT NULL, - "url" text NOT NULL, - "embedding" vector(1536) - ); - --> statement-breakpoint - CREATE INDEX IF NOT EXISTS "embeddingIndex" ON "guides" USING hnsw (embedding vector_cosine_ops); - ``` - - -The `embedding` column is used to store vector embeddings of the guide descriptions. Vector embedding is just a representation of some data. It converts different types of data into a common format (vectors) that language models can process. 
This allows us to perform mathematical operations, such as measuring the distance between two vectors, to determine how similar or different two data items are. - -In this example we will use `OpenAI` model to generate [embeddings](https://platform.openai.com/docs/guides/embeddings) for the description: -```ts copy -import OpenAI from 'openai'; - -const openai = new OpenAI({ - apiKey: process.env['OPENAI_API_KEY'], -}); - -export const generateEmbedding = async (value: string): Promise<number[]> => { - const input = value.replaceAll('\n', ' '); - - const { data } = await openai.embeddings.create({ - model: 'text-embedding-ada-002', - input, - }); - - return data[0].embedding; -}; -``` - -To search for similar guides by embedding, you can use `gt` and `sql` operators with `cosineDistance` function to calculate the similarity between the `embedding` column and the generated embedding: - -
-```ts copy {10,15,16} -import { cosineDistance, desc, gt, sql } from 'drizzle-orm'; -import { generateEmbedding } from './embedding'; -import { guides } from './schema'; - -const db = drizzle(...); - -const findSimilarGuides = async (description: string) => { - const embedding = await generateEmbedding(description); - - const similarity = sql`1 - (${cosineDistance(guides.embedding, embedding)})`; - - const similarGuides = await db - .select({ name: guides.title, url: guides.url, similarity }) - .from(guides) - .where(gt(similarity, 0.5)) - .orderBy((t) => desc(t.similarity)) - .limit(4); - - return similarGuides; -}; -``` - -```ts -const description = 'Guides on using Drizzle ORM with different platforms'; - -const similarGuides = await findSimilarGuides(description); -``` - -```json -[ - { - name: 'Drizzle with Turso', - url: '/docs/tutorials/drizzle-with-turso', - similarity: 0.8642314333984994 - }, - { - name: 'Drizzle with Supabase Database', - url: '/docs/tutorials/drizzle-with-supabase', - similarity: 0.8593631126014918 - }, - { - name: 'Drizzle with Neon Postgres', - url: '/docs/tutorials/drizzle-with-neon', - similarity: 0.8541051184461372 - }, - { - name: 'Drizzle with Vercel Edge Functions', - url: '/docs/tutorials/drizzle-with-vercel-edge-functions', - similarity: 0.8481551084241092 - } -] -``` -
+ ``` +
+
+