Skip to content

Commit 2cede32

Browse files
authored
chore(s3vectors): change embd type name to s3vec (#524)
1 parent cd67098 commit 2cede32

File tree

6 files changed

+85
-69
lines changed

6 files changed

+85
-69
lines changed

docs/catalog/s3vectors.md

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ You can also manually create the foreign table like below if you did not use `im
148148
```sql
149149
create foreign table s3_vectors.embeddings (
150150
key text not null,
151-
data embd not null,
151+
data s3vec not null,
152152
metadata jsonb
153153
)
154154
server s3_vectors_server
@@ -160,51 +160,51 @@ create foreign table s3_vectors.embeddings (
160160
```
161161
### Custom Data Types
162162

163-
#### embd
163+
#### s3vec
164164

165-
The `embd` type is a custom PostgreSQL data type designed to store and work with high-dimensional vectors for machine learning and AI applications.
165+
The `s3vec` type is a custom PostgreSQL data type designed to store and work with high-dimensional vectors for machine learning and AI applications.
166166

167167
**Structure:**
168168

169-
The `embd` type internally contains:
169+
The `s3vec` type internally contains:
170170

171171
- Vector data as an array of 32-bit floating point numbers (Float32)
172172
- Additional metadata fields for internal use
173173

174174
**Input Formats:**
175175

176-
The `embd` type accepts input in JSON array format:
176+
The `s3vec` type accepts input in JSON array format:
177177

178178
```sql
179179
-- Simple array format (most common)
180-
'[0.1, 0.2, 0.3, 0.4, 0.5]'::embd
180+
'[0.1, 0.2, 0.3, 0.4, 0.5]'::s3vec
181181

182182
-- Full JSON object format (advanced)
183-
'{"data": [0.1, 0.2, 0.3], "key": "vector_001"}'::embd
183+
'{"data": [0.1, 0.2, 0.3], "key": "vector_001"}'::s3vec
184184
```
185185

186186
**Output Format:**
187187

188-
When displayed, the `embd` type shows a summary format:
188+
When displayed, the `s3vec` type shows a summary format:
189189

190190
```
191-
embd:5 -- indicates an embedding with 5 dimensions
191+
s3vec:5 -- indicates an embedding with 5 dimensions
192192
```
193193

194194
**Usage Examples:**
195195

196196
See the following sections for complete examples:
197197

198-
- [Inserting Vectors](#inserting-vectors) - Examples of inserting data with `embd` type
198+
- [Inserting Vectors](#inserting-vectors) - Examples of inserting data with the `s3vec` type
199199
- [Querying Vectors](#querying-vectors) - Basic queries and vector similarity search
200200
- [Vector Similarity Search with Filtering](#vector-similarity-search-with-filtering) - Advanced search with metadata filtering
201201
- [Advanced Example: Semantic Search](#advanced-example-semantic-search) - Complete semantic search implementation
202202

203203
**Operations:**
204204

205205
- **Vector similarity search**: Use the `<==>` operator for approximate nearest neighbor search
206-
- **Distance calculation**: Use `embd_distance()` function to get similarity scores
207-
- **Type casting**: Convert JSON arrays to `embd` type using `::embd` cast
206+
- **Distance calculation**: Use the `s3vec_distance()` function to get similarity scores
207+
- **Type casting**: Convert JSON arrays to the `s3vec` type using the `::s3vec` cast
208208

209209
**Constraints:**
210210

@@ -214,19 +214,19 @@ See the following sections for complete examples:
214214

215215
### Functions
216216

217-
#### embd_distance(embd)
217+
#### s3vec_distance(s3vec)
218218

219219
Returns the distance score from the most recent vector similarity search operation.
220220

221221
**Syntax:**
222222

223223
```sql
224-
embd_distance(vector_data) -> real
224+
s3vec_distance(vector_data) -> real
225225
```
226226

227227
**Parameters:**
228228

229-
- `vector_data` - An `embd` type column containing vector data
229+
- `vector_data` - An `s3vec` type column containing vector data
230230

231231
**Returns:**
232232

@@ -236,9 +236,9 @@ embd_distance(vector_data) -> real
236236

237237
```sql
238238
-- Get similarity search results with distance scores
239-
select embd_distance(data) as distance, key, metadata
239+
select s3vec_distance(data) as distance, key, metadata
240240
from s3_vectors.embeddings
241-
where data <==> '[0.1, 0.2, 0.3, 0.4, 0.5]'::embd
241+
where data <==> '[0.1, 0.2, 0.3, 0.4, 0.5]'::s3vec
242242
order by 1
243243
limit 5;
244244
```
@@ -312,18 +312,18 @@ For exact key lookups:
312312

313313
3. **Vector similarity search**:
314314
```sql
315-
select embd_distance(data) as distance, *
315+
select s3vec_distance(data) as distance, *
316316
from s3_vectors.embeddings
317-
where data <==> '[0.1, 0.2, 0.3, ...]'::embd
317+
where data <==> '[0.1, 0.2, 0.3, ...]'::s3vec
318318
order by 1
319319
limit 10;
320320
```
321321

322322
4. **Vector search with metadata filtering**:
323323
```sql
324-
select embd_distance(data) as distance, *
324+
select s3vec_distance(data) as distance, *
325325
from s3_vectors.embeddings
326-
where data <==> '[0.1, 0.2, 0.3, ...]'::embd
326+
where data <==> '[0.1, 0.2, 0.3, ...]'::s3vec
327327
and metadata <==> '{"category": "product"}'::jsonb
328328
order by 1
329329
limit 5;
@@ -338,7 +338,7 @@ For exact key lookups:
338338
| Postgres Type | S3 Vectors Type |
339339
| ---------------- | -------------------------------------- |
340340
| text | String (for vector key) |
341-
| embd | Float32 vector data |
341+
| s3vec | Float32 vector data |
342342
| jsonb | Document metadata |
343343

344344
## Limitations
@@ -380,7 +380,7 @@ import foreign schema s3_vectors
380380
-- or, create the foreign table manually
381381
create foreign table if not exists s3_vectors.embeddings (
382382
key text not null,
383-
data embd not null,
383+
data s3vec not null,
384384
metadata jsonb
385385
)
386386
server s3_vectors_server
@@ -401,9 +401,9 @@ select * from s3_vectors.embeddings;
401401
select * from s3_vectors.embeddings where key = 'product_001';
402402

403403
-- Vector similarity search (top 5 similar vectors)
404-
select embd_distance(data) as distance, key, metadata
404+
select s3vec_distance(data) as distance, key, metadata
405405
from s3_vectors.embeddings
406-
where data <==> '[0.1, 0.2, 0.3, 0.4, 0.5]'::embd
406+
where data <==> '[0.1, 0.2, 0.3, 0.4, 0.5]'::s3vec
407407
order by 1
408408
limit 5;
409409
```
@@ -415,15 +415,15 @@ limit 5;
415415
insert into s3_vectors.embeddings (key, data, metadata)
416416
values (
417417
'product_001',
418-
'[0.1, 0.2, 0.3, 0.4, 0.5]'::embd,
418+
'[0.1, 0.2, 0.3, 0.4, 0.5]'::s3vec,
419419
'{"category": "electronics", "price": 299.99}'::jsonb
420420
);
421421

422422
-- Insert multiple vectors
423423
insert into s3_vectors.embeddings (key, data, metadata)
424424
values
425-
('product_002', '[0.2, 0.3, 0.4, 0.5, 0.6]'::embd, '{"category": "books"}'::jsonb),
426-
('product_003', '[0.3, 0.4, 0.5, 0.6, 0.7]'::embd, '{"category": "clothing"}'::jsonb);
425+
('product_002', '[0.2, 0.3, 0.4, 0.5, 0.6]'::s3vec, '{"category": "books"}'::jsonb),
426+
('product_003', '[0.3, 0.4, 0.5, 0.6, 0.7]'::s3vec, '{"category": "clothing"}'::jsonb);
427427
```
428428

429429
### Deleting Vectors
@@ -440,9 +440,9 @@ delete from s3_vectors.embeddings;
440440

441441
```sql
442442
-- Find similar vectors with metadata filtering
443-
select embd_distance(data) as distance, key, metadata
443+
select s3vec_distance(data) as distance, key, metadata
444444
from s3_vectors.embeddings
445-
where data <==> '[0.1, 0.2, 0.3, 0.4, 0.5]'::embd
445+
where data <==> '[0.1, 0.2, 0.3, 0.4, 0.5]'::s3vec
446446
and metadata <==> '{"category": "electronics"}'::jsonb
447447
order by 1
448448
limit 3;
@@ -454,15 +454,15 @@ limit 3;
454454
-- Create a function to convert text to embeddings (pseudo-code)
455455
-- This would typically use an external embedding service
456456
create or replace function text_to_embedding(input_text text)
457-
returns embd
457+
returns s3vec
458458
language sql
459459
as $$
460460
-- This is a placeholder - you would implement actual text embedding logic
461-
select '[0.1, 0.2, 0.3, 0.4, 0.5]'::embd;
461+
select '[0.1, 0.2, 0.3, 0.4, 0.5]'::s3vec;
462462
$$;
463463

464464
-- Semantic search example
465-
select embd_distance(data) as distance, key, metadata
465+
select s3vec_distance(data) as distance, key, metadata
466466
from s3_vectors.embeddings
467467
where data <==> text_to_embedding('Find similar products')
468468
and metadata <==> '{"status": "active"}'::jsonb

wrappers/src/fdw/s3vectors_fdw/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,5 @@ This is a foreign data wrapper for [AWS S3 Vectors](https://aws.amazon.com/s3/fe
1010

1111
| Version | Date | Notes |
1212
| ------- | ---------- | ---------------------------------------------------- |
13+
| 0.1.1 | 2025-11-17 | Changed 'embd' type name to 's3vec' |
1314
| 0.1.0 | 2025-09-14 | Initial version |

wrappers/src/fdw/s3vectors_fdw/mod.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#![allow(clippy::module_inception)]
22
mod conv;
3-
mod embd;
3+
mod s3vec;
44
mod s3vectors_fdw;
55
mod tests;
66

@@ -23,6 +23,9 @@ enum S3VectorsFdwError {
2323
#[error("query filter is not supported, check S3 Vectors wrapper documents for more details")]
2424
QueryNotSupported,
2525

26+
#[error("invalid s3vec value: {0}")]
27+
InvalidS3Vec(String),
28+
2629
#[error("invalid insert value {0}")]
2730
InvalidInsertValue(String),
2831

wrappers/src/fdw/s3vectors_fdw/s3vec.rs (renamed from embd.rs)

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use super::conv::document_to_json_value;
2+
use super::S3VectorsFdwError;
23
use aws_sdk_s3vectors::types::{GetOutputVector, ListOutputVector, QueryOutputVector, VectorData};
34
use pgrx::{pg_sys::bytea, prelude::*, stringinfo::StringInfo, JsonB};
45
use serde::{Deserialize, Serialize};
@@ -7,38 +8,38 @@ use std::ffi::CStr;
78

89
#[derive(Debug, Default, PostgresType, Serialize, Deserialize)]
910
#[inoutfuncs]
10-
pub(super) struct Embd {
11+
pub(super) struct S3Vec {
1112
pub key: String,
1213
pub data: Vec<f32>,
1314
pub metadata: Option<JsonValue>,
1415
pub distance: f32,
1516
}
1617

17-
impl InOutFuncs for Embd {
18-
const NULL_ERROR_MESSAGE: Option<&'static str> = Some("cannot insert NULL to embd column");
18+
impl InOutFuncs for S3Vec {
19+
const NULL_ERROR_MESSAGE: Option<&'static str> = Some("cannot insert NULL to s3vec column");
1920

2021
fn input(input: &CStr) -> Self {
2122
let value: JsonValue = serde_json::from_str(input.to_str().unwrap_or_default())
22-
.expect("embd input should be a valid JSON string");
23+
.expect("s3vec input should be a valid JSON string");
2324

2425
if value.is_array() {
2526
Self {
26-
data: serde_json::from_value(value).expect("embd data should be a float32 array"),
27+
data: serde_json::from_value(value).expect("s3vec data should be a float32 array"),
2728
..Default::default()
2829
}
2930
} else {
3031
let ret: Self =
31-
serde_json::from_value(value).expect("embd should be in valid JSON format");
32+
serde_json::from_value(value).expect("s3vec should be in valid JSON format");
3233
ret
3334
}
3435
}
3536

3637
fn output(&self, buffer: &mut StringInfo) {
37-
buffer.push_str(&format!("embd:{}", self.data.len()));
38+
buffer.push_str(&format!("s3vec:{}", self.data.len()));
3839
}
3940
}
4041

41-
impl From<&ListOutputVector> for Embd {
42+
impl From<&ListOutputVector> for S3Vec {
4243
fn from(v: &ListOutputVector) -> Self {
4344
let data = if let Some(VectorData::Float32(vector_data)) = &v.data {
4445
vector_data.clone()
@@ -56,7 +57,7 @@ impl From<&ListOutputVector> for Embd {
5657
}
5758
}
5859

59-
impl From<&GetOutputVector> for Embd {
60+
impl From<&GetOutputVector> for S3Vec {
6061
fn from(v: &GetOutputVector) -> Self {
6162
let data = if let Some(VectorData::Float32(vector_data)) = &v.data {
6263
vector_data.clone()
@@ -74,7 +75,7 @@ impl From<&GetOutputVector> for Embd {
7475
}
7576
}
7677

77-
impl From<&QueryOutputVector> for Embd {
78+
impl From<&QueryOutputVector> for S3Vec {
7879
fn from(v: &QueryOutputVector) -> Self {
7980
let data = if let Some(VectorData::Float32(vector_data)) = &v.data {
8081
vector_data.clone()
@@ -92,16 +93,23 @@ impl From<&QueryOutputVector> for Embd {
9293
}
9394
}
9495

95-
impl From<*mut bytea> for Embd {
96-
fn from(v: *mut bytea) -> Self {
96+
impl TryFrom<*mut bytea> for S3Vec {
97+
type Error = S3VectorsFdwError;
98+
99+
fn try_from(v: *mut bytea) -> Result<Self, Self::Error> {
100+
if v.is_null() {
101+
return Err(S3VectorsFdwError::InvalidS3Vec(
102+
"input bytea pointer is null".to_string(),
103+
));
104+
}
97105
let ret: Self = unsafe { pgrx::datum::cbor_decode(v) };
98-
ret
106+
Ok(ret)
99107
}
100108
}
101109

102110
#[pg_operator(immutable, parallel_safe)]
103111
#[opname(<==>)]
104-
fn embd_knn(_left: Embd, _right: Embd) -> bool {
112+
fn s3vec_knn(_left: S3Vec, _right: S3Vec) -> bool {
105113
// always return true here, actual calculation will be done in the wrapper
106114
true
107115
}
@@ -114,6 +122,6 @@ fn metadata_filter(_left: JsonB, _right: JsonB) -> bool {
114122
}
115123

116124
#[pg_extern]
117-
fn embd_distance(embd: Embd) -> f32 {
118-
embd.distance
125+
fn s3vec_distance(s3vec: S3Vec) -> f32 {
126+
s3vec.distance
119127
}

0 commit comments

Comments
 (0)