diff --git a/deno.lock b/deno.lock index 4335a6e66..6e42c9b5d 100644 --- a/deno.lock +++ b/deno.lock @@ -93,13 +93,14 @@ "npm:@commitlint/cli@^19.8.1", "npm:@commitlint/config-conventional@^19.8.1", "npm:@commitlint/cz-commitlint@^19.8.1", - "npm:@nx/eslint@21.5.2", - "npm:@nx/jest@21.5.2", - "npm:@nx/js@21.5.2", - "npm:@nx/playwright@21.5.2", - "npm:@nx/vite@21.5.2", - "npm:@nx/web@21.5.2", - "npm:@nx/webpack@21.5.2", + "npm:@nx/eslint-plugin@21.6.2", + "npm:@nx/eslint@21.6.2", + "npm:@nx/jest@21.6.2", + "npm:@nx/js@21.6.2", + "npm:@nx/playwright@21.6.2", + "npm:@nx/vite@21.6.2", + "npm:@nx/web@21.6.2", + "npm:@nx/webpack@21.6.2", "npm:@supabase/node-fetch@2.6.15", "npm:@swc/core@~1.5.7", "npm:@swc/helpers@~0.5.11", @@ -107,7 +108,7 @@ "npm:@types/jest@^29.5.14", "npm:@types/jsonwebtoken@^8.5.8", "npm:@types/node-fetch@^2.6.4", - "npm:@types/node@20", + "npm:@types/node@20.19.9", "npm:@typescript-eslint/eslint-plugin@^7.18.0", "npm:@typescript-eslint/parser@^7.18.0", "npm:@vitest/ui@3", @@ -123,8 +124,9 @@ "npm:jest-mock-server@0.1", "npm:jest@^29.7.0", "npm:jiti@2.4.2", + "npm:jsonc-eslint-parser@^2.1.0", "npm:jsonwebtoken@9", - "npm:nx@21.5.2", + "npm:nx@21.6.2", "npm:prettier@^3.6.2", "npm:rimraf@^6.0.1", "npm:semantic-release-plugin-update-version-in-files@^1.1.0", @@ -170,6 +172,7 @@ "npm:cpy-cli@5", "npm:jest@^28.1.0", "npm:node-abort-controller@^3.0.1", + "npm:prettier@^2.6.2", "npm:ts-jest@^28.0.3", "npm:tstyche@^4.3.0", "npm:type-fest@^4.32.0", diff --git a/nx.json b/nx.json index 1f0242833..b6b005b51 100644 --- a/nx.json +++ b/nx.json @@ -129,6 +129,7 @@ }, "test:integration": { "inputs": ["testing", "^production"], + "dependsOn": ["^build"], "cache": true, "outputs": ["{projectRoot}/coverage"] }, diff --git a/packages/core/auth-js/README.md b/packages/core/auth-js/README.md index 88d63419e..90c45de70 100644 --- a/packages/core/auth-js/README.md +++ b/packages/core/auth-js/README.md @@ -155,10 +155,4 @@ The Docker setup includes: We welcome 
contributions! Please see our [Contributing Guide](../../../CONTRIBUTING.md) for details on how to get started. -For major changes or if you're unsure about something, please open an issue first to discuss your proposed changes. - -## Sponsors - -We are building the features of Firebase using enterprise-grade, open source products. We support existing communities wherever possible, and if the products don't exist we build them and open source them ourselves. - -[![New Sponsor](https://user-images.githubusercontent.com/10214025/90518111-e74bbb00-e198-11ea-8f88-c9e3c1aa4b5b.png)](https://github.com/sponsors/supabase) +For major changes or if you're unsure about something, please open an issue first to discuss your proposed changes. \ No newline at end of file diff --git a/packages/core/postgrest-js/README.md b/packages/core/postgrest-js/README.md index 52f4a41b3..df9b1467a 100644 --- a/packages/core/postgrest-js/README.md +++ b/packages/core/postgrest-js/README.md @@ -143,8 +143,6 @@ npx nx test:update postgrest-js # Type checking only npx nx test:types postgrest-js -# Format checking -npx nx format:check postgrest-js ``` #### Test Infrastructure @@ -198,10 +196,4 @@ For major changes or if you're unsure about something, please open an issue firs ## License -This repo is licensed under MIT License. - -## Sponsors - -We are building the features of Firebase using enterprise-grade, open source products. We support existing communities wherever possible, and if the products don’t exist we build them and open source them ourselves. Thanks to these sponsors who are making the OSS ecosystem better for everyone. - -[![New Sponsor](https://user-images.githubusercontent.com/10214025/90518111-e74bbb00-e198-11ea-8f88-c9e3c1aa4b5b.png)](https://github.com/sponsors/supabase) +This repo is licensed under MIT License. 
\ No newline at end of file diff --git a/packages/core/storage-js/README.md b/packages/core/storage-js/README.md index 3d9c5c482..46a23bc80 100644 --- a/packages/core/storage-js/README.md +++ b/packages/core/storage-js/README.md @@ -10,7 +10,7 @@

Supabase Storage JS SDK

-

JavaScript SDK to interact with Supabase Storage.

+

JavaScript SDK to interact with Supabase Storage, including file storage and vector embeddings.

Guides @@ -27,8 +27,18 @@ [![Package](https://img.shields.io/npm/v/@supabase/storage-js)](https://www.npmjs.com/package/@supabase/storage-js) [![License: MIT](https://img.shields.io/npm/l/@supabase/supabase-js)](#license) [![pkg.pr.new](https://pkg.pr.new/badge/supabase/storage-js)](https://pkg.pr.new/~/supabase/storage-js) + +## Features + +- **File Storage**: Upload, download, list, move, and delete files +- **Access Control**: Public and private buckets with fine-grained permissions +- **Signed URLs**: Generate time-limited URLs for secure file access +- **Image Transformations**: On-the-fly image resizing and optimization +- **Vector Embeddings**: Store and query high-dimensional embeddings with similarity search +- **Analytics Buckets**: Iceberg table-based buckets optimized for analytical queries and data processing + ## Quick Start Guide ### Installing the module @@ -39,6 +49,33 @@ npm install @supabase/storage-js ### Connecting to the storage backend +There are two ways to use the Storage SDK: + +#### Option 1: Via Supabase Client (Recommended) + +If you're already using `@supabase/supabase-js`, access storage through the client: + +```js +import { createClient } from '@supabase/supabase-js' + +const supabase = createClient( + 'https://.supabase.co', + '' +) + +// Access storage +const storage = supabase.storage + +// Access different bucket types +const regularBucket = storage.from('my-bucket') +const vectorBucket = storage.vectors.from('embeddings-bucket') +const analyticsBucket = storage.analytics // Analytics API +``` + +#### Option 2: Standalone StorageClient + +For applications that only need storage functionality: + ```js import { StorageClient } from '@supabase/storage-js' @@ -49,8 +86,79 @@ const storageClient = new StorageClient(STORAGE_URL, { apikey: SERVICE_KEY, Authorization: `Bearer ${SERVICE_KEY}`, }) + +// Access different bucket types +const regularBucket = storageClient.from('my-bucket') +const vectorBucket = 
storageClient.vectors.from('embeddings-bucket') +const analyticsBucket = storageClient.analytics // Analytics API +``` + +> **When to use each approach:** +> - Use `supabase.storage` when working with other Supabase features (auth, database, etc.) +> - Use `new StorageClient()` for storage-only applications or when you need fine-grained control + +### Understanding Bucket Types + +Supabase Storage supports three types of buckets, each optimized for different use cases: + +#### 1. Regular Storage Buckets (File Storage) + +Standard buckets for storing files, images, videos, and other assets. + +```js +// Create regular storage bucket +const { data, error } = await storageClient.createBucket('my-files', { + public: false +}) + +// Upload files +await storageClient.from('my-files').upload('avatar.png', file) +``` + +**Use cases:** User uploads, media assets, documents, backups + +#### 2. Vector Buckets (Embeddings Storage) + +Specialized buckets for storing and querying high-dimensional vector embeddings. + +```js +// Create vector bucket +await storageClient.vectors.createBucket('embeddings-prod') + +// Create index and insert vectors +const bucket = storageClient.vectors.from('embeddings-prod') +await bucket.createIndex({ + indexName: 'documents', + dimension: 1536, + distanceMetric: 'cosine' +}) +``` + +**Use cases:** Semantic search, AI-powered recommendations, similarity matching + +**[See full Vector Embeddings documentation below](#vector-embeddings)** + +#### 3. Analytics Buckets + +Specialized buckets using Apache Iceberg table format, optimized for analytical queries and large-scale data processing. 
+ +```js +// Create analytics bucket +await storageClient.analytics.createBucket('analytics-data') + +// List analytics buckets +const { data, error } = await storageClient.analytics.listBuckets() + +// Delete analytics bucket +await storageClient.analytics.deleteBucket('analytics-data') ``` +**Use cases:** Time-series data, analytical queries, data lakes, large-scale data processing, business intelligence + +**[See full Analytics Buckets documentation below](#analytics-buckets)** + +--- + ### Handling resources #### Handling Storage Buckets @@ -175,6 +283,791 @@ const storageClient = new StorageClient(STORAGE_URL, { const { data, error } = await storageClient.from('public-bucket').getPublicUrl('path/to/file') ``` +## Analytics Buckets + +Supabase Storage provides specialized analytics buckets using Apache Iceberg table format, optimized for analytical workloads and large-scale data processing. These buckets are designed for data lake architectures, time-series data, and business intelligence applications. + +### What are Analytics Buckets? 
+ +Analytics buckets use the Apache Iceberg open table format, providing: +- **ACID transactions** for data consistency +- **Schema evolution** without data rewrites +- **Time travel** to query historical data +- **Efficient metadata management** for large datasets +- **Optimized for analytical queries** rather than individual file operations + +### When to Use Analytics Buckets + +**Use analytics buckets for:** +- Time-series data (logs, metrics, events) +- Data lake architectures +- Business intelligence and reporting +- Large-scale batch processing +- Analytical workloads requiring ACID guarantees + +**Use regular storage buckets for:** +- User file uploads (images, documents, videos) +- Individual file management +- Content delivery +- Simple object storage needs + +### Quick Start + +You can access analytics functionality through the `analytics` property on your storage client: + +#### Via Supabase Client + +```typescript +import { createClient } from '@supabase/supabase-js' + +const supabase = createClient( + 'https://your-project.supabase.co', + 'your-anon-key' +) + +// Access analytics operations +const analytics = supabase.storage.analytics + +// Create an analytics bucket +const { data, error } = await analytics.createBucket('analytics-data') +if (error) { + console.error('Failed to create analytics bucket:', error.message) +} else { + console.log('Created bucket:', data.name) +} +``` + +#### Via StorageClient + +```typescript +import { StorageClient } from '@supabase/storage-js' + +const storageClient = new StorageClient('https://your-project.supabase.co/storage/v1', { + apikey: 'YOUR_API_KEY', + Authorization: 'Bearer YOUR_TOKEN', +}) + +// Access analytics operations +const analytics = storageClient.analytics + +// Create an analytics bucket +await analytics.createBucket('analytics-data') +``` + +### API Reference + +#### Create Analytics Bucket + +Creates a new analytics bucket using Iceberg table format: + +```typescript +const { data, error } = 
await analytics.createBucket('my-analytics-bucket') + +if (error) { + console.error('Error:', error.message) +} else { + console.log('Created bucket:', data) +} +``` + +**Returns:** +```typescript +{ + data: { + id: string + type: 'ANALYTICS' + format: string + created_at: string + updated_at: string + } | null + error: StorageError | null +} +``` + +#### List Analytics Buckets + +Retrieves all analytics buckets in your project with optional filtering and pagination: + +```typescript +const { data, error } = await analytics.listBuckets({ + limit: 10, + offset: 0, + sortColumn: 'created_at', + sortOrder: 'desc', + search: 'prod' +}) + +if (data) { + console.log(`Found ${data.length} analytics buckets`) + data.forEach(bucket => { + console.log(`- ${bucket.id} (created: ${bucket.created_at})`) + }) +} +``` + +**Parameters:** +- `limit?: number` - Maximum number of buckets to return +- `offset?: number` - Number of buckets to skip (for pagination) +- `sortColumn?: 'id' | 'name' | 'created_at' | 'updated_at'` - Column to sort by +- `sortOrder?: 'asc' | 'desc'` - Sort direction +- `search?: string` - Search term to filter bucket names + +**Returns:** +```typescript +{ + data: AnalyticBucket[] | null + error: StorageError | null +} +``` + +**Example with Pagination:** + +```typescript +// Fetch first page +const firstPage = await analytics.listBuckets({ + limit: 100, + offset: 0, + sortColumn: 'created_at', + sortOrder: 'desc' +}) + +// Fetch second page +const secondPage = await analytics.listBuckets({ + limit: 100, + offset: 100, + sortColumn: 'created_at', + sortOrder: 'desc' +}) +``` + +#### Delete Analytics Bucket + +Deletes an analytics bucket. The bucket must be empty before deletion. 
+ +```typescript +const { data, error } = await analytics.deleteBucket('old-analytics-bucket') + +if (error) { + console.error('Failed to delete:', error.message) +} else { + console.log('Bucket deleted:', data.message) +} +``` + +**Returns:** +```typescript +{ + data: { message: string } | null + error: StorageError | null +} +``` + +> **Note:** A bucket cannot be deleted if it contains data. You must empty the bucket first. + +### Error Handling + +Analytics buckets use the same error handling pattern as the rest of the Storage SDK: + +```typescript +const { data, error } = await analytics.createBucket('my-bucket') + +if (error) { + console.error('Error:', error.message) + console.error('Status:', error.status) + console.error('Status Code:', error.statusCode) + // Handle error appropriately +} +``` + +#### Throwing Errors + +You can configure the client to throw errors instead of returning them: + +```typescript +const analytics = storageClient.analytics +analytics.throwOnError() + +try { + const { data } = await analytics.createBucket('my-bucket') + // data is guaranteed to be present + console.log('Success:', data) +} catch (error) { + if (error instanceof StorageApiError) { + console.error('API Error:', error.statusCode, error.message) + } +} +``` + +### TypeScript Types + +The library exports TypeScript types for analytics buckets: + +```typescript +import type { + AnalyticBucket, + BucketType, + StorageError, +} from '@supabase/storage-js' + +// AnalyticBucket type +interface AnalyticBucket { + id: string + type: 'ANALYTICS' + format: string + created_at: string + updated_at: string +} +``` + +### Common Patterns + +#### Checking if a Bucket Exists + +```typescript +async function bucketExists(bucketName: string): Promise { + const { data, error } = await analytics.listBuckets({ + search: bucketName + }) + + if (error) { + console.error('Error checking bucket:', error.message) + return false + } + + return data?.some(bucket => bucket.id === bucketName) ?? 
false +} +``` + +#### Creating Bucket with Error Handling + +```typescript +async function ensureAnalyticsBucket(bucketName: string) { + // Try to create the bucket + const { data, error } = await analytics.createBucket(bucketName) + + if (error) { + // Check if bucket already exists (conflict error) + if (error.statusCode === '409') { + console.log(`Bucket '${bucketName}' already exists`) + return { success: true, created: false } + } + + // Other error occurred + console.error('Failed to create bucket:', error.message) + return { success: false, error } + } + + console.log(`Created new bucket: '${bucketName}'`) + return { success: true, created: true, data } +} +``` + +#### Listing All Buckets with Pagination + +```typescript +async function getAllAnalyticsBuckets() { + const allBuckets: AnalyticBucket[] = [] + let offset = 0 + const limit = 100 + + while (true) { + const { data, error } = await analytics.listBuckets({ + limit, + offset, + sortColumn: 'created_at', + sortOrder: 'desc' + }) + + if (error) { + console.error('Error fetching buckets:', error.message) + break + } + + if (!data || data.length === 0) { + break + } + + allBuckets.push(...data) + + // If we got fewer results than the limit, we've reached the end + if (data.length < limit) { + break + } + + offset += limit + } + + return allBuckets +} +``` + +## Vector Embeddings + +Supabase Storage provides built-in support for storing and querying high-dimensional vector embeddings, powered by S3 Vectors. This enables semantic search, similarity matching, and AI-powered applications without needing a separate vector database. + +> **Note:** Vector embeddings functionality is available in `@supabase/storage-js` v2.76 and later. 
+ +### Features + +- **Vector Buckets**: Organize vector indexes into logical containers +- **Vector Indexes**: Define schemas with configurable dimensions and distance metrics +- **Batch Operations**: Insert/update/delete up to 500 vectors per request +- **Similarity Search**: Query for nearest neighbors using cosine, euclidean, or dot product distance +- **Metadata Filtering**: Store and filter vectors by arbitrary JSON metadata +- **Pagination**: Efficiently scan large vector datasets +- **Parallel Scanning**: Distribute scans across multiple workers for high throughput +- **Cross-platform**: Works in Node.js, browsers, and edge runtimes + +### Quick Start + +You can access vector functionality in three ways, depending on your use case: + +#### Option 1: Via Supabase Client (Most Common) + +If you're using the full Supabase client: + +```typescript +import { createClient } from '@supabase/supabase-js' + +const supabase = createClient( + 'https://your-project.supabase.co', + 'your-anon-key' +) + +// Access vector operations through storage +const vectors = supabase.storage.vectors + +// Create a vector bucket +await vectors.createBucket('embeddings-prod') + +// Create an index +const bucket = vectors.from('embeddings-prod') +await bucket.createIndex({ + indexName: 'documents-openai', + dataType: 'float32', + dimension: 1536, + distanceMetric: 'cosine', +}) + +// Insert vectors +const index = bucket.index('documents-openai') +await index.putVectors({ + vectors: [ + { + key: 'doc-1', + data: { float32: [0.1, 0.2, 0.3 /* ...1536 dimensions */] }, + metadata: { title: 'Introduction', category: 'docs' }, + }, + ], +}) + +// Query similar vectors +const { data, error } = await index.queryVectors({ + queryVector: { float32: [0.15, 0.25, 0.35 /* ...1536 dimensions */] }, + topK: 5, + returnDistance: true, + returnMetadata: true, +}) + +if (data) { + data.matches.forEach((match) => { + console.log(`${match.key}: distance=${match.distance}`) + console.log('Metadata:', 
match.metadata) + }) +} +``` + +#### Option 2: Via StorageClient + +If you're using the standalone `StorageClient` for storage operations, access vectors through the `vectors` property: + +```typescript +import { StorageClient } from '@supabase/storage-js' + +const storageClient = new StorageClient('https://your-project.supabase.co/storage/v1', { + apikey: 'YOUR_API_KEY', + Authorization: 'Bearer YOUR_TOKEN', +}) + +// Access vector operations +const vectors = storageClient.vectors + +// Use the same API as shown in Option 1 +await vectors.createBucket('embeddings-prod') +const bucket = vectors.from('embeddings-prod') +// ... rest of operations +``` + +#### Option 3: Standalone Vector Client + +For vector-only applications that don't need regular file storage operations: + +```typescript +import { StorageVectorsClient } from '@supabase/storage-js' + +// Initialize standalone vector client +const vectorClient = new StorageVectorsClient('https://your-project.supabase.co/storage/v1', { + headers: { Authorization: 'Bearer YOUR_TOKEN' }, +}) + +// Use the same API as shown in Option 1 +await vectorClient.createBucket('embeddings-prod') +const bucket = vectorClient.from('embeddings-prod') +// ... rest of operations +``` + +> **When to use each approach:** +> +> - **Option 1**: When using other Supabase features (auth, database, realtime) +> - **Option 2**: When working with both file storage and vectors +> - **Option 3**: For dedicated vector-only applications without file storage + +### API Reference + +#### Client Initialization + +```typescript +const vectorClient = new StorageVectorsClient(url, options?) +``` + +**Options:** + +- `headers?: Record` - Custom HTTP headers (e.g., Authorization) +- `fetch?: Fetch` - Custom fetch implementation + +#### Vector Buckets + +Vector buckets are top-level containers for organizing vector indexes. 
+ +##### Create Bucket + +```typescript +const { data, error } = await vectorClient.createBucket('my-bucket') +``` + +##### Get Bucket + +```typescript +const { data, error } = await vectorClient.getBucket('my-bucket') +console.log('Created at:', new Date(data.vectorBucket.creationTime! * 1000)) +``` + +##### List Buckets + +```typescript +const { data, error } = await vectorClient.listBuckets({ + prefix: 'prod-', + maxResults: 100, +}) + +// Pagination +if (data?.nextToken) { + const next = await vectorClient.listBuckets({ nextToken: data.nextToken }) +} +``` + +##### Delete Bucket + +```typescript +// Bucket must be empty (all indexes deleted first) +const { error } = await vectorClient.deleteBucket('my-bucket') +``` + +#### Vector Indexes + +Vector indexes define the schema for embeddings including dimension and distance metric. + +##### Create Index + +```typescript +const bucket = vectorClient.from('my-bucket') + +await bucket.createIndex({ + indexName: 'my-index', + dataType: 'float32', + dimension: 1536, + distanceMetric: 'cosine', // 'cosine' | 'euclidean' | 'dotproduct' + metadataConfiguration: { + nonFilterableMetadataKeys: ['raw_text', 'internal_id'], + }, +}) +``` + +**Distance Metrics:** + +- `cosine` - Cosine similarity (normalized dot product) +- `euclidean` - Euclidean distance (L2 norm) +- `dotproduct` - Dot product similarity + +##### Get Index + +```typescript +const { data, error } = await bucket.getIndex('my-index') +console.log('Dimension:', data?.index.dimension) +console.log('Distance metric:', data?.index.distanceMetric) +``` + +##### List Indexes + +```typescript +const { data, error } = await bucket.listIndexes({ + prefix: 'documents-', + maxResults: 100, +}) +``` + +##### Delete Index + +```typescript +// Deletes index and all its vectors +await bucket.deleteIndex('my-index') +``` + +#### Vector Operations + +##### Insert/Update Vectors (Upsert) + +```typescript +const index = vectorClient.from('my-bucket').index('my-index') + +await 
index.putVectors({ + vectors: [ + { + key: 'unique-id-1', + data: { + float32: [ + /* 1536 numbers */ + ], + }, + metadata: { + title: 'Document Title', + category: 'technical', + page: 1, + }, + }, + // ... up to 500 vectors per request + ], +}) +``` + +**Limitations:** + +- 1-500 vectors per request +- Vectors must match index dimension +- Keys must be unique within index + +##### Get Vectors by Key + +```typescript +const { data, error } = await index.getVectors({ + keys: ['doc-1', 'doc-2', 'doc-3'], + returnData: true, // Include embeddings + returnMetadata: true, // Include metadata +}) + +data?.vectors.forEach((v) => { + console.log(v.key, v.metadata) +}) +``` + +##### Query Similar Vectors (ANN Search) + +```typescript +const { data, error } = await index.queryVectors({ + queryVector: { + float32: [ + /* 1536 numbers */ + ], + }, + topK: 10, + filter: { + category: 'technical', + published: true, + }, + returnDistance: true, + returnMetadata: true, +}) + +// Results ordered by similarity +data?.matches.forEach((match) => { + console.log(`${match.key}: distance=${match.distance}`) +}) +``` + +**Filter Syntax:** +The `filter` parameter accepts arbitrary JSON for metadata filtering. Non-filterable keys (configured at index creation) cannot be used in filters but can still be returned. 
+ +##### List/Scan Vectors + +```typescript +// Simple pagination +let nextToken: string | undefined +do { + const { data } = await index.listVectors({ + maxResults: 500, + nextToken, + returnMetadata: true, + }) + + console.log('Batch:', data?.vectors.length) + nextToken = data?.nextToken +} while (nextToken) + +// Parallel scanning (4 workers) +const workers = [0, 1, 2, 3].map(async (segmentIndex) => { + const { data } = await index.listVectors({ + segmentCount: 4, + segmentIndex, + returnMetadata: true, + }) + return data?.vectors || [] +}) + +const results = await Promise.all(workers) +const allVectors = results.flat() +``` + +**Limitations:** + +- `maxResults`: 1-1000 (default: 500) +- `segmentCount`: 1-16 +- Response may be limited by 1MB size + +##### Delete Vectors + +```typescript +await index.deleteVectors({ + keys: ['doc-1', 'doc-2', 'doc-3'], + // ... up to 500 keys per request +}) +``` + +### Error Handling + +The library uses a consistent error handling pattern: + +```typescript +const { data, error } = await vectorClient.createBucket('my-bucket') + +if (error) { + console.error('Error:', error.message) + console.error('Status:', error.status) + console.error('Code:', error.statusCode) +} +``` + +#### Error Codes + +| Code | HTTP | Description | +| ---------------------------- | ---- | ----------------------- | +| `InternalError` | 500 | Internal server error | +| `S3VectorConflictException` | 409 | Resource already exists | +| `S3VectorNotFoundException` | 404 | Resource not found | +| `S3VectorBucketNotEmpty` | 400 | Bucket contains indexes | +| `S3VectorMaxBucketsExceeded` | 400 | Bucket quota exceeded | +| `S3VectorMaxIndexesExceeded` | 400 | Index quota exceeded | + +#### Throwing Errors + +You can configure the client to throw errors instead: + +```typescript +const vectorClient = new StorageVectorsClient(url, options) +vectorClient.throwOnError() + +try { + const { data } = await vectorClient.createBucket('my-bucket') + // data is guaranteed to 
be present +} catch (error) { + if (error instanceof StorageVectorsApiError) { + console.error('API Error:', error.statusCode) + } +} +``` + +### Advanced Usage + +#### Scoped Clients + +Create scoped clients for cleaner code: + +```typescript +// Bucket-scoped operations +const bucket = vectorClient.from('embeddings-prod') +await bucket.createIndex({ + /* ... */ +}) +await bucket.listIndexes() + +// Index-scoped operations +const index = bucket.index('documents-openai') +await index.putVectors({ + /* ... */ +}) +await index.queryVectors({ + /* ... */ +}) +``` + +#### Custom Fetch + +Provide a custom fetch implementation: + +```typescript +import { StorageVectorsClient } from '@supabase/storage-js' + +const vectorClient = new StorageVectorsClient(url, { + fetch: customFetch, + headers: { + /* ... */ + }, +}) +``` + +#### Batch Processing + +Process large datasets in batches: + +```typescript +async function insertLargeDataset(vectors: VectorObject[]) { + const batchSize = 500 + + for (let i = 0; i < vectors.length; i += batchSize) { + const batch = vectors.slice(i, i + batchSize) + await index.putVectors({ vectors: batch }) + console.log(`Inserted ${i + batch.length}/${vectors.length}`) + } +} +``` + +#### Float32 Validation + +Ensure vectors are properly normalized to float32: + +```typescript +import { normalizeToFloat32 } from '@supabase/storage-js' + +const vector = normalizeToFloat32([0.1, 0.2, 0.3 /* ... */]) +``` + +### Type Definitions + +The library exports comprehensive TypeScript types: + +```typescript +import type { + VectorBucket, + VectorIndex, + VectorData, + VectorObject, + VectorMatch, + VectorMetadata, + DistanceMetric, + ApiResponse, + StorageVectorsError, +} from '@supabase/storage-js' +``` + ## Development This package is part of the [Supabase JavaScript monorepo](https://github.com/supabase/supabase-js). To work on this package: @@ -423,10 +1316,4 @@ The test infrastructure (`infra/docker-compose.yml`) includes: We welcome contributions! 
Please see our [Contributing Guide](../../../CONTRIBUTING.md) for details on how to get started. -For major changes or if you're unsure about something, please open an issue first to discuss your proposed changes. - -## Sponsors - -We are building the features of Firebase using enterprise-grade, open source products. We support existing communities wherever possible, and if the products don’t exist we build them and open source them ourselves. Thanks to these sponsors who are making the OSS ecosystem better for everyone. - -[![New Sponsor](https://user-images.githubusercontent.com/10214025/90518111-e74bbb00-e198-11ea-8f88-c9e3c1aa4b5b.png)](https://github.com/sponsors/supabase) +For major changes or if you're unsure about something, please open an issue first to discuss your proposed changes. \ No newline at end of file diff --git a/packages/core/storage-js/src/StorageClient.ts b/packages/core/storage-js/src/StorageClient.ts index 1e7a421e7..25da7fc07 100644 --- a/packages/core/storage-js/src/StorageClient.ts +++ b/packages/core/storage-js/src/StorageClient.ts @@ -1,6 +1,8 @@ import StorageFileApi from './packages/StorageFileApi' import StorageBucketApi from './packages/StorageBucketApi' +import StorageAnalyticsApi from './packages/StorageAnalyticsApi' import { Fetch } from './lib/fetch' +import { StorageVectorsClient } from './lib/vectors' export interface StorageClientOptions { useNewHostname?: boolean @@ -24,4 +26,39 @@ export class StorageClient extends StorageBucketApi { from(id: string): StorageFileApi { return new StorageFileApi(this.url, this.headers, id, this.fetch) } + + /** + * Access vector storage operations. + * + * @returns A StorageVectorsClient instance configured with the current storage settings. + */ + get vectors(): StorageVectorsClient { + return new StorageVectorsClient(this.url + '/vector', { + headers: this.headers, + fetch: this.fetch, + }) + } + + /** + * Access analytics storage operations using Iceberg tables. 
+ * + * @returns A StorageAnalyticsApi instance configured with the current storage settings. + * @example + * ```typescript + * const client = createClient(url, key) + * const analytics = client.storage.analytics + * + * // Create an analytics bucket + * await analytics.createBucket('my-analytics-bucket') + * + * // List all analytics buckets + * const { data: buckets } = await analytics.listBuckets() + * + * // Delete an analytics bucket + * await analytics.deleteBucket('old-analytics-bucket') + * ``` + */ + get analytics(): StorageAnalyticsApi { + return new StorageAnalyticsApi(this.url + '/iceberg', this.headers, this.fetch) + } } diff --git a/packages/core/storage-js/src/index.ts b/packages/core/storage-js/src/index.ts index b7d3d8caa..15794f0e4 100644 --- a/packages/core/storage-js/src/index.ts +++ b/packages/core/storage-js/src/index.ts @@ -1,4 +1,6 @@ export { StorageClient } from './StorageClient' export type { StorageClientOptions } from './StorageClient' +export { default as StorageAnalyticsApi } from './packages/StorageAnalyticsApi' export * from './lib/types' export * from './lib/errors' +export * from './lib/vectors' diff --git a/packages/core/storage-js/src/lib/constants.ts b/packages/core/storage-js/src/lib/constants.ts index dc81a349c..6c8b41ae1 100644 --- a/packages/core/storage-js/src/lib/constants.ts +++ b/packages/core/storage-js/src/lib/constants.ts @@ -1,2 +1,4 @@ import { version } from './version' -export const DEFAULT_HEADERS = { 'X-Client-Info': `storage-js/${version}` } +export const DEFAULT_HEADERS = { + 'X-Client-Info': `storage-js/${version}`, +} diff --git a/packages/core/storage-js/src/lib/index.ts b/packages/core/storage-js/src/lib/index.ts index fb0c99760..98399fae5 100644 --- a/packages/core/storage-js/src/lib/index.ts +++ b/packages/core/storage-js/src/lib/index.ts @@ -2,3 +2,4 @@ export * from '../packages/StorageBucketApi' export * from '../packages/StorageFileApi' export * from './types' export * from './constants' +export * 
from './vectors' diff --git a/packages/core/storage-js/src/lib/types.ts b/packages/core/storage-js/src/lib/types.ts index f692c5aa9..99a2bd8fc 100644 --- a/packages/core/storage-js/src/lib/types.ts +++ b/packages/core/storage-js/src/lib/types.ts @@ -1,5 +1,10 @@ import { StorageError } from './errors' +/** + * Type of storage bucket + * - STANDARD: Regular file storage buckets + * - ANALYTICS: Iceberg table-based buckets for analytical workloads + */ export type BucketType = 'STANDARD' | 'ANALYTICS' export interface Bucket { @@ -14,6 +19,23 @@ export interface Bucket { public: boolean } +/** + * Represents an Analytics Bucket using Apache Iceberg table format. + * Analytics buckets are optimized for analytical queries and data processing. + */ +export interface AnalyticBucket { + /** Unique identifier for the bucket */ + id: string + /** Bucket type - always 'ANALYTICS' for analytics buckets */ + type: 'ANALYTICS' + /** Storage format used (e.g., 'iceberg') */ + format: string + /** ISO 8601 timestamp of bucket creation */ + created_at: string + /** ISO 8601 timestamp of last update */ + updated_at: string +} + export interface FileObject { name: string bucket_id: string diff --git a/packages/core/storage-js/src/lib/vectors/StorageVectorsClient.ts b/packages/core/storage-js/src/lib/vectors/StorageVectorsClient.ts new file mode 100644 index 000000000..9ce309b50 --- /dev/null +++ b/packages/core/storage-js/src/lib/vectors/StorageVectorsClient.ts @@ -0,0 +1,405 @@ +import VectorIndexApi, { CreateIndexOptions } from './VectorIndexApi' +import VectorDataApi from './VectorDataApi' +import { Fetch } from './fetch' +import VectorBucketApi from './VectorBucketApi' +import { + DeleteVectorsOptions, + GetVectorsOptions, + ListIndexesOptions, + ListVectorsOptions, + PutVectorsOptions, + QueryVectorsOptions, +} from './types' + +/** + * Configuration options for the Storage Vectors client + */ +export interface StorageVectorsClientOptions { + /** + * Custom headers to include 
in all requests + */ + headers?: { [key: string]: string } + /** + * Custom fetch implementation (optional) + * Useful for testing or custom request handling + */ + fetch?: Fetch +} + +/** + * Main client for interacting with S3 Vectors API + * Provides access to bucket, index, and vector data operations + * + * **Usage Patterns:** + * + * 1. **Via StorageClient (recommended for most use cases):** + * ```typescript + * import { StorageClient } from '@supabase/storage-js' + * + * const storageClient = new StorageClient(url, headers) + * const vectors = storageClient.vectors + * + * // Use vector operations + * await vectors.createBucket('embeddings-prod') + * const bucket = vectors.from('embeddings-prod') + * await bucket.createIndex({ ... }) + * ``` + * + * 2. **Standalone (for vector-only applications):** + * ```typescript + * import { StorageVectorsClient } from '@supabase/storage-js' + * + * const vectorsClient = new StorageVectorsClient('https://api.example.com', { + * headers: { 'Authorization': 'Bearer token' } + * }) + * + * // Access bucket operations + * await vectorsClient.createBucket('embeddings-prod') + * + * // Access index operations via buckets + * const bucket = vectorsClient.from('embeddings-prod') + * await bucket.createIndex({ + * indexName: 'documents', + * dataType: 'float32', + * dimension: 1536, + * distanceMetric: 'cosine' + * }) + * + * // Access vector operations via index + * const index = bucket.index('documents') + * await index.putVectors({ + * vectors: [ + * { key: 'doc-1', data: { float32: [...] }, metadata: { title: 'Intro' } } + * ] + * }) + * + * // Query similar vectors + * const { data } = await index.queryVectors({ + * queryVector: { float32: [...] 
}, + * topK: 5, + * returnDistance: true + * }) + * ``` + */ +export class StorageVectorsClient extends VectorBucketApi { + constructor(url: string, options: StorageVectorsClientOptions = {}) { + super(url, options.headers || {}, options.fetch) + } + + /** + * Access operations for a specific vector bucket + * Returns a scoped client for index and vector operations within the bucket + * + * @param vectorBucketName - Name of the vector bucket + * @returns Bucket-scoped client with index and vector operations + * + * @example + * ```typescript + * const bucket = client.bucket('embeddings-prod') + * + * // Create an index in this bucket + * await bucket.createIndex({ + * indexName: 'documents-openai', + * dataType: 'float32', + * dimension: 1536, + * distanceMetric: 'cosine' + * }) + * + * // List indexes in this bucket + * const { data } = await bucket.listIndexes() + * ``` + */ + from(vectorBucketName: string): VectorBucketScope { + return new VectorBucketScope(this.url, this.headers, vectorBucketName, this.fetch) + } +} + +/** + * Scoped client for operations within a specific vector bucket + * Provides index management and access to vector operations + */ +export class VectorBucketScope extends VectorIndexApi { + private vectorBucketName: string + + constructor( + url: string, + headers: { [key: string]: string }, + vectorBucketName: string, + fetch?: Fetch + ) { + super(url, headers, fetch) + this.vectorBucketName = vectorBucketName + } + + /** + * Creates a new vector index in this bucket + * Convenience method that automatically includes the bucket name + * + * @param options - Index configuration (vectorBucketName is automatically set) + * @returns Promise with empty response on success or error + * + * @example + * ```typescript + * const bucket = client.bucket('embeddings-prod') + * await bucket.createIndex({ + * indexName: 'documents-openai', + * dataType: 'float32', + * dimension: 1536, + * distanceMetric: 'cosine', + * metadataConfiguration: { + * 
nonFilterableMetadataKeys: ['raw_text']
+   *   }
+   * })
+   * ```
+   */
+  override async createIndex(options: Omit<CreateIndexOptions, 'vectorBucketName'>) {
+    return super.createIndex({
+      ...options,
+      vectorBucketName: this.vectorBucketName,
+    })
+  }
+
+  /**
+   * Lists indexes in this bucket
+   * Convenience method that automatically includes the bucket name
+   *
+   * @param options - Listing options (vectorBucketName is automatically set)
+   * @returns Promise with list of indexes or error
+   *
+   * @example
+   * ```typescript
+   * const bucket = client.from('embeddings-prod')
+   * const { data } = await bucket.listIndexes({ prefix: 'documents-' })
+   * ```
+   */
+  override async listIndexes(options: Omit<ListIndexesOptions, 'vectorBucketName'> = {}) {
+    return super.listIndexes({
+      ...options,
+      vectorBucketName: this.vectorBucketName,
+    })
+  }
+
+  /**
+   * Retrieves metadata for a specific index in this bucket
+   * Convenience method that automatically includes the bucket name
+   *
+   * @param indexName - Name of the index to retrieve
+   * @returns Promise with index metadata or error
+   *
+   * @example
+   * ```typescript
+   * const bucket = client.from('embeddings-prod')
+   * const { data } = await bucket.getIndex('documents-openai')
+   * console.log('Dimension:', data?.index.dimension)
+   * ```
+   */
+  override async getIndex(indexName: string) {
+    return super.getIndex(this.vectorBucketName, indexName)
+  }
+
+  /**
+   * Deletes an index from this bucket
+   * Convenience method that automatically includes the bucket name
+   *
+   * @param indexName - Name of the index to delete
+   * @returns Promise with empty response on success or error
+   *
+   * @example
+   * ```typescript
+   * const bucket = client.from('embeddings-prod')
+   * await bucket.deleteIndex('old-index')
+   * ```
+   */
+  override async deleteIndex(indexName: string) {
+    return super.deleteIndex(this.vectorBucketName, indexName)
+  }
+
+  /**
+   * Access operations for a specific index within this bucket
+   * Returns a scoped client for vector data operations
+   *
+   * @param indexName - Name of the index
+   * @returns
Index-scoped client with vector data operations + * + * @example + * ```typescript + * const index = client.bucket('embeddings-prod').index('documents-openai') + * + * // Insert vectors + * await index.putVectors({ + * vectors: [ + * { key: 'doc-1', data: { float32: [...] }, metadata: { title: 'Intro' } } + * ] + * }) + * + * // Query similar vectors + * const { data } = await index.queryVectors({ + * queryVector: { float32: [...] }, + * topK: 5 + * }) + * ``` + */ + index(indexName: string): VectorIndexScope { + return new VectorIndexScope( + this.url, + this.headers, + this.vectorBucketName, + indexName, + this.fetch + ) + } +} + +/** + * Scoped client for operations within a specific vector index + * Provides vector data operations (put, get, list, query, delete) + */ +export class VectorIndexScope extends VectorDataApi { + private vectorBucketName: string + private indexName: string + + constructor( + url: string, + headers: { [key: string]: string }, + vectorBucketName: string, + indexName: string, + fetch?: Fetch + ) { + super(url, headers, fetch) + this.vectorBucketName = vectorBucketName + this.indexName = indexName + } + + /** + * Inserts or updates vectors in this index + * Convenience method that automatically includes bucket and index names + * + * @param options - Vector insertion options (bucket and index names automatically set) + * @returns Promise with empty response on success or error + * + * @example + * ```typescript + * const index = client.bucket('embeddings-prod').index('documents-openai') + * await index.putVectors({ + * vectors: [ + * { + * key: 'doc-1', + * data: { float32: [0.1, 0.2, ...] 
},
+   *       metadata: { title: 'Introduction', page: 1 }
+   *     }
+   *   ]
+   * })
+   * ```
+   */
+  override async putVectors(options: Omit<PutVectorsOptions, 'vectorBucketName' | 'indexName'>) {
+    return super.putVectors({
+      ...options,
+      vectorBucketName: this.vectorBucketName,
+      indexName: this.indexName,
+    })
+  }
+
+  /**
+   * Retrieves vectors by keys from this index
+   * Convenience method that automatically includes bucket and index names
+   *
+   * @param options - Vector retrieval options (bucket and index names automatically set)
+   * @returns Promise with array of vectors or error
+   *
+   * @example
+   * ```typescript
+   * const index = client.from('embeddings-prod').index('documents-openai')
+   * const { data } = await index.getVectors({
+   *   keys: ['doc-1', 'doc-2'],
+   *   returnMetadata: true
+   * })
+   * ```
+   */
+  override async getVectors(options: Omit<GetVectorsOptions, 'vectorBucketName' | 'indexName'>) {
+    return super.getVectors({
+      ...options,
+      vectorBucketName: this.vectorBucketName,
+      indexName: this.indexName,
+    })
+  }
+
+  /**
+   * Lists vectors in this index with pagination
+   * Convenience method that automatically includes bucket and index names
+   *
+   * @param options - Listing options (bucket and index names automatically set)
+   * @returns Promise with array of vectors and pagination token
+   *
+   * @example
+   * ```typescript
+   * const index = client.from('embeddings-prod').index('documents-openai')
+   * const { data } = await index.listVectors({
+   *   maxResults: 500,
+   *   returnMetadata: true
+   * })
+   * ```
+   */
+  override async listVectors(
+    options: Omit<ListVectorsOptions, 'vectorBucketName' | 'indexName'> = {}
+  ) {
+    return super.listVectors({
+      ...options,
+      vectorBucketName: this.vectorBucketName,
+      indexName: this.indexName,
+    })
+  }
+
+  /**
+   * Queries for similar vectors in this index
+   * Convenience method that automatically includes bucket and index names
+   *
+   * @param options - Query options (bucket and index names automatically set)
+   * @returns Promise with array of similar vectors ordered by distance
+   *
+   * @example
+   * ```typescript
+   * const index = client.from('embeddings-prod').index('documents-openai')
+   * const { data } = await index.queryVectors({
+   *   queryVector: { float32: [...] },
+   *   topK: 5,
+   *   filter: { category: 'technical' },
+   *   returnDistance: true,
+   *   returnMetadata: true
+   * })
+   * ```
+   */
+  override async queryVectors(
+    options: Omit<QueryVectorsOptions, 'vectorBucketName' | 'indexName'>
+  ) {
+    return super.queryVectors({
+      ...options,
+      vectorBucketName: this.vectorBucketName,
+      indexName: this.indexName,
+    })
+  }
+
+  /**
+   * Deletes vectors by keys from this index
+   * Convenience method that automatically includes bucket and index names
+   *
+   * @param options - Deletion options (bucket and index names automatically set)
+   * @returns Promise with empty response on success or error
+   *
+   * @example
+   * ```typescript
+   * const index = client.from('embeddings-prod').index('documents-openai')
+   * await index.deleteVectors({
+   *   keys: ['doc-1', 'doc-2', 'doc-3']
+   * })
+   * ```
+   */
+  override async deleteVectors(
+    options: Omit<DeleteVectorsOptions, 'vectorBucketName' | 'indexName'>
+  ) {
+    return super.deleteVectors({
+      ...options,
+      vectorBucketName: this.vectorBucketName,
+      indexName: this.indexName,
+    })
+  }
+}
diff --git a/packages/core/storage-js/src/lib/vectors/VectorBucketApi.ts b/packages/core/storage-js/src/lib/vectors/VectorBucketApi.ts
new file mode 100644
index 000000000..c4a49d2ce
--- /dev/null
+++ b/packages/core/storage-js/src/lib/vectors/VectorBucketApi.ts
@@ -0,0 +1,217 @@
+import { DEFAULT_HEADERS } from './constants'
+import { isStorageVectorsError } from './errors'
+import { Fetch, post } from './fetch'
+import { resolveFetch } from './helpers'
+import {
+  ApiResponse,
+  VectorBucket,
+  ListVectorBucketsOptions,
+  ListVectorBucketsResponse,
+} from './types'
+
+/**
+ * API class for managing Vector Buckets
+ * Provides methods for creating, reading, listing, and deleting vector buckets
+ */
+export default class VectorBucketApi {
+  protected url: string
+  protected headers: { [key: string]: string }
+  protected fetch: Fetch
+  protected shouldThrowOnError = false
+
+ /** + * Creates a new VectorBucketApi instance + * @param url - The base URL for the storage vectors API + * @param headers - HTTP headers to include in requests + * @param fetch - Optional custom fetch implementation + */ + constructor(url: string, headers: { [key: string]: string } = {}, fetch?: Fetch) { + this.url = url.replace(/\/$/, '') + this.headers = { ...DEFAULT_HEADERS, ...headers } + this.fetch = resolveFetch(fetch) + } + + /** + * Enable throwing errors instead of returning them in the response + * When enabled, failed operations will throw instead of returning { data: null, error } + * + * @returns This instance for method chaining + * @example + * ```typescript + * const client = new VectorBucketApi(url, headers) + * client.throwOnError() + * const { data } = await client.createBucket('my-bucket') // throws on error + * ``` + */ + public throwOnError(): this { + this.shouldThrowOnError = true + return this + } + + /** + * Creates a new vector bucket + * Vector buckets are containers for vector indexes and their data + * + * @param vectorBucketName - Unique name for the vector bucket + * @returns Promise with empty response on success or error + * + * @throws {StorageVectorsApiError} With code: + * - `S3VectorConflictException` if bucket already exists (HTTP 409) + * - `S3VectorMaxBucketsExceeded` if quota exceeded (HTTP 400) + * - `InternalError` for server errors (HTTP 500) + * + * @example + * ```typescript + * const { data, error } = await client.createBucket('embeddings-prod') + * if (error) { + * console.error('Failed to create bucket:', error.message) + * } + * ``` + */ + async createBucket(vectorBucketName: string): Promise> { + try { + const data = await post( + this.fetch, + `${this.url}/CreateVectorBucket`, + { vectorBucketName }, + { headers: this.headers } + ) + return { data: data || {}, error: null } + } catch (error) { + if (this.shouldThrowOnError) { + throw error + } + if (isStorageVectorsError(error)) { + return { data: null, error 
} + } + throw error + } + } + + /** + * Retrieves metadata for a specific vector bucket + * Returns bucket configuration including encryption settings and creation time + * + * @param vectorBucketName - Name of the vector bucket to retrieve + * @returns Promise with bucket metadata or error + * + * @throws {StorageVectorsApiError} With code: + * - `S3VectorNotFoundException` if bucket doesn't exist (HTTP 404) + * - `InternalError` for server errors (HTTP 500) + * + * @example + * ```typescript + * const { data, error } = await client.getBucket('embeddings-prod') + * if (data) { + * console.log('Bucket created at:', new Date(data.vectorBucket.creationTime! * 1000)) + * } + * ``` + */ + async getBucket( + vectorBucketName: string + ): Promise> { + try { + const data = await post( + this.fetch, + `${this.url}/GetVectorBucket`, + { vectorBucketName }, + { headers: this.headers } + ) + return { data, error: null } + } catch (error) { + if (this.shouldThrowOnError) { + throw error + } + if (isStorageVectorsError(error)) { + return { data: null, error } + } + throw error + } + } + + /** + * Lists vector buckets with optional filtering and pagination + * Supports prefix-based filtering and paginated results + * + * @param options - Listing options + * @param options.prefix - Filter buckets by name prefix + * @param options.maxResults - Maximum results per page (default: 100) + * @param options.nextToken - Pagination token from previous response + * @returns Promise with list of buckets and pagination token + * + * @throws {StorageVectorsApiError} With code: + * - `InternalError` for server errors (HTTP 500) + * + * @example + * ```typescript + * // List all buckets with prefix 'prod-' + * const { data, error } = await client.listBuckets({ prefix: 'prod-' }) + * if (data) { + * console.log('Found buckets:', data.buckets.length) + * // Fetch next page if available + * if (data.nextToken) { + * const next = await client.listBuckets({ nextToken: data.nextToken }) + * } + * } + 
* ``` + */ + async listBuckets( + options: ListVectorBucketsOptions = {} + ): Promise> { + try { + const data = await post(this.fetch, `${this.url}/ListVectorBuckets`, options, { + headers: this.headers, + }) + return { data, error: null } + } catch (error) { + if (this.shouldThrowOnError) { + throw error + } + if (isStorageVectorsError(error)) { + return { data: null, error } + } + throw error + } + } + + /** + * Deletes a vector bucket + * Bucket must be empty before deletion (all indexes must be removed first) + * + * @param vectorBucketName - Name of the vector bucket to delete + * @returns Promise with empty response on success or error + * + * @throws {StorageVectorsApiError} With code: + * - `S3VectorBucketNotEmpty` if bucket contains indexes (HTTP 400) + * - `S3VectorNotFoundException` if bucket doesn't exist (HTTP 404) + * - `InternalError` for server errors (HTTP 500) + * + * @example + * ```typescript + * // Delete all indexes first, then delete bucket + * const { error } = await client.deleteBucket('old-bucket') + * if (error?.statusCode === 'S3VectorBucketNotEmpty') { + * console.error('Must delete all indexes first') + * } + * ``` + */ + async deleteBucket(vectorBucketName: string): Promise> { + try { + const data = await post( + this.fetch, + `${this.url}/DeleteVectorBucket`, + { vectorBucketName }, + { headers: this.headers } + ) + return { data: data || {}, error: null } + } catch (error) { + if (this.shouldThrowOnError) { + throw error + } + if (isStorageVectorsError(error)) { + return { data: null, error } + } + throw error + } + } +} diff --git a/packages/core/storage-js/src/lib/vectors/VectorDataApi.ts b/packages/core/storage-js/src/lib/vectors/VectorDataApi.ts new file mode 100644 index 000000000..99d6f33e3 --- /dev/null +++ b/packages/core/storage-js/src/lib/vectors/VectorDataApi.ts @@ -0,0 +1,341 @@ +import { DEFAULT_HEADERS } from './constants' +import { isStorageVectorsError } from './errors' +import { Fetch, post } from './fetch' +import 
{ resolveFetch } from './helpers' +import { + ApiResponse, + PutVectorsOptions, + GetVectorsOptions, + GetVectorsResponse, + DeleteVectorsOptions, + ListVectorsOptions, + ListVectorsResponse, + QueryVectorsOptions, + QueryVectorsResponse, +} from './types' + +/** + * API class for managing Vector Data within Vector Indexes + * Provides methods for inserting, querying, listing, and deleting vector embeddings + */ +export default class VectorDataApi { + protected url: string + protected headers: { [key: string]: string } + protected fetch: Fetch + protected shouldThrowOnError = false + + constructor(url: string, headers: { [key: string]: string } = {}, fetch?: Fetch) { + this.url = url.replace(/\/$/, '') + this.headers = { ...DEFAULT_HEADERS, ...headers } + this.fetch = resolveFetch(fetch) + } + + /** + * Enable throwing errors instead of returning them in the response + * When enabled, failed operations will throw instead of returning { data: null, error } + * + * @returns This instance for method chaining + * @example + * ```typescript + * const client = new VectorDataApi(url, headers) + * client.throwOnError() + * const { data } = await client.putVectors(options) // throws on error + * ``` + */ + public throwOnError(): this { + this.shouldThrowOnError = true + return this + } + + /** + * Inserts or updates vectors in batch (upsert operation) + * Accepts 1-500 vectors per request. 
Larger batches should be split + * + * @param options - Vector insertion options + * @param options.vectorBucketName - Name of the parent vector bucket + * @param options.indexName - Name of the target index + * @param options.vectors - Array of vectors to insert/update (1-500 items) + * @returns Promise with empty response on success or error + * + * @throws {StorageVectorsApiError} With code: + * - `S3VectorConflictException` if duplicate key conflict occurs (HTTP 409) + * - `S3VectorNotFoundException` if bucket or index doesn't exist (HTTP 404) + * - `InternalError` for server errors (HTTP 500) + * + * @example + * ```typescript + * const { data, error } = await client.putVectors({ + * vectorBucketName: 'embeddings-prod', + * indexName: 'documents-openai-small', + * vectors: [ + * { + * key: 'doc-1', + * data: { float32: [0.1, 0.2, 0.3, ...] }, // 1536 dimensions + * metadata: { title: 'Introduction', page: 1 } + * }, + * { + * key: 'doc-2', + * data: { float32: [0.4, 0.5, 0.6, ...] }, + * metadata: { title: 'Conclusion', page: 42 } + * } + * ] + * }) + * ``` + */ + async putVectors(options: PutVectorsOptions): Promise> { + try { + // Validate batch size + if (options.vectors.length < 1 || options.vectors.length > 500) { + throw new Error('Vector batch size must be between 1 and 500 items') + } + + const data = await post(this.fetch, `${this.url}/PutVectors`, options, { + headers: this.headers, + }) + return { data: data || {}, error: null } + } catch (error) { + if (this.shouldThrowOnError) { + throw error + } + if (isStorageVectorsError(error)) { + return { data: null, error } + } + throw error + } + } + + /** + * Retrieves vectors by their keys in batch + * Optionally includes vector data and/or metadata in response + * Additional permissions required when returning data or metadata + * + * @param options - Vector retrieval options + * @param options.vectorBucketName - Name of the parent vector bucket + * @param options.indexName - Name of the index + * 
@param options.keys - Array of vector keys to retrieve + * @param options.returnData - Whether to include vector embeddings (requires permission) + * @param options.returnMetadata - Whether to include metadata (requires permission) + * @returns Promise with array of vectors or error + * + * @throws {StorageVectorsApiError} With code: + * - `S3VectorNotFoundException` if bucket or index doesn't exist (HTTP 404) + * - `InternalError` for server errors (HTTP 500) + * + * @example + * ```typescript + * const { data, error } = await client.getVectors({ + * vectorBucketName: 'embeddings-prod', + * indexName: 'documents-openai-small', + * keys: ['doc-1', 'doc-2', 'doc-3'], + * returnData: false, // Don't return embeddings + * returnMetadata: true // Return metadata only + * }) + * if (data) { + * data.vectors.forEach(v => console.log(v.key, v.metadata)) + * } + * ``` + */ + async getVectors(options: GetVectorsOptions): Promise> { + try { + const data = await post(this.fetch, `${this.url}/GetVectors`, options, { + headers: this.headers, + }) + return { data, error: null } + } catch (error) { + if (this.shouldThrowOnError) { + throw error + } + if (isStorageVectorsError(error)) { + return { data: null, error } + } + throw error + } + } + + /** + * Lists/scans vectors in an index with pagination + * Supports parallel scanning via segment configuration for high-throughput scenarios + * Additional permissions required when returning data or metadata + * + * @param options - Vector listing options + * @param options.vectorBucketName - Name of the parent vector bucket + * @param options.indexName - Name of the index + * @param options.maxResults - Maximum results per page (default: 500, max: 1000) + * @param options.nextToken - Pagination token from previous response + * @param options.returnData - Whether to include vector embeddings (requires permission) + * @param options.returnMetadata - Whether to include metadata (requires permission) + * @param options.segmentCount - 
Total parallel segments (1-16) for distributed scanning + * @param options.segmentIndex - Zero-based segment index (0 to segmentCount-1) + * @returns Promise with array of vectors, pagination token, or error + * + * @throws {StorageVectorsApiError} With code: + * - `S3VectorNotFoundException` if bucket or index doesn't exist (HTTP 404) + * - `InternalError` for server errors (HTTP 500) + * + * @example + * ```typescript + * // Simple pagination + * let nextToken: string | undefined + * do { + * const { data, error } = await client.listVectors({ + * vectorBucketName: 'embeddings-prod', + * indexName: 'documents-openai-small', + * maxResults: 500, + * nextToken, + * returnMetadata: true + * }) + * if (error) break + * console.log('Batch:', data.vectors.length) + * nextToken = data.nextToken + * } while (nextToken) + * + * // Parallel scanning (4 concurrent workers) + * const workers = [0, 1, 2, 3].map(async (segmentIndex) => { + * const { data } = await client.listVectors({ + * vectorBucketName: 'embeddings-prod', + * indexName: 'documents-openai-small', + * segmentCount: 4, + * segmentIndex, + * returnMetadata: true + * }) + * return data?.vectors || [] + * }) + * const results = await Promise.all(workers) + * ``` + */ + async listVectors(options: ListVectorsOptions): Promise> { + try { + // Validate segment configuration + if (options.segmentCount !== undefined) { + if (options.segmentCount < 1 || options.segmentCount > 16) { + throw new Error('segmentCount must be between 1 and 16') + } + if (options.segmentIndex !== undefined) { + if (options.segmentIndex < 0 || options.segmentIndex >= options.segmentCount) { + throw new Error(`segmentIndex must be between 0 and ${options.segmentCount - 1}`) + } + } + } + + const data = await post(this.fetch, `${this.url}/ListVectors`, options, { + headers: this.headers, + }) + return { data, error: null } + } catch (error) { + if (this.shouldThrowOnError) { + throw error + } + if (isStorageVectorsError(error)) { + return { data: 
null, error } + } + throw error + } + } + + /** + * Queries for similar vectors using approximate nearest neighbor (ANN) search + * Returns top-K most similar vectors based on the configured distance metric + * Supports optional metadata filtering (requires GetVectors permission) + * + * @param options - Query options + * @param options.vectorBucketName - Name of the parent vector bucket + * @param options.indexName - Name of the index + * @param options.queryVector - Query embedding to find similar vectors + * @param options.topK - Number of nearest neighbors to return (default: 10) + * @param options.filter - Optional JSON filter for metadata (requires GetVectors permission) + * @param options.returnDistance - Whether to include similarity distances + * @param options.returnMetadata - Whether to include metadata (requires GetVectors permission) + * @returns Promise with array of similar vectors ordered by distance + * + * @throws {StorageVectorsApiError} With code: + * - `S3VectorNotFoundException` if bucket or index doesn't exist (HTTP 404) + * - `InternalError` for server errors (HTTP 500) + * + * @example + * ```typescript + * // Semantic search with filtering + * const { data, error } = await client.queryVectors({ + * vectorBucketName: 'embeddings-prod', + * indexName: 'documents-openai-small', + * queryVector: { float32: [0.1, 0.2, 0.3, ...] 
}, // 1536 dimensions + * topK: 5, + * filter: { + * category: 'technical', + * published: true + * }, + * returnDistance: true, + * returnMetadata: true + * }) + * if (data) { + * data.matches.forEach(match => { + * console.log(`${match.key}: distance=${match.distance}`) + * console.log('Metadata:', match.metadata) + * }) + * } + * ``` + */ + async queryVectors(options: QueryVectorsOptions): Promise> { + try { + const data = await post(this.fetch, `${this.url}/QueryVectors`, options, { + headers: this.headers, + }) + return { data, error: null } + } catch (error) { + if (this.shouldThrowOnError) { + throw error + } + if (isStorageVectorsError(error)) { + return { data: null, error } + } + throw error + } + } + + /** + * Deletes vectors by their keys in batch + * Accepts 1-500 keys per request + * + * @param options - Vector deletion options + * @param options.vectorBucketName - Name of the parent vector bucket + * @param options.indexName - Name of the index + * @param options.keys - Array of vector keys to delete (1-500 items) + * @returns Promise with empty response on success or error + * + * @throws {StorageVectorsApiError} With code: + * - `S3VectorNotFoundException` if bucket or index doesn't exist (HTTP 404) + * - `InternalError` for server errors (HTTP 500) + * + * @example + * ```typescript + * const { error } = await client.deleteVectors({ + * vectorBucketName: 'embeddings-prod', + * indexName: 'documents-openai-small', + * keys: ['doc-1', 'doc-2', 'doc-3'] + * }) + * if (!error) { + * console.log('Vectors deleted successfully') + * } + * ``` + */ + async deleteVectors(options: DeleteVectorsOptions): Promise> { + try { + // Validate batch size + if (options.keys.length < 1 || options.keys.length > 500) { + throw new Error('Keys batch size must be between 1 and 500 items') + } + + const data = await post(this.fetch, `${this.url}/DeleteVectors`, options, { + headers: this.headers, + }) + return { data: data || {}, error: null } + } catch (error) { + if 
(this.shouldThrowOnError) { + throw error + } + if (isStorageVectorsError(error)) { + return { data: null, error } + } + throw error + } + } +} diff --git a/packages/core/storage-js/src/lib/vectors/VectorIndexApi.ts b/packages/core/storage-js/src/lib/vectors/VectorIndexApi.ts new file mode 100644 index 000000000..8f9bca0f6 --- /dev/null +++ b/packages/core/storage-js/src/lib/vectors/VectorIndexApi.ts @@ -0,0 +1,245 @@ +import { DEFAULT_HEADERS } from './constants' +import { isStorageVectorsError } from './errors' +import { Fetch, post } from './fetch' +import { resolveFetch } from './helpers' +import { + ApiResponse, + VectorIndex, + ListIndexesOptions, + ListIndexesResponse, + VectorDataType, + DistanceMetric, + MetadataConfiguration, +} from './types' + +/** + * Options for creating a vector index + */ +export interface CreateIndexOptions { + vectorBucketName: string + indexName: string + dataType: VectorDataType + dimension: number + distanceMetric: DistanceMetric + metadataConfiguration?: MetadataConfiguration +} + +/** + * API class for managing Vector Indexes within Vector Buckets + * Provides methods for creating, reading, listing, and deleting vector indexes + */ +export default class VectorIndexApi { + protected url: string + protected headers: { [key: string]: string } + protected fetch: Fetch + protected shouldThrowOnError = false + + constructor(url: string, headers: { [key: string]: string } = {}, fetch?: Fetch) { + this.url = url.replace(/\/$/, '') + this.headers = { ...DEFAULT_HEADERS, ...headers } + this.fetch = resolveFetch(fetch) + } + + /** + * Enable throwing errors instead of returning them in the response + * When enabled, failed operations will throw instead of returning { data: null, error } + * + * @returns This instance for method chaining + * @example + * ```typescript + * const client = new VectorIndexApi(url, headers) + * client.throwOnError() + * const { data } = await client.createIndex(options) // throws on error + * ``` + */ + 
public throwOnError(): this { + this.shouldThrowOnError = true + return this + } + + /** + * Creates a new vector index within a bucket + * Defines the schema for vectors including dimensionality, distance metric, and metadata config + * + * @param options - Index configuration + * @param options.vectorBucketName - Name of the parent vector bucket + * @param options.indexName - Unique name for the index within the bucket + * @param options.dataType - Data type for vector components (currently only 'float32') + * @param options.dimension - Dimensionality of vectors (e.g., 384, 768, 1536) + * @param options.distanceMetric - Similarity metric ('cosine', 'euclidean', 'dotproduct') + * @param options.metadataConfiguration - Optional config for non-filterable metadata keys + * @returns Promise with empty response on success or error + * + * @throws {StorageVectorsApiError} With code: + * - `S3VectorConflictException` if index already exists (HTTP 409) + * - `S3VectorMaxIndexesExceeded` if quota exceeded (HTTP 400) + * - `S3VectorNotFoundException` if bucket doesn't exist (HTTP 404) + * - `InternalError` for server errors (HTTP 500) + * + * @example + * ```typescript + * const { data, error } = await client.createIndex({ + * vectorBucketName: 'embeddings-prod', + * indexName: 'documents-openai-small', + * dataType: 'float32', + * dimension: 1536, + * distanceMetric: 'cosine', + * metadataConfiguration: { + * nonFilterableMetadataKeys: ['raw_text', 'internal_id'] + * } + * }) + * ``` + */ + async createIndex(options: CreateIndexOptions): Promise> { + try { + const data = await post(this.fetch, `${this.url}/CreateIndex`, options, { + headers: this.headers, + }) + return { data: data || {}, error: null } + } catch (error) { + if (this.shouldThrowOnError) { + throw error + } + if (isStorageVectorsError(error)) { + return { data: null, error } + } + throw error + } + } + + /** + * Retrieves metadata for a specific vector index + * Returns index configuration including 
dimension, distance metric, and metadata settings + * + * @param vectorBucketName - Name of the parent vector bucket + * @param indexName - Name of the index to retrieve + * @returns Promise with index metadata or error + * + * @throws {StorageVectorsApiError} With code: + * - `S3VectorNotFoundException` if index or bucket doesn't exist (HTTP 404) + * - `InternalError` for server errors (HTTP 500) + * + * @example + * ```typescript + * const { data, error } = await client.getIndex('embeddings-prod', 'documents-openai-small') + * if (data) { + * console.log('Index dimension:', data.index.dimension) + * console.log('Distance metric:', data.index.distanceMetric) + * } + * ``` + */ + async getIndex( + vectorBucketName: string, + indexName: string + ): Promise> { + try { + const data = await post( + this.fetch, + `${this.url}/GetIndex`, + { vectorBucketName, indexName }, + { headers: this.headers } + ) + return { data, error: null } + } catch (error) { + if (this.shouldThrowOnError) { + throw error + } + if (isStorageVectorsError(error)) { + return { data: null, error } + } + throw error + } + } + + /** + * Lists vector indexes within a bucket with optional filtering and pagination + * Supports prefix-based filtering and paginated results + * + * @param options - Listing options + * @param options.vectorBucketName - Name of the parent vector bucket + * @param options.prefix - Filter indexes by name prefix + * @param options.maxResults - Maximum results per page (default: 100) + * @param options.nextToken - Pagination token from previous response + * @returns Promise with list of indexes and pagination token + * + * @throws {StorageVectorsApiError} With code: + * - `S3VectorNotFoundException` if bucket doesn't exist (HTTP 404) + * - `InternalError` for server errors (HTTP 500) + * + * @example + * ```typescript + * // List all indexes in a bucket + * const { data, error } = await client.listIndexes({ + * vectorBucketName: 'embeddings-prod', + * prefix: 'documents-' + * 
}) + * if (data) { + * console.log('Found indexes:', data.indexes.map(i => i.indexName)) + * // Fetch next page if available + * if (data.nextToken) { + * const next = await client.listIndexes({ + * vectorBucketName: 'embeddings-prod', + * nextToken: data.nextToken + * }) + * } + * } + * ``` + */ + async listIndexes(options: ListIndexesOptions): Promise> { + try { + const data = await post(this.fetch, `${this.url}/ListIndexes`, options, { + headers: this.headers, + }) + return { data, error: null } + } catch (error) { + if (this.shouldThrowOnError) { + throw error + } + if (isStorageVectorsError(error)) { + return { data: null, error } + } + throw error + } + } + + /** + * Deletes a vector index and all its data + * This operation removes the index schema and all vectors stored in the index + * + * @param vectorBucketName - Name of the parent vector bucket + * @param indexName - Name of the index to delete + * @returns Promise with empty response on success or error + * + * @throws {StorageVectorsApiError} With code: + * - `S3VectorNotFoundException` if index or bucket doesn't exist (HTTP 404) + * - `InternalError` for server errors (HTTP 500) + * + * @example + * ```typescript + * // Delete an index and all its vectors + * const { error } = await client.deleteIndex('embeddings-prod', 'old-index') + * if (!error) { + * console.log('Index deleted successfully') + * } + * ``` + */ + async deleteIndex(vectorBucketName: string, indexName: string): Promise> { + try { + const data = await post( + this.fetch, + `${this.url}/DeleteIndex`, + { vectorBucketName, indexName }, + { headers: this.headers } + ) + return { data: data || {}, error: null } + } catch (error) { + if (this.shouldThrowOnError) { + throw error + } + if (isStorageVectorsError(error)) { + return { data: null, error } + } + throw error + } + } +} diff --git a/packages/core/storage-js/src/lib/vectors/constants.ts b/packages/core/storage-js/src/lib/vectors/constants.ts new file mode 100644 index 
000000000..7273379b9 --- /dev/null +++ b/packages/core/storage-js/src/lib/vectors/constants.ts @@ -0,0 +1,5 @@ +import { version } from '../version' +export const DEFAULT_HEADERS = { + 'X-Client-Info': `storage-js/${version}`, + 'Content-Type': 'application/json', +} diff --git a/packages/core/storage-js/src/lib/vectors/errors.ts b/packages/core/storage-js/src/lib/vectors/errors.ts new file mode 100644 index 000000000..bcaed5beb --- /dev/null +++ b/packages/core/storage-js/src/lib/vectors/errors.ts @@ -0,0 +1,78 @@ +/** + * Base error class for all Storage Vectors errors + */ +export class StorageVectorsError extends Error { + protected __isStorageVectorsError = true + + constructor(message: string) { + super(message) + this.name = 'StorageVectorsError' + } +} + +/** + * Type guard to check if an error is a StorageVectorsError + * @param error - The error to check + * @returns True if the error is a StorageVectorsError + */ +export function isStorageVectorsError(error: unknown): error is StorageVectorsError { + return typeof error === 'object' && error !== null && '__isStorageVectorsError' in error +} + +/** + * API error returned from S3 Vectors service + * Includes HTTP status code and service-specific error code + */ +export class StorageVectorsApiError extends StorageVectorsError { + status: number + statusCode: string + + constructor(message: string, status: number, statusCode: string) { + super(message) + this.name = 'StorageVectorsApiError' + this.status = status + this.statusCode = statusCode + } + + toJSON() { + return { + name: this.name, + message: this.message, + status: this.status, + statusCode: this.statusCode, + } + } +} + +/** + * Unknown error that doesn't match expected error patterns + * Wraps the original error for debugging + */ +export class StorageVectorsUnknownError extends StorageVectorsError { + originalError: unknown + + constructor(message: string, originalError: unknown) { + super(message) + this.name = 'StorageVectorsUnknownError' + 
this.originalError = originalError + } +} + +/** + * Error codes specific to S3 Vectors API + * Maps AWS service errors to application-friendly error codes + */ +export enum StorageVectorsErrorCode { + /** Internal server fault (HTTP 500) */ + InternalError = 'InternalError', + /** Resource already exists / conflict (HTTP 409) */ + S3VectorConflictException = 'S3VectorConflictException', + /** Resource not found (HTTP 404) */ + S3VectorNotFoundException = 'S3VectorNotFoundException', + /** Delete bucket while not empty (HTTP 400) */ + S3VectorBucketNotEmpty = 'S3VectorBucketNotEmpty', + /** Exceeds bucket quota/limit (HTTP 400) */ + S3VectorMaxBucketsExceeded = 'S3VectorMaxBucketsExceeded', + /** Exceeds index quota/limit (HTTP 400) */ + S3VectorMaxIndexesExceeded = 'S3VectorMaxIndexesExceeded', +} diff --git a/packages/core/storage-js/src/lib/vectors/fetch.ts b/packages/core/storage-js/src/lib/vectors/fetch.ts new file mode 100644 index 000000000..c18d2901f --- /dev/null +++ b/packages/core/storage-js/src/lib/vectors/fetch.ts @@ -0,0 +1,218 @@ +import { StorageVectorsApiError, StorageVectorsUnknownError } from './errors' +import { isPlainObject, resolveResponse } from './helpers' +import { VectorFetchParameters } from './types' + +export type Fetch = typeof fetch + +/** + * Options for fetch requests + * @property headers - Custom HTTP headers + * @property noResolveJson - If true, return raw Response instead of parsing JSON + */ +export interface FetchOptions { + headers?: { + [key: string]: string + } + noResolveJson?: boolean +} + +/** + * HTTP methods supported by the API + */ +export type RequestMethodType = 'GET' | 'POST' | 'PUT' | 'DELETE' + +/** + * Extracts error message from various error response formats + * @param err - Error object from API + * @returns Human-readable error message + */ +const _getErrorMessage = (err: any): string => + err.msg || err.message || err.error_description || err.error || JSON.stringify(err) + +/** + * Handles fetch errors 
and converts them to StorageVectors error types + * @param error - The error caught from fetch + * @param reject - Promise rejection function + * @param options - Fetch options that may affect error handling + */ +const handleError = async ( + error: unknown, + reject: (reason?: any) => void, + options?: FetchOptions +) => { + // Check if error is a Response-like object (has status and ok properties) + // This is more reliable than instanceof which can fail across realms + const isResponseLike = + error && + typeof error === 'object' && + 'status' in error && + 'ok' in error && + typeof (error as any).status === 'number' + + if (isResponseLike && !options?.noResolveJson) { + const status = (error as any).status || 500 + const responseError = error as any + + // Try to parse JSON body if available + if (typeof responseError.json === 'function') { + responseError + .json() + .then((err: any) => { + const statusCode = err?.statusCode || err?.code || status + '' + reject(new StorageVectorsApiError(_getErrorMessage(err), status, statusCode)) + }) + .catch(() => { + // If JSON parsing fails, create an ApiError with the HTTP status code + const statusCode = status + '' + const message = responseError.statusText || `HTTP ${status} error` + reject(new StorageVectorsApiError(message, status, statusCode)) + }) + } else { + // No json() method available, create error from status + const statusCode = status + '' + const message = responseError.statusText || `HTTP ${status} error` + reject(new StorageVectorsApiError(message, status, statusCode)) + } + } else { + reject(new StorageVectorsUnknownError(_getErrorMessage(error), error)) + } +} + +/** + * Builds request parameters for fetch calls + * @param method - HTTP method + * @param options - Custom fetch options + * @param parameters - Additional fetch parameters like AbortSignal + * @param body - Request body (will be JSON stringified if plain object) + * @returns Complete fetch request parameters + */ +const _getRequestParams 
= ( + method: RequestMethodType, + options?: FetchOptions, + parameters?: VectorFetchParameters, + body?: object +) => { + const params: { [k: string]: any } = { method, headers: options?.headers || {} } + + if (method === 'GET' || !body) { + return params + } + + if (isPlainObject(body)) { + params.headers = { 'Content-Type': 'application/json', ...options?.headers } + params.body = JSON.stringify(body) + } else { + params.body = body + } + + return { ...params, ...parameters } +} + +/** + * Internal request handler that wraps fetch with error handling + * @param fetcher - Fetch function to use + * @param method - HTTP method + * @param url - Request URL + * @param options - Custom fetch options + * @param parameters - Additional fetch parameters + * @param body - Request body + * @returns Promise with parsed response or error + */ +async function _handleRequest( + fetcher: Fetch, + method: RequestMethodType, + url: string, + options?: FetchOptions, + parameters?: VectorFetchParameters, + body?: object +): Promise { + return new Promise((resolve, reject) => { + fetcher(url, _getRequestParams(method, options, parameters, body)) + .then((result) => { + if (!result.ok) throw result + if (options?.noResolveJson) return result + // Handle empty responses (204, empty body) + const contentType = result.headers.get('content-type') + if (!contentType || !contentType.includes('application/json')) { + return {} + } + return result.json() + }) + .then((data) => resolve(data)) + .catch((error) => handleError(error, reject, options)) + }) +} + +/** + * Performs a GET request + * @param fetcher - Fetch function to use + * @param url - Request URL + * @param options - Custom fetch options + * @param parameters - Additional fetch parameters + * @returns Promise with parsed response + */ +export async function get( + fetcher: Fetch, + url: string, + options?: FetchOptions, + parameters?: VectorFetchParameters +): Promise { + return _handleRequest(fetcher, 'GET', url, options, 
parameters) +} + +/** + * Performs a POST request + * @param fetcher - Fetch function to use + * @param url - Request URL + * @param body - Request body to be JSON stringified + * @param options - Custom fetch options + * @param parameters - Additional fetch parameters + * @returns Promise with parsed response + */ +export async function post( + fetcher: Fetch, + url: string, + body: object, + options?: FetchOptions, + parameters?: VectorFetchParameters +): Promise { + return _handleRequest(fetcher, 'POST', url, options, parameters, body) +} + +/** + * Performs a PUT request + * @param fetcher - Fetch function to use + * @param url - Request URL + * @param body - Request body to be JSON stringified + * @param options - Custom fetch options + * @param parameters - Additional fetch parameters + * @returns Promise with parsed response + */ +export async function put( + fetcher: Fetch, + url: string, + body: object, + options?: FetchOptions, + parameters?: VectorFetchParameters +): Promise { + return _handleRequest(fetcher, 'PUT', url, options, parameters, body) +} + +/** + * Performs a DELETE request + * @param fetcher - Fetch function to use + * @param url - Request URL + * @param body - Request body to be JSON stringified + * @param options - Custom fetch options + * @param parameters - Additional fetch parameters + * @returns Promise with parsed response + */ +export async function remove( + fetcher: Fetch, + url: string, + body: object, + options?: FetchOptions, + parameters?: VectorFetchParameters +): Promise { + return _handleRequest(fetcher, 'DELETE', url, options, parameters, body) +} diff --git a/packages/core/storage-js/src/lib/vectors/helpers.ts b/packages/core/storage-js/src/lib/vectors/helpers.ts new file mode 100644 index 000000000..ed7d54955 --- /dev/null +++ b/packages/core/storage-js/src/lib/vectors/helpers.ts @@ -0,0 +1,93 @@ +type Fetch = typeof fetch + +/** + * Resolves the fetch implementation to use + * Uses custom fetch if provided, otherwise 
falls back to: + * - Native fetch in browser/modern environments + * - @supabase/node-fetch polyfill in Node.js environments without fetch + * + * @param customFetch - Optional custom fetch implementation + * @returns Resolved fetch function + */ +export const resolveFetch = (customFetch?: Fetch): Fetch => { + let _fetch: Fetch + if (customFetch) { + _fetch = customFetch + } else if (typeof fetch === 'undefined') { + _fetch = (...args) => + import('@supabase/node-fetch' as any).then(({ default: fetch }) => fetch(...args)) + } else { + _fetch = fetch + } + return (...args) => _fetch(...args) +} + +/** + * Resolves the Response constructor to use + * Uses native Response in browser/modern environments + * Falls back to @supabase/node-fetch polyfill in Node.js environments + * + * @returns Response constructor + */ +export const resolveResponse = async (): Promise => { + if (typeof Response === 'undefined') { + // @ts-ignore + return (await import('@supabase/node-fetch' as any)).Response + } + + return Response +} + +/** + * Determine if input is a plain object + * An object is plain if it's created by either {}, new Object(), or Object.create(null) + * + * @param value - Value to check + * @returns True if value is a plain object + * @source https://github.com/sindresorhus/is-plain-obj + */ +export const isPlainObject = (value: object): boolean => { + if (typeof value !== 'object' || value === null) { + return false + } + + const prototype = Object.getPrototypeOf(value) + return ( + (prototype === null || + prototype === Object.prototype || + Object.getPrototypeOf(prototype) === null) && + !(Symbol.toStringTag in value) && + !(Symbol.iterator in value) + ) +} + +/** + * Normalizes a number array to float32 format + * Ensures all vector values are valid 32-bit floats + * + * @param values - Array of numbers to normalize + * @returns Normalized float32 array + */ +export const normalizeToFloat32 = (values: number[]): number[] => { + // Use Float32Array to ensure proper 
precision + return Array.from(new Float32Array(values)) +} + +/** + * Validates vector dimensions match expected dimension + * Throws error if dimensions don't match + * + * @param vector - Vector data to validate + * @param expectedDimension - Expected vector dimension + * @throws Error if dimensions don't match + */ +export const validateVectorDimension = ( + vector: { float32: number[] }, + expectedDimension?: number +): void => { + if (expectedDimension !== undefined && vector.float32.length !== expectedDimension) { + throw new Error( + `Vector dimension mismatch: expected ${expectedDimension}, got ${vector.float32.length}` + ) + } +} diff --git a/packages/core/storage-js/src/lib/vectors/index.ts b/packages/core/storage-js/src/lib/vectors/index.ts new file mode 100644 index 000000000..c6d301e29 --- /dev/null +++ b/packages/core/storage-js/src/lib/vectors/index.ts @@ -0,0 +1,66 @@ +// Main client +export { StorageVectorsClient, VectorBucketScope, VectorIndexScope } from './StorageVectorsClient' +export type { StorageVectorsClientOptions } from './StorageVectorsClient' + +// API classes (for advanced usage) +export { default as VectorBucketApi } from './VectorBucketApi' +export { default as VectorIndexApi } from './VectorIndexApi' +export { default as VectorDataApi } from './VectorDataApi' +export type { CreateIndexOptions } from './VectorIndexApi' + +// Types +export type { + // Core types + VectorBucket, + VectorIndex, + VectorData, + VectorMetadata, + VectorObject, + VectorMatch, + EncryptionConfiguration, + MetadataConfiguration, + VectorDataType, + DistanceMetric, + VectorFilter, + + // Request/Response types + ListVectorBucketsOptions, + ListVectorBucketsResponse, + ListIndexesOptions, + ListIndexesResponse, + PutVectorsOptions, + GetVectorsOptions, + GetVectorsResponse, + DeleteVectorsOptions, + ListVectorsOptions, + ListVectorsResponse, + QueryVectorsOptions, + QueryVectorsResponse, + + // Response wrappers + ApiResponse, + SuccessResponse, + 
ErrorResponse, + VectorFetchParameters, +} from './types' + +// Errors +export { + StorageVectorsError, + StorageVectorsApiError, + StorageVectorsUnknownError, + StorageVectorsErrorCode, + isStorageVectorsError, +} from './errors' + +// Fetch utilities (for custom implementations) +export type { Fetch, FetchOptions, RequestMethodType } from './fetch' + +// Helper utilities +export { + resolveFetch, + resolveResponse, + isPlainObject, + normalizeToFloat32, + validateVectorDimension, +} from './helpers' diff --git a/packages/core/storage-js/src/lib/vectors/types.ts b/packages/core/storage-js/src/lib/vectors/types.ts new file mode 100644 index 000000000..2b798f2d9 --- /dev/null +++ b/packages/core/storage-js/src/lib/vectors/types.ts @@ -0,0 +1,299 @@ +import { StorageVectorsError } from './errors' + +/** + * Configuration for encryption at rest + * @property kmsKeyArn - ARN of the KMS key used for encryption + * @property sseType - Server-side encryption type (e.g., 'KMS') + */ +export interface EncryptionConfiguration { + kmsKeyArn?: string + sseType?: string +} + +/** + * Vector bucket metadata + * @property vectorBucketName - Unique name of the vector bucket + * @property creationTime - Unix timestamp of when the bucket was created + * @property encryptionConfiguration - Optional encryption settings + */ +export interface VectorBucket { + vectorBucketName: string + creationTime?: number + encryptionConfiguration?: EncryptionConfiguration +} + +/** + * Metadata configuration for vector index + * Defines which metadata keys should not be indexed for filtering + * @property nonFilterableMetadataKeys - Array of metadata keys that cannot be used in filters + */ +export interface MetadataConfiguration { + nonFilterableMetadataKeys?: string[] +} + +/** + * Supported data types for vectors + * Currently only float32 is supported + */ +export type VectorDataType = 'float32' + +/** + * Distance metrics for vector similarity search + */ +export type DistanceMetric = 'cosine' 
| 'euclidean' | 'dotproduct' + +/** + * Vector index configuration and metadata + * @property indexName - Unique name of the index within the bucket + * @property vectorBucketName - Name of the parent vector bucket + * @property dataType - Data type of vector components (currently only 'float32') + * @property dimension - Dimensionality of vectors (e.g., 384, 768, 1536) + * @property distanceMetric - Similarity metric used for queries + * @property metadataConfiguration - Configuration for metadata filtering + * @property creationTime - Unix timestamp of when the index was created + */ +export interface VectorIndex { + indexName: string + vectorBucketName: string + dataType: VectorDataType + dimension: number + distanceMetric: DistanceMetric + metadataConfiguration?: MetadataConfiguration + creationTime?: number +} + +/** + * Vector data representation + * Vectors must be float32 arrays with dimensions matching the index + * @property float32 - Array of 32-bit floating point numbers + */ +export interface VectorData { + float32: number[] +} + +/** + * Arbitrary JSON metadata attached to vectors + * Keys configured as non-filterable in the index can be stored but not queried + */ +export type VectorMetadata = Record + +/** + * Single vector object for insertion/update + * @property key - Unique identifier for the vector + * @property data - Vector embedding data + * @property metadata - Optional arbitrary metadata + */ +export interface VectorObject { + key: string + data: VectorData + metadata?: VectorMetadata +} + +/** + * Vector object returned from queries with optional distance + * @property key - Unique identifier for the vector + * @property data - Vector embedding data (if requested) + * @property metadata - Arbitrary metadata (if requested) + * @property distance - Similarity distance from query vector (if requested) + */ +export interface VectorMatch { + key: string + data?: VectorData + metadata?: VectorMetadata + distance?: number +} + +/** + * Options 
for fetching vector buckets + * @property prefix - Filter buckets by name prefix + * @property maxResults - Maximum number of results to return (default: 100) + * @property nextToken - Token for pagination from previous response + */ +export interface ListVectorBucketsOptions { + prefix?: string + maxResults?: number + nextToken?: string +} + +/** + * Response from listing vector buckets + * @property buckets - Array of bucket names + * @property nextToken - Token for fetching next page (if more results exist) + */ +export interface ListVectorBucketsResponse { + buckets: { vectorBucketName: string }[] + nextToken?: string +} + +/** + * Options for listing indexes within a bucket + * @property vectorBucketName - Name of the parent vector bucket + * @property prefix - Filter indexes by name prefix + * @property maxResults - Maximum number of results to return (default: 100) + * @property nextToken - Token for pagination from previous response + */ +export interface ListIndexesOptions { + vectorBucketName: string + prefix?: string + maxResults?: number + nextToken?: string +} + +/** + * Response from listing indexes + * @property indexes - Array of index names + * @property nextToken - Token for fetching next page (if more results exist) + */ +export interface ListIndexesResponse { + indexes: { indexName: string }[] + nextToken?: string +} + +/** + * Options for batch reading vectors + * @property vectorBucketName - Name of the vector bucket + * @property indexName - Name of the index + * @property keys - Array of vector keys to retrieve + * @property returnData - Whether to include vector data in response + * @property returnMetadata - Whether to include metadata in response + */ +export interface GetVectorsOptions { + vectorBucketName: string + indexName: string + keys: string[] + returnData?: boolean + returnMetadata?: boolean +} + +/** + * Response from getting vectors + * @property vectors - Array of retrieved vector objects + */ +export interface 
GetVectorsResponse { + vectors: VectorMatch[] +} + +/** + * Options for batch inserting/updating vectors + * @property vectorBucketName - Name of the vector bucket + * @property indexName - Name of the index + * @property vectors - Array of vectors to insert/upsert (1-500 items) + */ +export interface PutVectorsOptions { + vectorBucketName: string + indexName: string + vectors: VectorObject[] +} + +/** + * Options for batch deleting vectors + * @property vectorBucketName - Name of the vector bucket + * @property indexName - Name of the index + * @property keys - Array of vector keys to delete (1-500 items) + */ +export interface DeleteVectorsOptions { + vectorBucketName: string + indexName: string + keys: string[] +} + +/** + * Options for listing/scanning vectors in an index + * Supports parallel scanning via segment configuration + * @property vectorBucketName - Name of the vector bucket + * @property indexName - Name of the index + * @property maxResults - Maximum number of results to return (default: 500, max: 1000) + * @property nextToken - Token for pagination from previous response + * @property returnData - Whether to include vector data in response + * @property returnMetadata - Whether to include metadata in response + * @property segmentCount - Total number of parallel segments (1-16) + * @property segmentIndex - Zero-based index of this segment (0 to segmentCount-1) + */ +export interface ListVectorsOptions { + vectorBucketName: string + indexName: string + maxResults?: number + nextToken?: string + returnData?: boolean + returnMetadata?: boolean + segmentCount?: number + segmentIndex?: number +} + +/** + * Response from listing vectors + * @property vectors - Array of vector objects + * @property nextToken - Token for fetching next page (if more results exist) + */ +export interface ListVectorsResponse { + vectors: VectorMatch[] + nextToken?: string +} + +/** + * JSON filter expression for metadata filtering + * Format and syntax depend on the S3 
Vectors service implementation + */ +export type VectorFilter = Record + +/** + * Options for querying similar vectors (ANN search) + * @property vectorBucketName - Name of the vector bucket + * @property indexName - Name of the index + * @property queryVector - Query vector to find similar vectors + * @property topK - Number of nearest neighbors to return (default: 10) + * @property filter - Optional JSON filter for metadata + * @property returnDistance - Whether to include distance scores + * @property returnMetadata - Whether to include metadata in results + */ +export interface QueryVectorsOptions { + vectorBucketName: string + indexName: string + queryVector: VectorData + topK?: number + filter?: VectorFilter + returnDistance?: boolean + returnMetadata?: boolean +} + +/** + * Response from vector similarity query + * @property matches - Array of similar vectors ordered by distance + */ +export interface QueryVectorsResponse { + matches: VectorMatch[] +} + +/** + * Fetch-specific parameters like abort signals + * @property signal - AbortSignal for cancelling requests + */ +export interface VectorFetchParameters { + signal?: AbortSignal +} + +/** + * Standard response wrapper for successful operations + * @property data - Response data of type T + * @property error - Null on success + */ +export interface SuccessResponse { + data: T + error: null +} + +/** + * Standard response wrapper for failed operations + * @property data - Null on error + * @property error - StorageVectorsError with details + */ +export interface ErrorResponse { + data: null + error: StorageVectorsError +} + +/** + * Union type for all API responses + * Follows the pattern: { data: T, error: null } | { data: null, error: Error } + */ +export type ApiResponse = SuccessResponse | ErrorResponse diff --git a/packages/core/storage-js/src/packages/StorageAnalyticsApi.ts b/packages/core/storage-js/src/packages/StorageAnalyticsApi.ts new file mode 100644 index 000000000..cd23f6877 --- /dev/null +++ 
b/packages/core/storage-js/src/packages/StorageAnalyticsApi.ts @@ -0,0 +1,202 @@ +import { DEFAULT_HEADERS } from '../lib/constants' +import { isStorageError, StorageError } from '../lib/errors' +import { Fetch, get, post, remove } from '../lib/fetch' +import { resolveFetch } from '../lib/helpers' +import { AnalyticBucket, Bucket } from '../lib/types' + +/** + * API class for managing Analytics Buckets using Iceberg tables + * Provides methods for creating, listing, and deleting analytics buckets + */ +export default class StorageAnalyticsApi { + protected url: string + protected headers: { [key: string]: string } + protected fetch: Fetch + protected shouldThrowOnError = false + + /** + * Creates a new StorageAnalyticsApi instance + * @param url - The base URL for the storage API + * @param headers - HTTP headers to include in requests + * @param fetch - Optional custom fetch implementation + */ + constructor(url: string, headers: { [key: string]: string } = {}, fetch?: Fetch) { + this.url = url.replace(/\/$/, '') + this.headers = { ...DEFAULT_HEADERS, ...headers } + this.fetch = resolveFetch(fetch) + } + + /** + * Enable throwing errors instead of returning them in the response + * When enabled, failed operations will throw instead of returning { data: null, error } + * + * @returns This instance for method chaining + */ + public throwOnError(): this { + this.shouldThrowOnError = true + return this + } + + /** + * Creates a new analytics bucket using Iceberg tables + * Analytics buckets are optimized for analytical queries and data processing + * + * @param name A unique name for the bucket you are creating + * @returns Promise with newly created bucket name or error + * + * @example + * ```typescript + * const { data, error } = await storage.analytics.createBucket('analytics-data') + * if (error) { + * console.error('Failed to create analytics bucket:', error.message) + * } else { + * console.log('Created bucket:', data.name) + * } + * ``` + */ + async 
createBucket(name: string): Promise< + | { + data: AnalyticBucket + error: null + } + | { + data: null + error: StorageError + } + > { + try { + const data = await post(this.fetch, `${this.url}/bucket`, { name }, { headers: this.headers }) + return { data, error: null } + } catch (error) { + if (this.shouldThrowOnError) { + throw error + } + if (isStorageError(error)) { + return { data: null, error } + } + + throw error + } + } + + /** + * Retrieves the details of all Analytics Storage buckets within an existing project + * Only returns buckets of type 'ANALYTICS' + * + * @param options Query parameters for listing buckets + * @param options.limit Maximum number of buckets to return + * @param options.offset Number of buckets to skip + * @param options.sortColumn Column to sort by ('id', 'name', 'created_at', 'updated_at') + * @param options.sortOrder Sort order ('asc' or 'desc') + * @param options.search Search term to filter bucket names + * @returns Promise with list of analytics buckets or error + * + * @example + * ```typescript + * const { data, error } = await storage.analytics.listBuckets({ + * limit: 10, + * offset: 0, + * sortColumn: 'created_at', + * sortOrder: 'desc', + * search: 'analytics' + * }) + * if (data) { + * console.log('Found analytics buckets:', data.length) + * data.forEach(bucket => console.log(`- ${bucket.name}`)) + * } + * ``` + */ + async listBuckets(options?: { + limit?: number + offset?: number + sortColumn?: 'id' | 'name' | 'created_at' | 'updated_at' + sortOrder?: 'asc' | 'desc' + search?: string + }): Promise< + | { + data: AnalyticBucket[] + error: null + } + | { + data: null + error: StorageError + } + > { + try { + // Build query string from options + const queryParams = new URLSearchParams() + if (options?.limit !== undefined) queryParams.set('limit', options.limit.toString()) + if (options?.offset !== undefined) queryParams.set('offset', options.offset.toString()) + if (options?.sortColumn) queryParams.set('sortColumn', 
options.sortColumn) + if (options?.sortOrder) queryParams.set('sortOrder', options.sortOrder) + if (options?.search) queryParams.set('search', options.search) + + const queryString = queryParams.toString() + const url = queryString ? `${this.url}/bucket?${queryString}` : `${this.url}/bucket` + + const data = await get(this.fetch, url, { headers: this.headers }) + // Filter to only return analytics buckets + const analyticsBuckets = Array.isArray(data) + ? data.filter((bucket: Bucket) => bucket.type === 'ANALYTICS') + : [] + return { data: analyticsBuckets, error: null } + } catch (error) { + if (this.shouldThrowOnError) { + throw error + } + if (isStorageError(error)) { + return { data: null, error } + } + + throw error + } + } + + /** + * Deletes an existing analytics bucket + * A bucket can't be deleted with existing objects inside it + * You must first empty the bucket before deletion + * + * @param bucketId The unique identifier of the bucket you would like to delete + * @returns Promise with success message or error + * + * @example + * ```typescript + * const { data, error } = await analyticsApi.deleteBucket('old-analytics-bucket') + * if (error) { + * console.error('Failed to delete bucket:', error.message) + * } else { + * console.log('Bucket deleted successfully:', data.message) + * } + * ``` + */ + async deleteBucket(bucketId: string): Promise< + | { + data: { message: string } + error: null + } + | { + data: null + error: StorageError + } + > { + try { + const data = await remove( + this.fetch, + `${this.url}/bucket/${bucketId}`, + {}, + { headers: this.headers } + ) + return { data, error: null } + } catch (error) { + if (this.shouldThrowOnError) { + throw error + } + if (isStorageError(error)) { + return { data: null, error } + } + + throw error + } + } +} diff --git a/packages/core/storage-js/test/bucket-api.spec.ts b/packages/core/storage-js/test/bucket-api.spec.ts new file mode 100644 index 000000000..deb51964b --- /dev/null +++ 
b/packages/core/storage-js/test/bucket-api.spec.ts @@ -0,0 +1,268 @@ +/** + * Integration tests for Vector Bucket API + * Tests all bucket operations: create, get, list, delete + */ + +import { + createTestClient, + setupTest, + generateTestName, + assertSuccessResponse, + assertErrorResponse, + assertErrorCode, +} from './helpers' + +describe('VectorBucketApi Integration Tests', () => { + let client: ReturnType + + beforeEach(() => { + setupTest() + client = createTestClient() + }) + + describe('createBucket', () => { + it('should create a new vector bucket successfully', async () => { + const bucketName = generateTestName('test-bucket') + + const response = await client.createBucket(bucketName) + + assertSuccessResponse(response) + expect(response.data).toEqual({}) + }) + + it('should return conflict error when bucket already exists', async () => { + const bucketName = generateTestName('test-bucket') + + // Create bucket first time + await client.createBucket(bucketName) + + // Try to create again + const response = await client.createBucket(bucketName) + + const error = assertErrorResponse(response) + assertErrorCode(error, 409) + expect(error.message).toContain('already exists') + }) + + it('should create multiple buckets with different names', async () => { + const bucket1 = generateTestName('test-bucket-1') + const bucket2 = generateTestName('test-bucket-2') + + const response1 = await client.createBucket(bucket1) + const response2 = await client.createBucket(bucket2) + + assertSuccessResponse(response1) + assertSuccessResponse(response2) + }) + }) + + describe('getBucket', () => { + it('should retrieve an existing bucket', async () => { + const bucketName = generateTestName('test-bucket') + + // Create bucket + await client.createBucket(bucketName) + + // Retrieve bucket + const response = await client.getBucket(bucketName) + + const data = assertSuccessResponse(response) + expect(data.vectorBucket).toBeDefined() + 
expect(data.vectorBucket.vectorBucketName).toBe(bucketName) + expect(data.vectorBucket.creationTime).toBeDefined() + expect(typeof data.vectorBucket.creationTime).toBe('number') + }) + + it('should return not found error for non-existent bucket', async () => { + const response = await client.getBucket('non-existent-bucket') + + const error = assertErrorResponse(response) + assertErrorCode(error, 404) + expect(error.message).toContain('not found') + }) + + it('should return bucket with encryption configuration if set', async () => { + const bucketName = generateTestName('test-bucket') + + await client.createBucket(bucketName) + const response = await client.getBucket(bucketName) + + const data = assertSuccessResponse(response) + // Encryption configuration is optional + if (data.vectorBucket.encryptionConfiguration) { + expect(data.vectorBucket.encryptionConfiguration).toHaveProperty('sseType') + } + }) + }) + + describe('listBuckets', () => { + it('should list all buckets', async () => { + const bucket1 = generateTestName('test-bucket-1') + const bucket2 = generateTestName('test-bucket-2') + + await client.createBucket(bucket1) + await client.createBucket(bucket2) + + const response = await client.listBuckets() + + const data = assertSuccessResponse(response) + expect(data.buckets).toBeDefined() + expect(Array.isArray(data.buckets)).toBe(true) + expect(data.buckets.length).toBeGreaterThanOrEqual(2) + + const bucketNames = data.buckets.map((b) => b.vectorBucketName) + expect(bucketNames).toContain(bucket1) + expect(bucketNames).toContain(bucket2) + }) + + it('should filter buckets by prefix', async () => { + const prefix = generateTestName('prefix-test') + const bucket1 = `${prefix}-bucket-1` + const bucket2 = `${prefix}-bucket-2` + const bucket3 = generateTestName('other-bucket') + + await client.createBucket(bucket1) + await client.createBucket(bucket2) + await client.createBucket(bucket3) + + const response = await client.listBuckets({ prefix }) + + const data = 
assertSuccessResponse(response) + expect(data.buckets.length).toBeGreaterThanOrEqual(2) + + const bucketNames = data.buckets.map((b) => b.vectorBucketName) + expect(bucketNames).toContain(bucket1) + expect(bucketNames).toContain(bucket2) + // bucket3 should not be included as it doesn't match prefix + const hasOtherBucket = bucketNames.some((name) => name.includes('other-bucket')) + if (hasOtherBucket) { + // If other buckets exist, they should match the prefix + expect(bucketNames.every((name) => name.startsWith(prefix))).toBe(true) + } + }) + + it('should support pagination with maxResults', async () => { + const response = await client.listBuckets({ maxResults: 1 }) + + const data = assertSuccessResponse(response) + expect(data.buckets.length).toBeLessThanOrEqual(1) + + if (data.buckets.length === 1 && data.nextToken) { + expect(data.nextToken).toBeDefined() + expect(typeof data.nextToken).toBe('string') + } + }) + + it('should return empty array when no buckets match prefix', async () => { + const response = await client.listBuckets({ + prefix: 'non-existent-prefix-' + Date.now(), + }) + + const data = assertSuccessResponse(response) + expect(data.buckets).toEqual([]) + expect(data.nextToken).toBeUndefined() + }) + }) + + describe('deleteBucket', () => { + it('should delete an empty bucket successfully', async () => { + const bucketName = generateTestName('test-bucket') + + await client.createBucket(bucketName) + + const response = await client.deleteBucket(bucketName) + + assertSuccessResponse(response) + expect(response.data).toEqual({}) + + // Verify bucket is deleted + const getResponse = await client.getBucket(bucketName) + assertErrorResponse(getResponse) + }) + + it('should return not found error for non-existent bucket', async () => { + const response = await client.deleteBucket('non-existent-bucket') + + const error = assertErrorResponse(response) + assertErrorCode(error, 404) + }) + + it('should return error when bucket is not empty', async () => { + 
const bucketName = generateTestName('test-bucket') + + await client.createBucket(bucketName) + + // Create an index in the bucket + const bucket = client.from(bucketName) + await bucket.createIndex({ + indexName: 'test-index', + dataType: 'float32', + dimension: 3, + distanceMetric: 'cosine', + }) + + // Try to delete bucket with index + const response = await client.deleteBucket(bucketName) + + const error = assertErrorResponse(response) + assertErrorCode(error, 400) + expect(error.message).toContain('not empty') + }) + + it('should successfully delete bucket after removing all indexes', async () => { + const bucketName = generateTestName('test-bucket') + + await client.createBucket(bucketName) + + const bucket = client.from(bucketName) + await bucket.createIndex({ + indexName: 'test-index', + dataType: 'float32', + dimension: 3, + distanceMetric: 'cosine', + }) + + // Delete the index first + await bucket.deleteIndex('test-index') + + // Now delete the bucket + const response = await client.deleteBucket(bucketName) + + assertSuccessResponse(response) + }) + }) + + describe('throwOnError mode', () => { + it('should throw error instead of returning error response', async () => { + client.throwOnError() + + await expect(client.getBucket('non-existent-bucket')).rejects.toThrow() + }) + + it('should still return data on success', async () => { + const bucketName = generateTestName('test-bucket') + client.throwOnError() + + await client.createBucket(bucketName) + const response = await client.getBucket(bucketName) + + expect(response.data).toBeDefined() + expect(response.error).toBeNull() + }) + }) + + describe('VectorBucketScope (from)', () => { + it('should create a bucket scope successfully', async () => { + const bucketName = generateTestName('test-bucket') + + await client.createBucket(bucketName) + + const bucketScope = client.from(bucketName) + + expect(bucketScope).toBeDefined() + expect(typeof bucketScope.createIndex).toBe('function') + expect(typeof 
bucketScope.listIndexes).toBe('function') + expect(typeof bucketScope.index).toBe('function') + }) + }) +}) diff --git a/packages/core/storage-js/test/e2e-workflow.spec.ts b/packages/core/storage-js/test/e2e-workflow.spec.ts new file mode 100644 index 000000000..1fe239788 --- /dev/null +++ b/packages/core/storage-js/test/e2e-workflow.spec.ts @@ -0,0 +1,484 @@ +/** + * End-to-end workflow integration tests + * Tests complete workflows from bucket creation to vector operations + */ + +import { + createTestClient, + setupTest, + generateTestName, + generateRandomVector, + assertSuccessResponse, +} from './helpers' + +describe('End-to-End Workflow Tests', () => { + let client: ReturnType + + beforeEach(() => { + setupTest() + client = createTestClient() + }) + + describe('Complete Vector Search Workflow', () => { + it('should complete full workflow: create bucket, index, insert, query, delete', async () => { + const bucketName = generateTestName('e2e-bucket') + const indexName = generateTestName('e2e-index') + + // Step 1: Create bucket + const createBucketResponse = await client.createBucket(bucketName) + assertSuccessResponse(createBucketResponse) + + // Step 2: Verify bucket exists + const getBucketResponse = await client.getBucket(bucketName) + const bucketData = assertSuccessResponse(getBucketResponse) + expect(bucketData.vectorBucket.vectorBucketName).toBe(bucketName) + + // Step 3: Create index + const bucket = client.from(bucketName) + const createIndexResponse = await bucket.createIndex({ + indexName, + dataType: 'float32', + dimension: 384, + distanceMetric: 'cosine', + metadataConfiguration: { + nonFilterableMetadataKeys: ['raw_text'], + }, + }) + assertSuccessResponse(createIndexResponse) + + // Step 4: Verify index exists + const getIndexResponse = await bucket.getIndex(indexName) + const indexData = assertSuccessResponse(getIndexResponse) + expect(indexData.index.indexName).toBe(indexName) + expect(indexData.index.dimension).toBe(384) + 
expect(indexData.index.distanceMetric).toBe('cosine') + + // Step 5: Insert vectors + const index = bucket.index(indexName) + const documents = [ + { + key: 'doc-1', + data: { float32: generateRandomVector(384) }, + metadata: { + title: 'Introduction to Vector Databases', + category: 'tech', + published: true, + }, + }, + { + key: 'doc-2', + data: { float32: generateRandomVector(384) }, + metadata: { + title: 'Advanced Vector Search Techniques', + category: 'tech', + published: true, + }, + }, + { + key: 'doc-3', + data: { float32: generateRandomVector(384) }, + metadata: { + title: 'Machine Learning Fundamentals', + category: 'science', + published: false, + }, + }, + ] + + const putResponse = await index.putVectors({ vectors: documents }) + assertSuccessResponse(putResponse) + + // Step 6: Query for similar vectors + const queryResponse = await index.queryVectors({ + queryVector: { float32: generateRandomVector(384) }, + topK: 2, + filter: { published: true }, + returnDistance: true, + returnMetadata: true, + }) + + const queryData = assertSuccessResponse(queryResponse) + expect(queryData.matches.length).toBeGreaterThan(0) + expect(queryData.matches.length).toBeLessThanOrEqual(2) + + // All matches should have published: true + for (const match of queryData.matches) { + expect(match.metadata?.published).toBe(true) + } + + // Step 7: List all vectors + const listResponse = await index.listVectors({ + returnMetadata: true, + }) + + const listData = assertSuccessResponse(listResponse) + expect(listData.vectors.length).toBeGreaterThanOrEqual(3) + + // Step 8: Get specific vectors + const getResponse = await index.getVectors({ + keys: ['doc-1', 'doc-2'], + returnData: true, + returnMetadata: true, + }) + + const getData = assertSuccessResponse(getResponse) + expect(getData.vectors.length).toBe(2) + + // Step 9: Update a vector + const updateResponse = await index.putVectors({ + vectors: [ + { + key: 'doc-1', + data: { float32: generateRandomVector(384) }, + metadata: 
{ + title: 'Updated: Introduction to Vector Databases', + category: 'tech', + published: true, + updated: true, + }, + }, + ], + }) + assertSuccessResponse(updateResponse) + + // Step 10: Verify update + const verifyResponse = await index.getVectors({ + keys: ['doc-1'], + returnMetadata: true, + }) + + const verifyData = assertSuccessResponse(verifyResponse) + expect(verifyData.vectors[0].metadata?.updated).toBe(true) + + // Step 11: Delete some vectors + const deleteResponse = await index.deleteVectors({ + keys: ['doc-3'], + }) + assertSuccessResponse(deleteResponse) + + // Step 12: Verify deletion + const verifyDeleteResponse = await index.getVectors({ + keys: ['doc-3'], + }) + + const verifyDeleteData = assertSuccessResponse(verifyDeleteResponse) + expect(verifyDeleteData.vectors).toEqual([]) + + // Step 13: Delete index + const deleteIndexResponse = await bucket.deleteIndex(indexName) + assertSuccessResponse(deleteIndexResponse) + + // Step 14: Delete bucket + const deleteBucketResponse = await client.deleteBucket(bucketName) + assertSuccessResponse(deleteBucketResponse) + }) + }) + + describe('Multi-Index Workflow', () => { + it('should manage multiple indexes in the same bucket', async () => { + const bucketName = generateTestName('multi-index-bucket') + + // Create bucket + await client.createBucket(bucketName) + const bucket = client.from(bucketName) + + // Create multiple indexes with different configurations + const indexes = [ + { + name: 'embeddings-small', + dimension: 384, + metric: 'cosine' as const, + }, + { + name: 'embeddings-large', + dimension: 1536, + metric: 'euclidean' as const, + }, + { + name: 'embeddings-dotproduct', + dimension: 768, + metric: 'dotproduct' as const, + }, + ] + + for (const indexConfig of indexes) { + const response = await bucket.createIndex({ + indexName: indexConfig.name, + dataType: 'float32', + dimension: indexConfig.dimension, + distanceMetric: indexConfig.metric, + }) + assertSuccessResponse(response) + } + + // 
List all indexes + const listResponse = await bucket.listIndexes() + const listData = assertSuccessResponse(listResponse) + expect(listData.indexes.length).toBeGreaterThanOrEqual(3) + + // Insert vectors into each index + for (const indexConfig of indexes) { + const index = bucket.index(indexConfig.name) + const response = await index.putVectors({ + vectors: [ + { + key: 'vec-1', + data: { float32: generateRandomVector(indexConfig.dimension) }, + metadata: { index: indexConfig.name }, + }, + ], + }) + assertSuccessResponse(response) + } + + // Query each index + for (const indexConfig of indexes) { + const index = bucket.index(indexConfig.name) + const response = await index.queryVectors({ + queryVector: { float32: generateRandomVector(indexConfig.dimension) }, + topK: 1, + returnMetadata: true, + }) + const data = assertSuccessResponse(response) + expect(data.matches.length).toBeGreaterThan(0) + } + + // Cleanup: Delete all indexes + for (const indexConfig of indexes) { + await bucket.deleteIndex(indexConfig.name) + } + + // Delete bucket + await client.deleteBucket(bucketName) + }) + }) + + describe('Semantic Search Workflow', () => { + it('should perform semantic search with metadata filtering', async () => { + const bucketName = generateTestName('semantic-bucket') + const indexName = generateTestName('semantic-index') + + // Setup + await client.createBucket(bucketName) + const bucket = client.from(bucketName) + await bucket.createIndex({ + indexName, + dataType: 'float32', + dimension: 128, + distanceMetric: 'cosine', + }) + + const index = bucket.index(indexName) + + // Insert documents with semantic embeddings + const documents = [ + { + key: 'article-1', + data: { float32: generateRandomVector(128) }, + metadata: { + type: 'article', + category: 'technology', + tags: ['ai', 'ml'], + year: 2024, + score: 4.5, + }, + }, + { + key: 'article-2', + data: { float32: generateRandomVector(128) }, + metadata: { + type: 'article', + category: 'technology', + tags: 
['web', 'dev'], + year: 2023, + score: 4.0, + }, + }, + { + key: 'paper-1', + data: { float32: generateRandomVector(128) }, + metadata: { + type: 'paper', + category: 'science', + tags: ['research', 'ml'], + year: 2024, + score: 5.0, + }, + }, + { + key: 'blog-1', + data: { float32: generateRandomVector(128) }, + metadata: { + type: 'blog', + category: 'technology', + tags: ['tutorial'], + year: 2024, + score: 3.5, + }, + }, + ] + + await index.putVectors({ vectors: documents }) + + // Search 1: Find technology articles + const tech1Response = await index.queryVectors({ + queryVector: { float32: generateRandomVector(128) }, + topK: 10, + filter: { type: 'article', category: 'technology' }, + returnMetadata: true, + }) + + const tech1Data = assertSuccessResponse(tech1Response) + expect(tech1Data.matches.length).toBeGreaterThan(0) + for (const match of tech1Data.matches) { + expect(match.metadata?.type).toBe('article') + expect(match.metadata?.category).toBe('technology') + } + + // Search 2: Find 2024 content + const year2024Response = await index.queryVectors({ + queryVector: { float32: generateRandomVector(128) }, + topK: 10, + filter: { year: 2024 }, + returnMetadata: true, + }) + + const year2024Data = assertSuccessResponse(year2024Response) + expect(year2024Data.matches.length).toBeGreaterThan(0) + for (const match of year2024Data.matches) { + expect(match.metadata?.year).toBe(2024) + } + + // Search 3: Find papers + const papersResponse = await index.queryVectors({ + queryVector: { float32: generateRandomVector(128) }, + topK: 10, + filter: { type: 'paper' }, + returnMetadata: true, + }) + + const papersData = assertSuccessResponse(papersResponse) + expect(papersData.matches.length).toBeGreaterThan(0) + for (const match of papersData.matches) { + expect(match.metadata?.type).toBe('paper') + } + + // Cleanup + await bucket.deleteIndex(indexName) + await client.deleteBucket(bucketName) + }) + }) + + describe('Batch Processing Workflow', () => { + it('should 
handle large-scale batch operations', async () => { + const bucketName = generateTestName('batch-bucket') + const indexName = generateTestName('batch-index') + + // Setup + await client.createBucket(bucketName) + const bucket = client.from(bucketName) + await bucket.createIndex({ + indexName, + dataType: 'float32', + dimension: 64, + distanceMetric: 'cosine', + }) + + const index = bucket.index(indexName) + + // Insert in batches of 500 + const totalVectors = 1000 + const batchSize = 500 + + for (let i = 0; i < totalVectors; i += batchSize) { + const batch = Array.from({ length: Math.min(batchSize, totalVectors - i) }, (_, j) => ({ + key: `vector-${i + j}`, + data: { float32: generateRandomVector(64) }, + metadata: { batch: Math.floor((i + j) / batchSize), index: i + j }, + })) + + const response = await index.putVectors({ vectors: batch }) + assertSuccessResponse(response) + } + + // List all vectors (paginated) + const listResponse = await index.listVectors({ + maxResults: 100, + returnMetadata: true, + }) + + const listData = assertSuccessResponse(listResponse) + expect(listData.vectors.length).toBeGreaterThan(0) + expect(listData.vectors.length).toBeLessThanOrEqual(100) + + // Query for similar vectors + const queryResponse = await index.queryVectors({ + queryVector: { float32: generateRandomVector(64) }, + topK: 10, + returnDistance: true, + }) + + const queryData = assertSuccessResponse(queryResponse) + expect(queryData.matches.length).toBeGreaterThan(0) + expect(queryData.matches.length).toBeLessThanOrEqual(10) + + // Delete in batches + const keysToDelete = Array.from({ length: 100 }, (_, i) => `vector-${i}`) + const deleteResponse = await index.deleteVectors({ keys: keysToDelete }) + assertSuccessResponse(deleteResponse) + + // Verify deletion + const getResponse = await index.getVectors({ keys: keysToDelete.slice(0, 10) }) + const getData = assertSuccessResponse(getResponse) + expect(getData.vectors).toEqual([]) + + // Cleanup + await 
bucket.deleteIndex(indexName) + await client.deleteBucket(bucketName) + }) + }) + + describe('Error Recovery Workflow', () => { + it('should handle errors gracefully and allow recovery', async () => { + const bucketName = generateTestName('error-bucket') + const indexName = generateTestName('error-index') + + // Create bucket + await client.createBucket(bucketName) + const bucket = client.from(bucketName) + + // Try to create index in non-existent bucket (error) + const badBucket = client.from('non-existent-bucket') + const errorResponse = await badBucket.createIndex({ + indexName, + dataType: 'float32', + dimension: 3, + distanceMetric: 'cosine', + }) + + expect(errorResponse.error).toBeTruthy() + + // Recover: Create index in correct bucket + const goodResponse = await bucket.createIndex({ + indexName, + dataType: 'float32', + dimension: 3, + distanceMetric: 'cosine', + }) + + assertSuccessResponse(goodResponse) + + // Continue normal operations + const index = bucket.index(indexName) + const putResponse = await index.putVectors({ + vectors: [{ key: 'vec-1', data: { float32: [0.1, 0.2, 0.3] } }], + }) + + assertSuccessResponse(putResponse) + + // Cleanup + await bucket.deleteIndex(indexName) + await client.deleteBucket(bucketName) + }) + }) +}) diff --git a/packages/core/storage-js/test/helpers.ts b/packages/core/storage-js/test/helpers.ts new file mode 100644 index 000000000..901e9c94a --- /dev/null +++ b/packages/core/storage-js/test/helpers.ts @@ -0,0 +1,161 @@ +/** + * Test helpers and utilities + */ + +/// + +import { StorageVectorsClient } from '../src/lib/vectors' +import { createMockFetch, resetMockStorage } from './mock-server' +import { getTestConfig } from './setup' + +/** + * Create a test client based on configuration + */ +export function createTestClient(): StorageVectorsClient { + const config = getTestConfig() + + if (config.useMockServer) { + return new StorageVectorsClient('https://mock.example.com', { + fetch: createMockFetch(), + headers: {}, 
+    })
+  }
+
+  if (!config.apiUrl) {
+    throw new Error(
+      'STORAGE_VECTORS_API_URL environment variable is required when USE_MOCK_SERVER=false'
+    )
+  }
+
+  return new StorageVectorsClient(config.apiUrl, {
+    headers: config.headers,
+  })
+}
+
+/**
+ * Setup before each test
+ */
+export function setupTest() {
+  const config = getTestConfig()
+  if (config.useMockServer) {
+    resetMockStorage()
+  }
+}
+
+/**
+ * Generate unique test names to avoid conflicts
+ */
+export function generateTestName(prefix: string): string {
+  const timestamp = Date.now()
+  const random = Math.random().toString(36).substring(7)
+  return `${prefix}-${timestamp}-${random}`
+}
+
+/**
+ * Sleep utility for tests
+ */
+export function sleep(ms: number): Promise<void> {
+  return new Promise((resolve) => setTimeout(resolve, ms))
+}
+
+/**
+ * Retry a function with exponential backoff
+ */
+export async function retry<T>(
+  fn: () => Promise<T>,
+  options: {
+    maxAttempts?: number
+    initialDelay?: number
+    maxDelay?: number
+    factor?: number
+  } = {}
+): Promise<T> {
+  const { maxAttempts = 3, initialDelay = 100, maxDelay = 5000, factor = 2 } = options
+
+  let lastError: Error | undefined
+  let delay = initialDelay
+
+  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
+    try {
+      return await fn()
+    } catch (error) {
+      lastError = error as Error
+      if (attempt < maxAttempts) {
+        await sleep(Math.min(delay, maxDelay))
+        delay *= factor
+      }
+    }
+  }
+
+  throw lastError
+}
+
+/**
+ * Assert that an error has a specific status code
+ */
+export function assertErrorCode(error: any, expectedCode: number) {
+  expect(error).toBeTruthy()
+  expect(error.statusCode.toString()).toBe(expectedCode.toString())
+}
+
+/**
+ * Assert that data is successfully returned
+ */
+export function assertSuccessResponse<T>(response: { data: T | null; error: any }): T {
+  expect(response.error).toBeNull()
+  expect(response.data).toBeTruthy()
+  return response.data!
+} + +/** + * Assert that an error response is returned + */ +export function assertErrorResponse(response: { data: any; error: any }) { + expect(response.data).toBeNull() + expect(response.error).toBeTruthy() + return response.error +} + +/** + * Generate a random vector of specified dimension + */ +export function generateRandomVector(dimension: number): number[] { + return Array.from({ length: dimension }, () => Math.random()) +} + +/** + * Calculate cosine similarity between two vectors + */ +export function cosineSimilarity(a: number[], b: number[]): number { + if (a.length !== b.length) { + throw new Error('Vectors must have the same dimension') + } + + let dotProduct = 0 + let normA = 0 + let normB = 0 + + for (let i = 0; i < a.length; i++) { + dotProduct += a[i] * b[i] + normA += a[i] * a[i] + normB += b[i] * b[i] + } + + normA = Math.sqrt(normA) + normB = Math.sqrt(normB) + + if (normA === 0 || normB === 0) { + return 0 + } + + return dotProduct / (normA * normB) +} + +/** + * Normalize a vector to unit length + */ +export function normalizeVector(vector: number[]): number[] { + const norm = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0)) + if (norm === 0) return vector + return vector.map((val) => val / norm) +} diff --git a/packages/core/storage-js/test/index-api.spec.ts b/packages/core/storage-js/test/index-api.spec.ts new file mode 100644 index 000000000..010f1eaca --- /dev/null +++ b/packages/core/storage-js/test/index-api.spec.ts @@ -0,0 +1,431 @@ +/** + * Integration tests for Vector Index API + * Tests all index operations: create, get, list, delete + */ + +import { + createTestClient, + setupTest, + generateTestName, + assertSuccessResponse, + assertErrorResponse, + assertErrorCode, +} from './helpers' + +describe('VectorIndexApi Integration Tests', () => { + let client: ReturnType + let testBucket: string + + beforeEach(async () => { + setupTest() + client = createTestClient() + testBucket = generateTestName('test-bucket') + await 
client.createBucket(testBucket) + }) + + describe('createIndex', () => { + it('should create a new index with all required parameters', async () => { + const indexName = generateTestName('test-index') + const bucket = client.from(testBucket) + + const response = await bucket.createIndex({ + indexName, + dataType: 'float32', + dimension: 1536, + distanceMetric: 'cosine', + }) + + assertSuccessResponse(response) + expect(response.data).toEqual({}) + }) + + it('should create index with euclidean distance metric', async () => { + const indexName = generateTestName('test-index') + const bucket = client.from(testBucket) + + const response = await bucket.createIndex({ + indexName, + dataType: 'float32', + dimension: 768, + distanceMetric: 'euclidean', + }) + + assertSuccessResponse(response) + }) + + it('should create index with dotproduct distance metric', async () => { + const indexName = generateTestName('test-index') + const bucket = client.from(testBucket) + + const response = await bucket.createIndex({ + indexName, + dataType: 'float32', + dimension: 384, + distanceMetric: 'dotproduct', + }) + + assertSuccessResponse(response) + }) + + it('should create index with metadata configuration', async () => { + const indexName = generateTestName('test-index') + const bucket = client.from(testBucket) + + const response = await bucket.createIndex({ + indexName, + dataType: 'float32', + dimension: 1536, + distanceMetric: 'cosine', + metadataConfiguration: { + nonFilterableMetadataKeys: ['raw_text', 'internal_id'], + }, + }) + + assertSuccessResponse(response) + }) + + it('should return conflict error when index already exists', async () => { + const indexName = generateTestName('test-index') + const bucket = client.from(testBucket) + + // Create index first time + await bucket.createIndex({ + indexName, + dataType: 'float32', + dimension: 3, + distanceMetric: 'cosine', + }) + + // Try to create again + const response = await bucket.createIndex({ + indexName, + dataType: 
'float32', + dimension: 3, + distanceMetric: 'cosine', + }) + + const error = assertErrorResponse(response) + assertErrorCode(error, 409) + expect(error.message).toContain('already exists') + }) + + it('should return not found error when bucket does not exist', async () => { + const bucket = client.from('non-existent-bucket') + + const response = await bucket.createIndex({ + indexName: 'test-index', + dataType: 'float32', + dimension: 3, + distanceMetric: 'cosine', + }) + + const error = assertErrorResponse(response) + assertErrorCode(error, 404) + }) + + it('should create multiple indexes in the same bucket', async () => { + const index1 = generateTestName('test-index-1') + const index2 = generateTestName('test-index-2') + const bucket = client.from(testBucket) + + const response1 = await bucket.createIndex({ + indexName: index1, + dataType: 'float32', + dimension: 3, + distanceMetric: 'cosine', + }) + + const response2 = await bucket.createIndex({ + indexName: index2, + dataType: 'float32', + dimension: 5, + distanceMetric: 'euclidean', + }) + + assertSuccessResponse(response1) + assertSuccessResponse(response2) + }) + + it('should create indexes with different dimensions', async () => { + const dimensions = [3, 128, 384, 768, 1536, 3072] + const bucket = client.from(testBucket) + + for (const dim of dimensions) { + const indexName = generateTestName(`test-index-${dim}`) + const response = await bucket.createIndex({ + indexName, + dataType: 'float32', + dimension: dim, + distanceMetric: 'cosine', + }) + + assertSuccessResponse(response) + } + }) + }) + + describe('getIndex', () => { + it('should retrieve an existing index', async () => { + const indexName = generateTestName('test-index') + const bucket = client.from(testBucket) + + await bucket.createIndex({ + indexName, + dataType: 'float32', + dimension: 1536, + distanceMetric: 'cosine', + }) + + const response = await bucket.getIndex(indexName) + + const data = assertSuccessResponse(response) + 
expect(data.index).toBeDefined() + expect(data.index.indexName).toBe(indexName) + expect(data.index.vectorBucketName).toBe(testBucket) + expect(data.index.dataType).toBe('float32') + expect(data.index.dimension).toBe(1536) + expect(data.index.distanceMetric).toBe('cosine') + expect(data.index.creationTime).toBeDefined() + expect(typeof data.index.creationTime).toBe('number') + }) + + it('should retrieve index with metadata configuration', async () => { + const indexName = generateTestName('test-index') + const bucket = client.from(testBucket) + + await bucket.createIndex({ + indexName, + dataType: 'float32', + dimension: 768, + distanceMetric: 'euclidean', + metadataConfiguration: { + nonFilterableMetadataKeys: ['raw_text'], + }, + }) + + const response = await bucket.getIndex(indexName) + + const data = assertSuccessResponse(response) + expect(data.index.metadataConfiguration).toBeDefined() + expect(data.index.metadataConfiguration?.nonFilterableMetadataKeys).toContain('raw_text') + }) + + it('should return not found error for non-existent index', async () => { + const bucket = client.from(testBucket) + const response = await bucket.getIndex('non-existent-index') + + const error = assertErrorResponse(response) + assertErrorCode(error, 404) + }) + + it('should return not found error when bucket does not exist', async () => { + const bucket = client.from('non-existent-bucket') + const response = await bucket.getIndex('test-index') + + const error = assertErrorResponse(response) + assertErrorCode(error, 404) + }) + }) + + describe('listIndexes', () => { + it('should list all indexes in a bucket', async () => { + const index1 = generateTestName('test-index-1') + const index2 = generateTestName('test-index-2') + const bucket = client.from(testBucket) + + await bucket.createIndex({ + indexName: index1, + dataType: 'float32', + dimension: 3, + distanceMetric: 'cosine', + }) + + await bucket.createIndex({ + indexName: index2, + dataType: 'float32', + dimension: 3, + 
distanceMetric: 'cosine', + }) + + const response = await bucket.listIndexes() + + const data = assertSuccessResponse(response) + expect(data.indexes).toBeDefined() + expect(Array.isArray(data.indexes)).toBe(true) + expect(data.indexes.length).toBeGreaterThanOrEqual(2) + + const indexNames = data.indexes.map((i) => i.indexName) + expect(indexNames).toContain(index1) + expect(indexNames).toContain(index2) + }) + + it('should filter indexes by prefix', async () => { + const prefix = generateTestName('prefix-test') + const index1 = `${prefix}-index-1` + const index2 = `${prefix}-index-2` + const index3 = generateTestName('other-index') + const bucket = client.from(testBucket) + + await bucket.createIndex({ + indexName: index1, + dataType: 'float32', + dimension: 3, + distanceMetric: 'cosine', + }) + + await bucket.createIndex({ + indexName: index2, + dataType: 'float32', + dimension: 3, + distanceMetric: 'cosine', + }) + + await bucket.createIndex({ + indexName: index3, + dataType: 'float32', + dimension: 3, + distanceMetric: 'cosine', + }) + + const response = await bucket.listIndexes({ prefix }) + + const data = assertSuccessResponse(response) + expect(data.indexes.length).toBeGreaterThanOrEqual(2) + + const indexNames = data.indexes.map((i) => i.indexName) + expect(indexNames).toContain(index1) + expect(indexNames).toContain(index2) + }) + + it('should support pagination with maxResults', async () => { + const bucket = client.from(testBucket) + + // Create multiple indexes + for (let i = 0; i < 3; i++) { + await bucket.createIndex({ + indexName: generateTestName(`test-index-${i}`), + dataType: 'float32', + dimension: 3, + distanceMetric: 'cosine', + }) + } + + const response = await bucket.listIndexes({ maxResults: 1 }) + + const data = assertSuccessResponse(response) + expect(data.indexes.length).toBeLessThanOrEqual(1) + + if (data.indexes.length === 1 && data.nextToken) { + expect(data.nextToken).toBeDefined() + expect(typeof data.nextToken).toBe('string') + } + 
}) + + it('should return empty array when no indexes exist', async () => { + const emptyBucket = generateTestName('empty-bucket') + await client.createBucket(emptyBucket) + + const bucket = client.from(emptyBucket) + const response = await bucket.listIndexes() + + const data = assertSuccessResponse(response) + expect(data.indexes).toEqual([]) + }) + + it('should return not found error when bucket does not exist', async () => { + const bucket = client.from('non-existent-bucket') + const response = await bucket.listIndexes() + + const error = assertErrorResponse(response) + assertErrorCode(error, 404) + }) + }) + + describe('deleteIndex', () => { + it('should delete an index successfully', async () => { + const indexName = generateTestName('test-index') + const bucket = client.from(testBucket) + + await bucket.createIndex({ + indexName, + dataType: 'float32', + dimension: 3, + distanceMetric: 'cosine', + }) + + const response = await bucket.deleteIndex(indexName) + + assertSuccessResponse(response) + expect(response.data).toEqual({}) + + // Verify index is deleted + const getResponse = await bucket.getIndex(indexName) + assertErrorResponse(getResponse) + }) + + it('should delete index with vectors', async () => { + const indexName = generateTestName('test-index') + const bucket = client.from(testBucket) + const index = bucket.index(indexName) + + await bucket.createIndex({ + indexName, + dataType: 'float32', + dimension: 3, + distanceMetric: 'cosine', + }) + + // Add some vectors + await index.putVectors({ + vectors: [ + { key: 'vec-1', data: { float32: [0.1, 0.2, 0.3] } }, + { key: 'vec-2', data: { float32: [0.4, 0.5, 0.6] } }, + ], + }) + + // Delete index (should delete vectors too) + const response = await bucket.deleteIndex(indexName) + + assertSuccessResponse(response) + }) + + it('should return not found error for non-existent index', async () => { + const bucket = client.from(testBucket) + const response = await bucket.deleteIndex('non-existent-index') + + 
const error = assertErrorResponse(response) + assertErrorCode(error, 404) + }) + + it('should return not found error when bucket does not exist', async () => { + const bucket = client.from('non-existent-bucket') + const response = await bucket.deleteIndex('test-index') + + const error = assertErrorResponse(response) + assertErrorCode(error, 404) + }) + }) + + describe('VectorIndexScope (index)', () => { + it('should create an index scope successfully', async () => { + const indexName = generateTestName('test-index') + const bucket = client.from(testBucket) + + await bucket.createIndex({ + indexName, + dataType: 'float32', + dimension: 3, + distanceMetric: 'cosine', + }) + + const indexScope = bucket.index(indexName) + + expect(indexScope).toBeDefined() + expect(typeof indexScope.putVectors).toBe('function') + expect(typeof indexScope.getVectors).toBe('function') + expect(typeof indexScope.listVectors).toBe('function') + expect(typeof indexScope.queryVectors).toBe('function') + expect(typeof indexScope.deleteVectors).toBe('function') + }) + }) +}) diff --git a/packages/core/storage-js/test/mock-server.ts b/packages/core/storage-js/test/mock-server.ts new file mode 100644 index 000000000..bd7b0eb0e --- /dev/null +++ b/packages/core/storage-js/test/mock-server.ts @@ -0,0 +1,723 @@ +/** + * Mock server implementation for testing + * Provides hardcoded responses for all API endpoints + */ + +/// + +import { testData } from './setup' +import type { Fetch } from '../src/lib/vectors' + +interface MockResponse { + status: number + data?: any + error?: { + statusCode: number + error: string + message: string + } +} + +/** + * In-memory storage for mock data + */ +class MockStorage { + private buckets = new Set() + private indexes = new Map>() // bucket -> index -> config + private vectors = new Map>() // bucket:index -> key -> vector + + constructor() { + // Initialize with test data + this.buckets.add(testData.buckets.test) + + const indexConfig = { + indexName: 
testData.indexes.test, + vectorBucketName: testData.buckets.test, + dataType: 'float32' as const, + dimension: 3, + distanceMetric: 'cosine' as const, + creationTime: Math.floor(Date.now() / 1000), + } + + const bucketIndexes = new Map() + bucketIndexes.set(testData.indexes.test, indexConfig) + this.indexes.set(testData.buckets.test, bucketIndexes) + + // Add sample vectors + const vectorKey = `${testData.buckets.test}:${testData.indexes.test}` + const vectorStorage = new Map() + + vectorStorage.set(testData.vectors.key1, { + key: testData.vectors.key1, + data: { float32: testData.sampleVectors.vector1 }, + metadata: testData.metadata.doc1, + }) + + vectorStorage.set(testData.vectors.key2, { + key: testData.vectors.key2, + data: { float32: testData.sampleVectors.vector2 }, + metadata: testData.metadata.doc2, + }) + + vectorStorage.set(testData.vectors.key3, { + key: testData.vectors.key3, + data: { float32: testData.sampleVectors.vector3 }, + metadata: testData.metadata.doc3, + }) + + this.vectors.set(vectorKey, vectorStorage) + } + + reset() { + this.buckets.clear() + this.indexes.clear() + this.vectors.clear() + } + + // Bucket operations + hasBucket(name: string): boolean { + return this.buckets.has(name) + } + + addBucket(name: string) { + this.buckets.add(name) + this.indexes.set(name, new Map()) + } + + removeBucket(name: string) { + this.buckets.delete(name) + this.indexes.delete(name) + } + + getBuckets(prefix?: string): string[] { + const buckets = Array.from(this.buckets) + if (prefix) { + return buckets.filter((b) => b.startsWith(prefix)) + } + return buckets + } + + // Index operations + hasIndex(bucketName: string, indexName: string): boolean { + return this.indexes.get(bucketName)?.has(indexName) ?? 
false + } + + addIndex(bucketName: string, config: any) { + let bucketIndexes = this.indexes.get(bucketName) + if (!bucketIndexes) { + bucketIndexes = new Map() + this.indexes.set(bucketName, bucketIndexes) + } + bucketIndexes.set(config.indexName, config) + + // Initialize vector storage for this index + const vectorKey = `${bucketName}:${config.indexName}` + if (!this.vectors.has(vectorKey)) { + this.vectors.set(vectorKey, new Map()) + } + } + + getIndex(bucketName: string, indexName: string): any { + return this.indexes.get(bucketName)?.get(indexName) + } + + getIndexes(bucketName: string, prefix?: string): any[] { + const bucketIndexes = this.indexes.get(bucketName) + if (!bucketIndexes) return [] + + const indexes = Array.from(bucketIndexes.values()) + if (prefix) { + return indexes.filter((i) => i.indexName.startsWith(prefix)) + } + return indexes + } + + removeIndex(bucketName: string, indexName: string) { + this.indexes.get(bucketName)?.delete(indexName) + const vectorKey = `${bucketName}:${indexName}` + this.vectors.delete(vectorKey) + } + + // Vector operations + getVectorStorage(bucketName: string, indexName: string): Map | undefined { + const vectorKey = `${bucketName}:${indexName}` + return this.vectors.get(vectorKey) + } + + putVector(bucketName: string, indexName: string, vector: any) { + const vectorKey = `${bucketName}:${indexName}` + let storage = this.vectors.get(vectorKey) + if (!storage) { + storage = new Map() + this.vectors.set(vectorKey, storage) + } + storage.set(vector.key, vector) + } + + getVector(bucketName: string, indexName: string, key: string): any { + const storage = this.getVectorStorage(bucketName, indexName) + return storage?.get(key) + } + + deleteVector(bucketName: string, indexName: string, key: string) { + const storage = this.getVectorStorage(bucketName, indexName) + storage?.delete(key) + } +} + +const storage = new MockStorage() + +/** + * Mock fetch implementation + */ +export function createMockFetch(): Fetch { + return 
async (input: string | URL | Request, init?: RequestInit): Promise => { + // Handle different input types safely without assuming Request constructor exists + const url = + typeof input === 'string' + ? input + : input instanceof URL + ? input.toString() + : (input as any).url || String(input) + const urlStr = url.toString() + const endpoint = urlStr.split('/').pop() || '' + const body = init?.body ? JSON.parse(init.body as string) : {} + const method = init?.method || 'GET' + + let response: MockResponse + + try { + response = await handleRequest(endpoint, method, body) + } catch (error: any) { + response = { + status: 500, + error: { + statusCode: 500, + error: 'Internal Server Error', + message: error.message, + }, + } + } + + // Create mock Response object + const responseBody = JSON.stringify(response.error || response.data || {}) + + // Check if Response constructor is available (Node 18+, modern browsers) + if (typeof Response !== 'undefined') { + return new Response(responseBody, { + status: response.status, + headers: { + 'Content-Type': 'application/json', + }, + }) as any + } + + // Fallback: Create a minimal Response-like object for older environments + const mockResponse: any = { + ok: response.status >= 200 && response.status < 300, + status: response.status, + statusText: response.status === 200 ? 'OK' : 'Error', + headers: { + get: (key: string) => (key.toLowerCase() === 'content-type' ? 
'application/json' : null), + }, + json: async () => JSON.parse(responseBody), + text: async () => responseBody, + } + + return mockResponse as Response + } +} + +async function handleRequest(endpoint: string, method: string, body: any): Promise { + // Bucket endpoints + if (endpoint === 'CreateVectorBucket') { + return handleCreateBucket(body) + } + if (endpoint === 'GetVectorBucket') { + return handleGetBucket(body) + } + if (endpoint === 'ListVectorBuckets') { + return handleListBuckets(body) + } + if (endpoint === 'DeleteVectorBucket') { + return handleDeleteBucket(body) + } + + // Index endpoints + if (endpoint === 'CreateIndex') { + return handleCreateIndex(body) + } + if (endpoint === 'GetIndex') { + return handleGetIndex(body) + } + if (endpoint === 'ListIndexes') { + return handleListIndexes(body) + } + if (endpoint === 'DeleteIndex') { + return handleDeleteIndex(body) + } + + // Vector data endpoints + if (endpoint === 'PutVectors') { + return handlePutVectors(body) + } + if (endpoint === 'GetVectors') { + return handleGetVectors(body) + } + if (endpoint === 'ListVectors') { + return handleListVectors(body) + } + if (endpoint === 'QueryVectors') { + return handleQueryVectors(body) + } + if (endpoint === 'DeleteVectors') { + return handleDeleteVectors(body) + } + + return { + status: 404, + error: { + statusCode: 404, + error: 'Not Found', + message: `Endpoint not found: ${endpoint}`, + }, + } +} + +// Bucket handlers +function handleCreateBucket(body: any): MockResponse { + const { vectorBucketName } = body + + if (storage.hasBucket(vectorBucketName)) { + return { + status: 409, + error: { + statusCode: 409, + error: 'Conflict', + message: `Bucket '${vectorBucketName}' already exists`, + }, + } + } + + storage.addBucket(vectorBucketName) + return { status: 200, data: {} } +} + +function handleGetBucket(body: any): MockResponse { + const { vectorBucketName } = body + + if (!storage.hasBucket(vectorBucketName)) { + return { + status: 404, + error: { + 
statusCode: 404, + error: 'Not Found', + message: `Bucket '${vectorBucketName}' not found`, + }, + } + } + + return { + status: 200, + data: { + vectorBucket: { + vectorBucketName, + creationTime: Math.floor(Date.now() / 1000), + }, + }, + } +} + +function handleListBuckets(body: any): MockResponse { + const { prefix, maxResults = 100 } = body + const buckets = storage.getBuckets(prefix) + + return { + status: 200, + data: { + buckets: buckets.slice(0, maxResults).map((name) => ({ vectorBucketName: name })), + nextToken: buckets.length > maxResults ? 'mock-next-token' : undefined, + }, + } +} + +function handleDeleteBucket(body: any): MockResponse { + const { vectorBucketName } = body + + if (!storage.hasBucket(vectorBucketName)) { + return { + status: 404, + error: { + statusCode: 404, + error: 'Not Found', + message: `Bucket '${vectorBucketName}' not found`, + }, + } + } + + const indexes = storage.getIndexes(vectorBucketName) + if (indexes.length > 0) { + return { + status: 400, + error: { + statusCode: 400, + error: 'Bad Request', + message: `Bucket '${vectorBucketName}' is not empty`, + }, + } + } + + storage.removeBucket(vectorBucketName) + return { status: 200, data: {} } +} + +// Index handlers +function handleCreateIndex(body: any): MockResponse { + const { vectorBucketName, indexName } = body + + if (!storage.hasBucket(vectorBucketName)) { + return { + status: 404, + error: { + statusCode: 404, + error: 'Not Found', + message: `Bucket '${vectorBucketName}' not found`, + }, + } + } + + if (storage.hasIndex(vectorBucketName, indexName)) { + return { + status: 409, + error: { + statusCode: 409, + error: 'Conflict', + message: `Index '${indexName}' already exists`, + }, + } + } + + storage.addIndex(vectorBucketName, { + ...body, + creationTime: Math.floor(Date.now() / 1000), + }) + + return { status: 200, data: {} } +} + +function handleGetIndex(body: any): MockResponse { + const { vectorBucketName, indexName } = body + + if 
(!storage.hasBucket(vectorBucketName)) { + return { + status: 404, + error: { + statusCode: 404, + error: 'Not Found', + message: `Bucket '${vectorBucketName}' not found`, + }, + } + } + + const index = storage.getIndex(vectorBucketName, indexName) + if (!index) { + return { + status: 404, + error: { + statusCode: 404, + error: 'Not Found', + message: `Index '${indexName}' not found`, + }, + } + } + + return { + status: 200, + data: { index }, + } +} + +function handleListIndexes(body: any): MockResponse { + const { vectorBucketName, prefix, maxResults = 100 } = body + + if (!storage.hasBucket(vectorBucketName)) { + return { + status: 404, + error: { + statusCode: 404, + error: 'Not Found', + message: `Bucket '${vectorBucketName}' not found`, + }, + } + } + + const indexes = storage.getIndexes(vectorBucketName, prefix) + + return { + status: 200, + data: { + indexes: indexes.slice(0, maxResults).map((i) => ({ indexName: i.indexName })), + nextToken: indexes.length > maxResults ? 'mock-next-token' : undefined, + }, + } +} + +function handleDeleteIndex(body: any): MockResponse { + const { vectorBucketName, indexName } = body + + if (!storage.hasBucket(vectorBucketName)) { + return { + status: 404, + error: { + statusCode: 404, + error: 'Not Found', + message: `Bucket '${vectorBucketName}' not found`, + }, + } + } + + if (!storage.hasIndex(vectorBucketName, indexName)) { + return { + status: 404, + error: { + statusCode: 404, + error: 'Not Found', + message: `Index '${indexName}' not found`, + }, + } + } + + storage.removeIndex(vectorBucketName, indexName) + return { status: 200, data: {} } +} + +// Vector data handlers +function handlePutVectors(body: any): MockResponse { + const { vectorBucketName, indexName, vectors } = body + + if (!storage.hasBucket(vectorBucketName)) { + return { + status: 404, + error: { + statusCode: 404, + error: 'Not Found', + message: `Bucket '${vectorBucketName}' not found`, + }, + } + } + + if (!storage.hasIndex(vectorBucketName, 
indexName)) { + return { + status: 404, + error: { + statusCode: 404, + error: 'Not Found', + message: `Index '${indexName}' not found`, + }, + } + } + + for (const vector of vectors) { + storage.putVector(vectorBucketName, indexName, vector) + } + + return { status: 200, data: {} } +} + +function handleGetVectors(body: any): MockResponse { + const { vectorBucketName, indexName, keys, returnData = true, returnMetadata = true } = body + + if (!storage.hasBucket(vectorBucketName)) { + return { + status: 404, + error: { + statusCode: 404, + error: 'Not Found', + message: `Bucket '${vectorBucketName}' not found`, + }, + } + } + + if (!storage.hasIndex(vectorBucketName, indexName)) { + return { + status: 404, + error: { + statusCode: 404, + error: 'Not Found', + message: `Index '${indexName}' not found`, + }, + } + } + + const vectors = keys + .map((key: string) => { + const vector = storage.getVector(vectorBucketName, indexName, key) + if (!vector) return null + + const result: any = { key: vector.key } + if (returnData) result.data = vector.data + if (returnMetadata) result.metadata = vector.metadata + + return result + }) + .filter(Boolean) + + return { + status: 200, + data: { vectors }, + } +} + +function handleListVectors(body: any): MockResponse { + const { + vectorBucketName, + indexName, + maxResults = 500, + returnData = true, + returnMetadata = true, + } = body + + if (!storage.hasBucket(vectorBucketName)) { + return { + status: 404, + error: { + statusCode: 404, + error: 'Not Found', + message: `Bucket '${vectorBucketName}' not found`, + }, + } + } + + if (!storage.hasIndex(vectorBucketName, indexName)) { + return { + status: 404, + error: { + statusCode: 404, + error: 'Not Found', + message: `Index '${indexName}' not found`, + }, + } + } + + const vectorStorage = storage.getVectorStorage(vectorBucketName, indexName) + const allVectors = Array.from(vectorStorage?.values() || []) + + const vectors = allVectors.slice(0, maxResults).map((vector) => { + const 
result: any = { key: vector.key } + if (returnData) result.data = vector.data + if (returnMetadata) result.metadata = vector.metadata + return result + }) + + return { + status: 200, + data: { + vectors, + nextToken: allVectors.length > maxResults ? 'mock-next-token' : undefined, + }, + } +} + +function handleQueryVectors(body: any): MockResponse { + const { + vectorBucketName, + indexName, + topK = 10, + filter, + returnDistance = false, + returnMetadata = true, + } = body + + if (!storage.hasBucket(vectorBucketName)) { + return { + status: 404, + error: { + statusCode: 404, + error: 'Not Found', + message: `Bucket '${vectorBucketName}' not found`, + }, + } + } + + if (!storage.hasIndex(vectorBucketName, indexName)) { + return { + status: 404, + error: { + statusCode: 404, + error: 'Not Found', + message: `Index '${indexName}' not found`, + }, + } + } + + const vectorStorage = storage.getVectorStorage(vectorBucketName, indexName) + let allVectors = Array.from(vectorStorage?.values() || []) + + // Apply filter if provided + if (filter) { + allVectors = allVectors.filter((vector) => { + if (!vector.metadata) return false + return Object.entries(filter).every(([key, value]) => vector.metadata[key] === value) + }) + } + + // Calculate cosine similarity (simplified mock) + const matches = allVectors + .map((vector, index) => { + const result: any = { key: vector.key } + if (returnDistance) { + // Mock distance calculation + result.distance = 0.1 + index * 0.05 + } + if (returnMetadata) result.metadata = vector.metadata + return result + }) + .slice(0, topK) + + return { + status: 200, + data: { matches }, + } +} + +function handleDeleteVectors(body: any): MockResponse { + const { vectorBucketName, indexName, keys } = body + + if (!storage.hasBucket(vectorBucketName)) { + return { + status: 404, + error: { + statusCode: 404, + error: 'Not Found', + message: `Bucket '${vectorBucketName}' not found`, + }, + } + } + + if (!storage.hasIndex(vectorBucketName, indexName)) { + 
return { + status: 404, + error: { + statusCode: 404, + error: 'Not Found', + message: `Index '${indexName}' not found`, + }, + } + } + + for (const key of keys) { + storage.deleteVector(vectorBucketName, indexName, key) + } + + return { status: 200, data: {} } +} + +/** + * Reset mock storage to initial state + */ +export function resetMockStorage() { + storage.reset() + // Re-initialize with default test data + const newStorage = new MockStorage() + Object.assign(storage, newStorage) +} diff --git a/packages/core/storage-js/test/setup.ts b/packages/core/storage-js/test/setup.ts new file mode 100644 index 000000000..0cba1e8cc --- /dev/null +++ b/packages/core/storage-js/test/setup.ts @@ -0,0 +1,75 @@ +/** + * Test configuration and setup + * Supports both real API testing and mock server testing + */ + +/// + +export interface TestConfig { + /** + * Whether to use a mock server or real API + */ + useMockServer: boolean + /** + * Base URL for the API (used when useMockServer is false) + */ + apiUrl?: string + /** + * API headers (e.g., Authorization token) + */ + headers?: Record +} + +/** + * Get test configuration from environment variables + */ +export function getTestConfig(): TestConfig { + const useMockServer = process.env.USE_MOCK_SERVER !== 'false' + const apiUrl = process.env.STORAGE_VECTORS_API_URL + const authToken = process.env.STORAGE_VECTORS_API_TOKEN + + const headers: Record = {} + if (authToken) { + headers['Authorization'] = `Bearer ${authToken}` + } + + return { + useMockServer, + apiUrl: useMockServer ? undefined : apiUrl, + headers: useMockServer ? 
{} : headers, + } +} + +/** + * Shared test data + */ +export const testData = { + buckets: { + test: 'test-bucket', + test2: 'test-bucket-2', + nonExistent: 'non-existent-bucket', + }, + indexes: { + test: 'test-index', + test2: 'test-index-2', + nonExistent: 'non-existent-index', + }, + vectors: { + key1: 'vector-1', + key2: 'vector-2', + key3: 'vector-3', + nonExistent: 'non-existent-vector', + }, + // Sample 3-dimensional vectors for testing + sampleVectors: { + vector1: [0.1, 0.2, 0.3], + vector2: [0.4, 0.5, 0.6], + vector3: [0.7, 0.8, 0.9], + query: [0.15, 0.25, 0.35], + }, + metadata: { + doc1: { title: 'Document 1', category: 'tech', page: 1 }, + doc2: { title: 'Document 2', category: 'science', page: 2 }, + doc3: { title: 'Document 3', category: 'tech', page: 3 }, + }, +} diff --git a/packages/core/storage-js/test/vector-data-api.spec.ts b/packages/core/storage-js/test/vector-data-api.spec.ts new file mode 100644 index 000000000..20cbdbaf3 --- /dev/null +++ b/packages/core/storage-js/test/vector-data-api.spec.ts @@ -0,0 +1,648 @@ +/** + * Integration tests for Vector Data API + * Tests all vector operations: put, get, list, query, delete + */ + +import { + createTestClient, + setupTest, + generateTestName, + generateRandomVector, + assertSuccessResponse, + assertErrorResponse, + assertErrorCode, +} from './helpers' + +describe('VectorDataApi Integration Tests', () => { + let client: ReturnType + let testBucket: string + let testIndex: string + + beforeEach(async () => { + setupTest() + client = createTestClient() + testBucket = generateTestName('test-bucket') + testIndex = generateTestName('test-index') + + await client.createBucket(testBucket) + const bucket = client.from(testBucket) + await bucket.createIndex({ + indexName: testIndex, + dataType: 'float32', + dimension: 3, + distanceMetric: 'cosine', + }) + }) + + describe('putVectors', () => { + it('should insert a single vector successfully', async () => { + const index = 
client.from(testBucket).index(testIndex) + + const response = await index.putVectors({ + vectors: [ + { + key: 'vector-1', + data: { float32: [0.1, 0.2, 0.3] }, + metadata: { title: 'Test Vector' }, + }, + ], + }) + + assertSuccessResponse(response) + expect(response.data).toEqual({}) + }) + + it('should insert multiple vectors in batch', async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.putVectors({ + vectors: [ + { key: 'vec-1', data: { float32: [0.1, 0.2, 0.3] }, metadata: { id: 1 } }, + { key: 'vec-2', data: { float32: [0.4, 0.5, 0.6] }, metadata: { id: 2 } }, + { key: 'vec-3', data: { float32: [0.7, 0.8, 0.9] }, metadata: { id: 3 } }, + ], + }) + + assertSuccessResponse(response) + }) + + it('should insert vector without metadata', async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.putVectors({ + vectors: [{ key: 'vec-no-meta', data: { float32: [0.1, 0.2, 0.3] } }], + }) + + assertSuccessResponse(response) + }) + + it('should upsert (update existing vector)', async () => { + const index = client.from(testBucket).index(testIndex) + + // Insert first time + await index.putVectors({ + vectors: [ + { + key: 'vec-1', + data: { float32: [0.1, 0.2, 0.3] }, + metadata: { version: 1 }, + }, + ], + }) + + // Update same key + const response = await index.putVectors({ + vectors: [ + { + key: 'vec-1', + data: { float32: [0.4, 0.5, 0.6] }, + metadata: { version: 2 }, + }, + ], + }) + + assertSuccessResponse(response) + + // Verify updated + const getResponse = await index.getVectors({ + keys: ['vec-1'], + returnData: true, + returnMetadata: true, + }) + + const data = assertSuccessResponse(getResponse) + expect(data.vectors[0].metadata?.version).toBe(2) + }) + + it('should insert vectors with complex metadata', async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.putVectors({ + vectors: [ + { + key: 'complex-meta', + 
data: { float32: [0.1, 0.2, 0.3] }, + metadata: { + title: 'Document Title', + category: 'tech', + tags: ['ai', 'ml', 'vectors'], + published: true, + score: 4.5, + nested: { field: 'value' }, + }, + }, + ], + }) + + assertSuccessResponse(response) + }) + + it('should return not found error when bucket does not exist', async () => { + const index = client.from('non-existent-bucket').index(testIndex) + + const response = await index.putVectors({ + vectors: [{ key: 'vec-1', data: { float32: [0.1, 0.2, 0.3] } }], + }) + + const error = assertErrorResponse(response) + assertErrorCode(error, 404) + }) + + it('should return not found error when index does not exist', async () => { + const index = client.from(testBucket).index('non-existent-index') + + const response = await index.putVectors({ + vectors: [{ key: 'vec-1', data: { float32: [0.1, 0.2, 0.3] } }], + }) + + const error = assertErrorResponse(response) + assertErrorCode(error, 404) + }) + + it('should handle batch size limits', async () => { + const index = client.from(testBucket).index(testIndex) + + // Create a large batch (500 vectors) + const vectors = Array.from({ length: 500 }, (_, i) => ({ + key: `vec-${i}`, + data: { float32: generateRandomVector(3) }, + })) + + const response = await index.putVectors({ vectors }) + + assertSuccessResponse(response) + }) + }) + + describe('getVectors', () => { + beforeEach(async () => { + const index = client.from(testBucket).index(testIndex) + + // Insert test vectors + await index.putVectors({ + vectors: [ + { + key: 'vec-1', + data: { float32: [0.1, 0.2, 0.3] }, + metadata: { title: 'Vector 1' }, + }, + { + key: 'vec-2', + data: { float32: [0.4, 0.5, 0.6] }, + metadata: { title: 'Vector 2' }, + }, + { + key: 'vec-3', + data: { float32: [0.7, 0.8, 0.9] }, + metadata: { title: 'Vector 3' }, + }, + ], + }) + }) + + it('should retrieve vectors by keys', async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.getVectors({ + 
keys: ['vec-1', 'vec-2'], + returnData: true, + returnMetadata: true, + }) + + const data = assertSuccessResponse(response) + expect(data.vectors).toBeDefined() + expect(data.vectors.length).toBe(2) + + const keys = data.vectors.map((v) => v.key) + expect(keys).toContain('vec-1') + expect(keys).toContain('vec-2') + }) + + it('should retrieve vectors with data', async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.getVectors({ + keys: ['vec-1'], + returnData: true, + returnMetadata: false, + }) + + const data = assertSuccessResponse(response) + expect(data.vectors[0].data).toBeDefined() + expect(data.vectors[0].data?.float32).toEqual([0.1, 0.2, 0.3]) + expect(data.vectors[0].metadata).toBeUndefined() + }) + + it('should retrieve vectors with metadata only', async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.getVectors({ + keys: ['vec-1'], + returnData: false, + returnMetadata: true, + }) + + const data = assertSuccessResponse(response) + expect(data.vectors[0].data).toBeUndefined() + expect(data.vectors[0].metadata).toBeDefined() + expect(data.vectors[0].metadata?.title).toBe('Vector 1') + }) + + it('should retrieve vectors with keys only', async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.getVectors({ + keys: ['vec-1'], + returnData: false, + returnMetadata: false, + }) + + const data = assertSuccessResponse(response) + expect(data.vectors[0].key).toBe('vec-1') + expect(data.vectors[0].data).toBeUndefined() + expect(data.vectors[0].metadata).toBeUndefined() + }) + + it('should return empty array for non-existent keys', async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.getVectors({ + keys: ['non-existent-key'], + returnData: true, + returnMetadata: true, + }) + + const data = assertSuccessResponse(response) + expect(data.vectors).toEqual([]) + }) + + 
it('should retrieve mix of existing and non-existent keys', async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.getVectors({ + keys: ['vec-1', 'non-existent', 'vec-2'], + returnData: true, + returnMetadata: true, + }) + + const data = assertSuccessResponse(response) + expect(data.vectors.length).toBe(2) + + const keys = data.vectors.map((v) => v.key) + expect(keys).toContain('vec-1') + expect(keys).toContain('vec-2') + expect(keys).not.toContain('non-existent') + }) + }) + + describe('listVectors', () => { + beforeEach(async () => { + const index = client.from(testBucket).index(testIndex) + + // Insert multiple vectors + await index.putVectors({ + vectors: Array.from({ length: 10 }, (_, i) => ({ + key: `vec-${i}`, + data: { float32: generateRandomVector(3) }, + metadata: { index: i }, + })), + }) + }) + + it('should list all vectors in index', async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.listVectors({ + returnData: true, + returnMetadata: true, + }) + + const data = assertSuccessResponse(response) + expect(data.vectors).toBeDefined() + expect(data.vectors.length).toBeGreaterThanOrEqual(10) + }) + + it('should list vectors with data', async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.listVectors({ + returnData: true, + returnMetadata: false, + }) + + const data = assertSuccessResponse(response) + expect(data.vectors[0].data).toBeDefined() + expect(data.vectors[0].metadata).toBeUndefined() + }) + + it('should list vectors with metadata only', async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.listVectors({ + returnData: false, + returnMetadata: true, + }) + + const data = assertSuccessResponse(response) + expect(data.vectors[0].data).toBeUndefined() + expect(data.vectors[0].metadata).toBeDefined() + }) + + it('should support pagination with maxResults', 
async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.listVectors({ + maxResults: 5, + returnMetadata: true, + }) + + const data = assertSuccessResponse(response) + expect(data.vectors.length).toBeLessThanOrEqual(5) + + if (data.vectors.length === 5 && data.nextToken) { + expect(data.nextToken).toBeDefined() + } + }) + + it('should return empty array for empty index', async () => { + const emptyIndex = generateTestName('empty-index') + const bucket = client.from(testBucket) + + await bucket.createIndex({ + indexName: emptyIndex, + dataType: 'float32', + dimension: 3, + distanceMetric: 'cosine', + }) + + const index = bucket.index(emptyIndex) + const response = await index.listVectors() + + const data = assertSuccessResponse(response) + expect(data.vectors).toEqual([]) + }) + }) + + describe('queryVectors', () => { + beforeEach(async () => { + const index = client.from(testBucket).index(testIndex) + + // Insert test vectors with different metadata + await index.putVectors({ + vectors: [ + { + key: 'doc-1', + data: { float32: [0.1, 0.2, 0.3] }, + metadata: { category: 'tech', published: true, score: 5 }, + }, + { + key: 'doc-2', + data: { float32: [0.15, 0.25, 0.35] }, + metadata: { category: 'tech', published: false, score: 3 }, + }, + { + key: 'doc-3', + data: { float32: [0.4, 0.5, 0.6] }, + metadata: { category: 'science', published: true, score: 4 }, + }, + { + key: 'doc-4', + data: { float32: [0.7, 0.8, 0.9] }, + metadata: { category: 'science', published: true, score: 5 }, + }, + ], + }) + }) + + it('should query for similar vectors', async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.queryVectors({ + queryVector: { float32: [0.12, 0.22, 0.32] }, + topK: 3, + returnMetadata: true, + }) + + const data = assertSuccessResponse(response) + expect(data.matches).toBeDefined() + expect(Array.isArray(data.matches)).toBe(true) + 
expect(data.matches.length).toBeGreaterThan(0) + expect(data.matches.length).toBeLessThanOrEqual(3) + }) + + it('should return vectors with distance scores', async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.queryVectors({ + queryVector: { float32: [0.1, 0.2, 0.3] }, + topK: 2, + returnDistance: true, + returnMetadata: true, + }) + + const data = assertSuccessResponse(response) + expect(data.matches[0].distance).toBeDefined() + expect(typeof data.matches[0].distance).toBe('number') + }) + + it('should filter by metadata', async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.queryVectors({ + queryVector: { float32: [0.1, 0.2, 0.3] }, + topK: 5, + filter: { category: 'tech' }, + returnMetadata: true, + }) + + const data = assertSuccessResponse(response) + expect(data.matches.length).toBeGreaterThan(0) + + // All results should match filter + for (const match of data.matches) { + expect(match.metadata?.category).toBe('tech') + } + }) + + it('should filter by multiple metadata fields', async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.queryVectors({ + queryVector: { float32: [0.1, 0.2, 0.3] }, + topK: 5, + filter: { category: 'tech', published: true }, + returnMetadata: true, + }) + + const data = assertSuccessResponse(response) + + for (const match of data.matches) { + expect(match.metadata?.category).toBe('tech') + expect(match.metadata?.published).toBe(true) + } + }) + + it('should respect topK parameter', async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.queryVectors({ + queryVector: { float32: [0.5, 0.5, 0.5] }, + topK: 2, + }) + + const data = assertSuccessResponse(response) + expect(data.matches.length).toBeLessThanOrEqual(2) + }) + + it('should return empty matches when filter matches nothing', async () => { + const index = 
client.from(testBucket).index(testIndex) + + const response = await index.queryVectors({ + queryVector: { float32: [0.1, 0.2, 0.3] }, + topK: 5, + filter: { category: 'non-existent' }, + }) + + const data = assertSuccessResponse(response) + expect(data.matches).toEqual([]) + }) + + it('should query without metadata in results', async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.queryVectors({ + queryVector: { float32: [0.1, 0.2, 0.3] }, + topK: 3, + returnMetadata: false, + returnDistance: true, + }) + + const data = assertSuccessResponse(response) + expect(data.matches[0].key).toBeDefined() + expect(data.matches[0].metadata).toBeUndefined() + }) + }) + + describe('deleteVectors', () => { + beforeEach(async () => { + const index = client.from(testBucket).index(testIndex) + + // Insert test vectors + await index.putVectors({ + vectors: [ + { key: 'vec-1', data: { float32: [0.1, 0.2, 0.3] } }, + { key: 'vec-2', data: { float32: [0.4, 0.5, 0.6] } }, + { key: 'vec-3', data: { float32: [0.7, 0.8, 0.9] } }, + ], + }) + }) + + it('should delete a single vector', async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.deleteVectors({ keys: ['vec-1'] }) + + assertSuccessResponse(response) + expect(response.data).toEqual({}) + + // Verify deletion + const getResponse = await index.getVectors({ keys: ['vec-1'] }) + const data = assertSuccessResponse(getResponse) + expect(data.vectors).toEqual([]) + }) + + it('should delete multiple vectors', async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.deleteVectors({ + keys: ['vec-1', 'vec-2'], + }) + + assertSuccessResponse(response) + + // Verify deletion + const getResponse = await index.getVectors({ + keys: ['vec-1', 'vec-2', 'vec-3'], + }) + const data = assertSuccessResponse(getResponse) + expect(data.vectors.length).toBe(1) + expect(data.vectors[0].key).toBe('vec-3') + }) + + 
it('should succeed when deleting non-existent keys', async () => { + const index = client.from(testBucket).index(testIndex) + + const response = await index.deleteVectors({ + keys: ['non-existent-1', 'non-existent-2'], + }) + + assertSuccessResponse(response) + }) + + it('should handle batch deletion limits', async () => { + const index = client.from(testBucket).index(testIndex) + + // Insert many vectors + const vectors = Array.from({ length: 100 }, (_, i) => ({ + key: `batch-vec-${i}`, + data: { float32: generateRandomVector(3) }, + })) + + await index.putVectors({ vectors }) + + // Delete in batch (max 500) + const keysToDelete = vectors.slice(0, 50).map((v) => v.key) + const response = await index.deleteVectors({ keys: keysToDelete }) + + assertSuccessResponse(response) + + // Verify deletion + const getResponse = await index.getVectors({ keys: keysToDelete }) + const data = assertSuccessResponse(getResponse) + expect(data.vectors).toEqual([]) + }) + + it('should return not found error when bucket does not exist', async () => { + const index = client.from('non-existent-bucket').index(testIndex) + + const response = await index.deleteVectors({ keys: ['vec-1'] }) + + const error = assertErrorResponse(response) + assertErrorCode(error, 404) + }) + + it('should return not found error when index does not exist', async () => { + const index = client.from(testBucket).index('non-existent-index') + + const response = await index.deleteVectors({ keys: ['vec-1'] }) + + const error = assertErrorResponse(response) + assertErrorCode(error, 404) + }) + }) + + describe('Batch operations', () => { + it('should handle large batch inserts efficiently', async () => { + const index = client.from(testBucket).index(testIndex) + + // Insert 500 vectors (max batch size) + const vectors = Array.from({ length: 500 }, (_, i) => ({ + key: `large-batch-${i}`, + data: { float32: generateRandomVector(3) }, + metadata: { batch: 'large', index: i }, + })) + + const response = await 
index.putVectors({ vectors }) + + assertSuccessResponse(response) + + // Verify some vectors were inserted + const getResponse = await index.getVectors({ + keys: ['large-batch-0', 'large-batch-100', 'large-batch-499'], + }) + + const data = assertSuccessResponse(getResponse) + expect(data.vectors.length).toBe(3) + }) + }) +}) diff --git a/packages/core/storage-js/tsconfig.json b/packages/core/storage-js/tsconfig.json index e4361c213..97c0f7abd 100644 --- a/packages/core/storage-js/tsconfig.json +++ b/packages/core/storage-js/tsconfig.json @@ -22,5 +22,6 @@ "allowSyntheticDefaultImports": true, "noImplicitOverride": false, "isolatedModules": false - } + }, + "references": [] } diff --git a/packages/core/storage-js/tsconfig.lib.json b/packages/core/storage-js/tsconfig.lib.json new file mode 100644 index 000000000..8bce1b756 --- /dev/null +++ b/packages/core/storage-js/tsconfig.lib.json @@ -0,0 +1,20 @@ +{ + "extends": "../../../tsconfig.base.json", + "compilerOptions": { + "module": "ES2020", + "outDir": "./dist/main", + "rootDir": "src", + "sourceMap": true, + "target": "ES6", + "emitDeclarationOnly": false, + "declaration": true, + "declarationMap": true, + + "stripInternal": true, + "allowSyntheticDefaultImports": true, + "esModuleInterop": true + }, + "include": ["src/**/*.ts"], + "exclude": ["src/**/*.spec.ts", "src/**/*.test.ts"], + "references": [] +} diff --git a/packages/core/storage-js/tsconfig.module.json b/packages/core/storage-js/tsconfig.module.json index 8726ca43b..65a348eb4 100644 --- a/packages/core/storage-js/tsconfig.module.json +++ b/packages/core/storage-js/tsconfig.module.json @@ -1,7 +1,7 @@ { - "extends": "./tsconfig", + "extends": "./tsconfig.lib.json", "compilerOptions": { "module": "ES2020", - "outDir": "dist/module" + "outDir": "./dist/module" } } diff --git a/packages/core/storage-js/webpack.config.js b/packages/core/storage-js/webpack.config.js index 4139d9d00..a9653327d 100644 --- a/packages/core/storage-js/webpack.config.js +++ 
b/packages/core/storage-js/webpack.config.js @@ -20,6 +20,12 @@ module.exports = { transpileOnly: true, }, }, + { + test: /\.m?js$/, + resolve: { + fullySpecified: false, + }, + }, ], }, resolve: { diff --git a/packages/core/supabase-js/README.md b/packages/core/supabase-js/README.md index 6c7d6eeda..9bf7a7549 100644 --- a/packages/core/supabase-js/README.md +++ b/packages/core/supabase-js/README.md @@ -185,11 +185,11 @@ cd ../../.. | `test:integration:browser` | Browser tests using Deno + Puppeteer | Supabase running + Deno installed | | `test:edge-functions` | Edge Functions tests | Supabase running + Deno installed | | `test:types` | TypeScript type checking + JSR validation | None | -| `test:deno` | Deno runtime compatibility tests | Supabase running + Deno installed | | `test:bun` | Bun runtime compatibility tests | Supabase running + Bun installed | +| `test:node:playwright` | WebSocket browser tests | Supabase running + Playwright | +| Deno (see section below) | Deno runtime compatibility tests | Supabase running + Deno installed | | Expo (see section below) | React Native/Expo tests | Supabase running + dependencies updated | | Next.js (see below) | Next.js SSR tests | Supabase running + dependencies updated | -| `test:node:playwright` | WebSocket browser tests | Supabase running + Playwright | #### Unit Testing @@ -249,10 +249,11 @@ npx nx test:all supabase-js # Prerequisites: # 1. Supabase must be running (see Prerequisites) # 2. Update test dependencies and pack current build -npx nx update:test-deps:expo supabase-js +cd packages/core/supabase-js +npm run update:test-deps:expo # Run Expo tests from the Expo test project -cd packages/core/supabase-js/test/integration/expo +cd test/integration/expo npm install npm test cd ../../.. 
diff --git a/packages/core/supabase-js/package.json b/packages/core/supabase-js/package.json index db6845b35..cdf213ee5 100644 --- a/packages/core/supabase-js/package.json +++ b/packages/core/supabase-js/package.json @@ -73,5 +73,29 @@ "webpack-cli": "^4.9.2" }, "jsdelivr": "dist/umd/supabase.js", - "unpkg": "dist/umd/supabase.js" + "unpkg": "dist/umd/supabase.js", + "nx": { + "targets": { + "test:integration:browser": { + "dependsOn": [ + { + "projects": [ + "storage-js" + ], + "target": "build" + } + ] + }, + "test:edge-functions": { + "dependsOn": [ + { + "projects": [ + "storage-js" + ], + "target": "build" + } + ] + } + } + } } diff --git a/packages/core/supabase-js/supabase/.temp/cli-latest b/packages/core/supabase-js/supabase/.temp/cli-latest index c5299e677..2213dd2c1 100644 --- a/packages/core/supabase-js/supabase/.temp/cli-latest +++ b/packages/core/supabase-js/supabase/.temp/cli-latest @@ -1 +1 @@ -v2.47.2 \ No newline at end of file +v2.51.0 \ No newline at end of file diff --git a/packages/core/supabase-js/test/deno/setup-deps.js b/packages/core/supabase-js/test/deno/setup-deps.js index 689080743..d8f1a436b 100755 --- a/packages/core/supabase-js/test/deno/setup-deps.js +++ b/packages/core/supabase-js/test/deno/setup-deps.js @@ -67,4 +67,4 @@ denoJson.compilerOptions.types = Array.isArray(denoJson.compilerOptions.types) fs.writeFileSync(denoJsonPath, JSON.stringify(denoJson, null, 2) + '\n') console.log('Updated deno.json with versions from package.json') -console.log('Versions used:', versions) +console.log('Versions used:', versions) \ No newline at end of file diff --git a/packages/core/supabase-js/tsconfig.lib.json b/packages/core/supabase-js/tsconfig.lib.json new file mode 100644 index 000000000..78cd73a5a --- /dev/null +++ b/packages/core/supabase-js/tsconfig.lib.json @@ -0,0 +1,34 @@ +{ + "extends": "../../../tsconfig.base.json", + "compilerOptions": { + "module": "ES2020", + "outDir": "./dist/main", + "rootDir": "src", + "sourceMap": true, + 
"target": "ES2015", + "emitDeclarationOnly": false, + + "stripInternal": true, + "allowSyntheticDefaultImports": true, + "esModuleInterop": true + }, + "include": ["src/**/*.ts"], + "exclude": ["src/**/*.spec.ts", "src/**/*.test.ts"], + "references": [ + { + "path": "../storage-js/tsconfig.lib.json" + }, + { + "path": "../realtime-js" + }, + { + "path": "../postgrest-js" + }, + { + "path": "../functions-js" + }, + { + "path": "../auth-js" + } + ] +} diff --git a/packages/core/supabase-js/webpack.config.js b/packages/core/supabase-js/webpack.config.js index fc170305f..857e44041 100644 --- a/packages/core/supabase-js/webpack.config.js +++ b/packages/core/supabase-js/webpack.config.js @@ -21,6 +21,12 @@ module.exports = (env) => ({ transpileOnly: true, }, }, + { + test: /\.m?js$/, + resolve: { + fullySpecified: false, + }, + }, ], }, resolve: { diff --git a/tsconfig.json b/tsconfig.json index c02f6d2b0..69433af3e 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -12,9 +12,6 @@ { "path": "./packages/core/realtime-js" }, - { - "path": "./packages/core/supabase-js" - }, { "path": "./packages/core/storage-js" },