diff --git a/examples/src/langchain-classic/indexes/vector_stores/redis/redis_advanced_filtering.ts b/examples/src/langchain-classic/indexes/vector_stores/redis/redis_advanced_filtering.ts new file mode 100644 index 000000000000..bddb0222443c --- /dev/null +++ b/examples/src/langchain-classic/indexes/vector_stores/redis/redis_advanced_filtering.ts @@ -0,0 +1,275 @@ +/** + * Advanced Redis Vector Store Filtering Examples + * + * This example demonstrates all the advanced filtering capabilities of RedisVectorStore: + * - Tag filters for categorical data + * - Numeric filters for ranges and comparisons + * - Text filters for full-text search + * - Geo filters for location-based queries + * - Timestamp filters for date/time queries + * - Complex combinations with AND/OR logic + * - Custom filters with raw RediSearch query syntax + * + * Note: Timestamps are stored as numeric fields (Unix epoch timestamps). + * Date objects are automatically converted during serialization and returned + * as numbers during deserialization. + */ + +import { createClient } from "redis"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { + RedisVectorStore, + Tag, + Num, + Text, + Geo, + Timestamp, + Custom, +} from "@langchain/redis"; +import { Document } from "@langchain/core/documents"; + +// Connect to Redis +const client = createClient({ + url: process.env.REDIS_URL ?? "redis://localhost:6379", +}); +await client.connect(); + +// Sample documents with rich metadata +const docs = [ + new Document({ + metadata: { + category: "electronics", + price: 299.99, + title: "Wireless Bluetooth Headphones", + location: [-122.4194, 37.7749], // San Francisco + created_at: new Date("2023-01-15"), + brand: "TechCorp", + rating: 4.5, + }, + pageContent: + "High-quality wireless Bluetooth headphones with noise cancellation", + }), + new Document({ + metadata: { + category: "books", + price: 24.99, + title: "JavaScript Programming Guide", + location: [-74.006, 40.7128], // New York + created_at: new Date("2023-03-20"), + author: "John Smith", + pages: 450, + }, + pageContent: + "Comprehensive guide to modern JavaScript programming techniques", + }), + new Document({ + metadata: { + category: "electronics", + price: 899.99, + title: "4K Smart TV", + location: [-118.2437, 34.0522], // Los Angeles + created_at: new Date("2023-02-10"), + brand: "ViewTech", + screen_size: 55, + }, + pageContent: + "Ultra HD 4K Smart TV with streaming capabilities and voice control", + }), + new Document({ + metadata: { + category: "books", + price: 19.99, + title: "Machine Learning Basics", + location: [-122.4194, 37.7749], // San Francisco + created_at: new Date("2023-04-05"), + author: "Jane Doe", + pages: 320, + }, + pageContent: + "Introduction to machine learning concepts and practical applications", + }), +]; + +// Create vector store with metadata schema for proper indexing +const vectorStore = await RedisVectorStore.fromDocuments( + docs, + new OpenAIEmbeddings(), + { + redisClient: client, + indexName: "advanced_products", + customSchema: [ + { name: "category", type: "tag" }, + { name: "price", type: "numeric", options: { sortable: true } }, + { name: "title", type: "text", options: { weight: 2.0 } }, + { name: "location", type: "geo" }, + // Timestamps are stored as numeric fields (Unix epoch timestamps) + { name: "created_at", type: "numeric", options: { sortable: true } }, + { name: "brand", type: "tag" }, + { name: "author", type: "tag" }, + { name: "rating", type: "numeric" }, + { name: "pages", type: "numeric" }, + { name: "screen_size", type: "numeric" }, + ], + } +); + +console.log("=== Advanced Redis Vector Store Filtering Examples ===\n"); + +// Example 1: Simple tag filtering +console.log("1. Simple tag filtering - Electronics only:"); +const electronicsFilter = Tag("category").eq("electronics"); +const electronicsResults = await vectorStore.similaritySearch( + "high quality device", + 5, + electronicsFilter +); +console.log(`Found ${electronicsResults.length} electronics items`); +electronicsResults.forEach((doc) => + console.log(`- ${doc.metadata.title} ($${doc.metadata.price})`) +); +console.log(); + +// Example 2: Numeric range filtering +console.log("2. Numeric range filtering - Products between $20-$500:"); +const priceFilter = Num("price").between(20, 500); +const priceResults = await vectorStore.similaritySearch( + "quality product", + 5, + priceFilter +); +console.log(`Found ${priceResults.length} products in price range`); +priceResults.forEach((doc) => + console.log(`- ${doc.metadata.title} ($${doc.metadata.price})`) +); +console.log(); + +// Example 3: Text search filtering +console.log("3. Text search filtering - Titles containing 'programming':"); +const textFilter = Text("title").wildcard("*programming*"); +const textResults = await vectorStore.similaritySearch( + "learning guide", + 5, + textFilter +); +console.log(`Found ${textResults.length} programming-related items`); +textResults.forEach((doc) => + console.log(`- ${doc.metadata.title} by ${doc.metadata.author || "N/A"}`) +); +console.log(); + +// Example 4: Geographic filtering +console.log("4. Geographic filtering - Items within 50km of San Francisco:"); +const geoFilter = Geo("location").within(-122.4194, 37.7749, 50, "km"); +const geoResults = await vectorStore.similaritySearch( + "local products", + 5, + geoFilter +); +console.log(`Found ${geoResults.length} items near San Francisco`); +geoResults.forEach((doc) => + console.log(`- ${doc.metadata.title} (${doc.metadata.location})`) +); +console.log(); + +// Example 5: Timestamp filtering +console.log("5. Timestamp filtering - Items created after March 1, 2023:"); +const timestampFilter = Timestamp("created_at").gt(new Date("2023-03-01")); +const timestampResults = await vectorStore.similaritySearch( + "recent items", + 5, + timestampFilter +); +console.log(`Found ${timestampResults.length} recent items`); +timestampResults.forEach((doc) => { + // created_at is stored as a Unix epoch timestamp (number) + // Convert it back to a Date for display + const createdDate = new Date((doc.metadata.created_at as number) * 1000); + console.log(`- ${doc.metadata.title} (${createdDate.toDateString()})`); +}); +console.log(); + +// Example 6: Complex combined filtering +console.log("6. Complex filtering - Electronics under $400 in California:"); +const complexFilter = Tag("category") + .eq("electronics") + .and(Num("price").lt(400)) + .and(Geo("location").within(-119.4179, 36.7783, 500, "km")); // California center + +const complexResults = await vectorStore.similaritySearch( + "affordable electronics", + 5, + complexFilter +); +console.log( + `Found ${complexResults.length} affordable electronics in California` +); +complexResults.forEach((doc) => + console.log( + `- ${doc.metadata.title} ($${doc.metadata.price}) at ${doc.metadata.location}` + ) +); +console.log(); + +// Example 7: OR filtering +console.log("7. OR filtering - Books OR items under $30:"); +const orFilter = Tag("category").eq("books").or(Num("price").lt(30)); +const orResults = await vectorStore.similaritySearch( + "affordable items", + 5, + orFilter +); +console.log(`Found ${orResults.length} books or cheap items`); +orResults.forEach((doc) => + console.log( + `- ${doc.metadata.title} (${doc.metadata.category}, $${doc.metadata.price})` + ) +); +console.log(); + +// Example 8: Multiple tag values +console.log("8. Multiple tag values - TechCorp OR ViewTech brands:"); +const multiTagFilter = Tag("brand").eq(["TechCorp", "ViewTech"]); +const multiTagResults = await vectorStore.similaritySearch( + "branded products", + 5, + multiTagFilter +); +console.log(`Found ${multiTagResults.length} items from specified brands`); +multiTagResults.forEach((doc) => + console.log(`- ${doc.metadata.title} by ${doc.metadata.brand}`) +); +console.log(); + +// Example 9: Negation filtering +console.log("9. Negation filtering - NOT electronics:"); +const negationFilter = Tag("category").ne("electronics"); +const negationResults = await vectorStore.similaritySearch( + "non-electronic items", + 5, + negationFilter +); +console.log(`Found ${negationResults.length} non-electronic items`); +negationResults.forEach((doc) => + console.log(`- ${doc.metadata.title} (${doc.metadata.category})`) +); +console.log(); + +// Example 10: Custom filter with raw RediSearch syntax +console.log("10. Custom filter - Raw RediSearch query syntax:"); +const customFilter = Custom("(@category:{electronics} @price:[0 400])"); +const customResults = await vectorStore.similaritySearch( + "affordable tech", + 5, + customFilter +); +console.log(`Found ${customResults.length} affordable electronics`); +customResults.forEach((doc) => + console.log(`- ${doc.metadata.title} ($${doc.metadata.price})`) +); +console.log(); + +// Cleanup +await vectorStore.delete({ deleteAll: true }); +await client.disconnect(); + +console.log("\n=== Advanced filtering examples completed! ==="); diff --git a/libs/providers/langchain-redis/package.json b/libs/providers/langchain-redis/package.json index 53941d32db3d..c90766802872 100644 --- a/libs/providers/langchain-redis/package.json +++ b/libs/providers/langchain-redis/package.json @@ -27,6 +27,7 @@ "format:check": "prettier --config .prettierrc --check \"src\"" }, "dependencies": { + "uuid": "^10.0.0", "redis": "^4.6.13" }, "peerDependencies": { @@ -44,7 +45,6 @@ "eslint": "^9.34.0", "prettier": "^2.8.3", "typescript": "~5.8.3", - "uuid": "^10.0.0", "vitest": "^3.2.4" }, "publishConfig": { diff --git a/libs/providers/langchain-redis/src/filters.ts b/libs/providers/langchain-redis/src/filters.ts new file mode 100644 index 000000000000..0322a2e5fd7c --- /dev/null +++ b/libs/providers/langchain-redis/src/filters.ts @@ -0,0 +1,905 @@ +/** + * Filter expression classes for advanced metadata filtering in Redis vector stores. + * + * These classes provide a type-safe way to construct Redis query filters for vector similarity search. + * They generate RediSearch query syntax that can be used to filter documents based on metadata fields. + * + * @see https://redis.io/docs/latest/develop/ai/search-and-query/query/ + * @see https://redis.io/docs/latest/develop/ai/search-and-query/query/vector-search/ + * + * @example + * ```typescript + * // Simple tag filter + * const filter = Tag("category").eq("electronics"); + * + * // Numeric range filter + * const priceFilter = Num("price").between(50, 200); + * + * // Combining filters with AND + * const complexFilter = Tag("category").eq("electronics").and(Num("price").lt(100)); + * + * // Combining filters with OR + * const orFilter = Tag("brand").eq("Apple").or(Tag("brand").eq("Samsung")); + * + * // Custom filter with raw RediSearch syntax + * const customFilter = Custom("(@category:{electronics} @price:[0 100])"); + * ``` + */ + +/** + * Base class for all filter expressions. + * + * All filter types extend this class and implement the `toString()` method + * to generate the appropriate RediSearch query syntax. + * + * @see https://redis.io/docs/latest/develop/ai/search-and-query/query/ + */ +export abstract class FilterExpression { + /** + * Discriminator property for type-safe filter identification. + * Each filter type has a unique filterType value. + */ + abstract readonly filterType: string; + + /** + * Converts the filter expression to a RediSearch query string. + * + * @returns The RediSearch query string representation of this filter + */ + abstract toString(): string; + + /** + * Combine this filter with another using AND logic. + * + * In RediSearch, AND operations are represented by space-separated conditions + * within parentheses: `(condition1 condition2)` + * + * @param other - The filter expression to combine with + * @returns A new AndFilter combining both expressions + * + * @example + * ```typescript + * const filter = Tag("category").eq("books").and(Num("price").lt(30)); + * // Generates: (@category:{books} @price:[-inf 30]) + * ``` + */ + and(other: FilterExpression): FilterExpression { + // eslint-disable-next-line @typescript-eslint/no-use-before-define + return new AndFilter([this, other]); + } + + /** + * Combine this filter with another using OR logic. + * + * In RediSearch, OR operations are represented by pipe-separated conditions + * within parentheses: `(condition1|condition2)` + * + * @param other - The filter expression to combine with + * @returns A new OrFilter combining both expressions + * + * @example + * ```typescript + * const filter = Tag("category").eq("books").or(Tag("category").eq("electronics")); + * // Generates: (@category:{books}|@category:{electronics}) + * ``` + */ + or(other: FilterExpression): FilterExpression { + // eslint-disable-next-line @typescript-eslint/no-use-before-define + return new OrFilter([this, other]); + } +} + +/** + * Logical AND filter for combining multiple filter conditions. + * + * Combines two filter expressions with AND logic. In RediSearch, this is represented + * by space-separated conditions within parentheses. + * + * @see https://redis.io/docs/latest/develop/ai/search-and-query/query/combined/#and + * + * @example + * ```typescript + * const filter = new AndFilter( + * Tag("category").eq("electronics"), + * Num("price").between(100, 500) + * ); + * // Generates: (@category:{electronics} @price:[100 500]) + * ``` + */ +export class AndFilter extends FilterExpression { + readonly filterType = "and" as const; + + constructor(public readonly filters: [FilterExpression, FilterExpression]) { + super(); + } + + toString(): string { + const leftStr = this.filters[0].toString(); + const rightStr = this.filters[1].toString(); + + // Handle wildcard cases - if either side is a wildcard, return the other side + if (leftStr === "*") return rightStr; + if (rightStr === "*") return leftStr; + + return `(${leftStr} ${rightStr})`; + } +} + +/** + * Logical OR filter for combining alternative filter conditions. + * + * Combines two filter expressions with OR logic. In RediSearch, this is represented + * by pipe-separated conditions within parentheses. + * + * @see https://redis.io/docs/latest/develop/ai/search-and-query/query/combined/#or + * + * @example + * ```typescript + * const filter = new OrFilter( + * Tag("brand").eq("Apple"), + * Tag("brand").eq("Samsung") + * ); + * // Generates: (@brand:{Apple}|@brand:{Samsung}) + * ``` + */ +export class OrFilter extends FilterExpression { + readonly filterType = "or" as const; + + constructor(public readonly filters: [FilterExpression, FilterExpression]) { + super(); + } + + toString(): string { + const leftStr = this.filters[0].toString(); + const rightStr = this.filters[1].toString(); + + // Handle wildcard cases - if either side is a wildcard, the entire OR is a wildcard + if (leftStr === "*" || rightStr === "*") return "*"; + + return `(${leftStr}|${rightStr})`; + } +} + +/** + * Tag filter for exact matching on tag fields. + * + * Tag fields in Redis are used for exact-match filtering on categorical data. + * They support efficient filtering on multiple values using OR logic within the tag set. + * + * Tag fields must be indexed with the TAG type in the metadata schema. + * + * @see https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/tags/ + * + * @example + * ```typescript + * // Single value + * const filter = new TagFilter("category", "electronics"); + * // Generates: @category:{electronics} + * + * // Multiple values (OR logic) + * const filter = new TagFilter("category", ["electronics", "books"]); + * // Generates: @category:{electronics|books} + * + * // Negation + * const filter = new TagFilter("category", "electronics", true); + * // Generates: (-@category:{electronics}) + * + * // Using the convenience method + * const filter = Tag("category").eq("electronics"); + * const notFilter = Tag("category").ne("books"); + * ``` + */ +export class TagFilter extends FilterExpression { + readonly filterType = "tag" as const; + + constructor( + private field: string, + private values: string | string[] | Set, + private negate: boolean = false + ) { + super(); + } + + /** + * Creates a builder object for constructing tag filters. + * + * @param field - The name of the tag field to filter on + * @returns An object with `eq` and `ne` methods for creating filters + */ + static create(field: string) { + return { + eq: (values: string | string[] | Set) => + new TagFilter(field, values, false), + ne: (values: string | string[] | Set) => + new TagFilter(field, values, true), + }; + } + + toString(): string { + if ( + !this.values || + (Array.isArray(this.values) && this.values.length === 0) || + (this.values && + typeof this.values === "object" && + "size" in this.values && + this.values.size === 0) + ) { + return "*"; // Return wildcard for empty filters + } + + let valueStr: string; + if (typeof this.values === "string") { + valueStr = this.values; + } else if (Array.isArray(this.values)) { + valueStr = this.values.join("|"); + } else { + valueStr = Array.from(this.values).join("|"); + } + + const filter = `@${this.field}:{${valueStr}}`; + return this.negate ? `(-${filter})` : filter; + } +} + +/** + * Numeric filter for range and exact matching on numeric fields. + * + * Numeric fields in Redis support range queries and exact matching on numerical values. + * They use interval notation where square brackets `[` `]` indicate inclusive bounds + * and parentheses `(` indicate exclusive bounds. + * + * Numeric fields must be indexed with the NUMERIC type in the metadata schema. + * + * @see https://redis.io/docs/latest/develop/ai/search-and-query/query/exact-match/#numeric-field + * @see https://redis.io/docs/latest/develop/ai/search-and-query/query/range/ + * + * @example + * ```typescript + * // Exact match + * const filter = new NumericFilter("price", "eq", 99.99); + * // Generates: @price:[99.99 99.99] + * + * // Greater than (exclusive) + * const filter = new NumericFilter("price", "gt", 50); + * // Generates: @price:[(50 +inf] + * + * // Less than or equal (inclusive) + * const filter = new NumericFilter("price", "lte", 100); + * // Generates: @price:[-inf 100] + * + * // Range (inclusive on both ends) + * const filter = new NumericFilter("price", "between", [50, 200]); + * // Generates: @price:[50 200] + * + * // Using convenience methods + * const filter = Num("price").between(50, 200); + * const filter2 = Num("rating").gte(4.5); + * ``` + */ +export class NumericFilter extends FilterExpression { + readonly filterType = "numeric" as const; + + constructor( + private field: string, + private operator: "eq" | "ne" | "gt" | "gte" | "lt" | "lte" | "between", + private value: number | [number, number], + private negate: boolean = false + ) { + super(); + } + + /** + * Creates a builder object for constructing numeric filters. + * + * @param field - The name of the numeric field to filter on + * @returns An object with comparison methods (eq, ne, gt, gte, lt, lte, between) + */ + static create(field: string) { + return { + eq: (value: number) => new NumericFilter(field, "eq", value), + ne: (value: number) => new NumericFilter(field, "ne", value), + gt: (value: number) => new NumericFilter(field, "gt", value), + gte: (value: number) => new NumericFilter(field, "gte", value), + lt: (value: number) => new NumericFilter(field, "lt", value), + lte: (value: number) => new NumericFilter(field, "lte", value), + between: (min: number, max: number) => + new NumericFilter(field, "between", [min, max]), + }; + } + + toString(): string { + let rangeStr: string; + + switch (this.operator) { + case "eq": + rangeStr = `[${this.value} ${this.value}]`; + break; + case "ne": + return `(-@${this.field}:[${this.value} ${this.value}])`; + case "gt": + // Exclusive lower bound using parenthesis + rangeStr = `[(${this.value} +inf]`; + break; + case "gte": + // Inclusive lower bound using square bracket + rangeStr = `[${this.value} +inf]`; + break; + case "lt": + // Exclusive upper bound using parenthesis + rangeStr = `[-inf (${this.value}]`; + break; + case "lte": + // Inclusive upper bound using square bracket + rangeStr = `[-inf ${this.value}]`; + break; + case "between": + if (Array.isArray(this.value)) { + rangeStr = `[${this.value[0]} ${this.value[1]}]`; + } else { + throw new Error("Between operator requires array of two numbers"); + } + break; + default: + throw new Error(`Unknown numeric operator: ${this.operator}`); + } + + const filter = `@${this.field}:${rangeStr}`; + return this.negate ? `(-${filter})` : filter; + } +} + +/** + * Text filter for full-text search on text fields. + * + * Text fields in Redis support various types of text matching including exact phrases, + * wildcard patterns, and fuzzy matching. Text fields are tokenized and support + * full-text search capabilities. + * + * Text fields must be indexed with the TEXT type in the metadata schema. + * + * @see https://redis.io/docs/latest/develop/ai/search-and-query/query/full-text/ + * + * @example + * ```typescript + * // Exact phrase match + * const filter = new TextFilter("title", "wireless headphones", "exact"); + * // Generates: @title:("wireless headphones") + * + * // Wildcard match (use * for any characters) + * const filter = new TextFilter("title", "head*", "wildcard"); + * // Generates: @title:(head*) + * + * // Fuzzy match (allows typos/variations) + * const filter = new TextFilter("title", "headphone", "fuzzy"); + * // Generates: @title:(%%headphone%%) + * + * // Word match (tokenized search) + * const filter = new TextFilter("description", "bluetooth wireless", "match"); + * // Generates: @title:(bluetooth wireless) + * + * // Using convenience methods + * const filter = Text("title").exact("wireless headphones"); + * const filter2 = Text("title").wildcard("*phone*"); + * const filter3 = Text("description").fuzzy("blutooth"); + * ``` + */ +export class TextFilter extends FilterExpression { + readonly filterType = "text" as const; + + constructor( + private field: string, + private query: string, + private operator: "match" | "wildcard" | "fuzzy" | "exact" = "exact", + private negate: boolean = false + ) { + super(); + } + + /** + * Creates a builder object for constructing text filters. + * + * @param field - The name of the text field to filter on + * @returns An object with text search methods (eq, ne, match, wildcard, fuzzy) + */ + static create(field: string) { + return { + eq: (query: string) => new TextFilter(field, query, "exact"), + ne: (query: string) => new TextFilter(field, query, "exact", true), + match: (query: string) => new TextFilter(field, query, "match"), + wildcard: (query: string) => new TextFilter(field, query, "wildcard"), + fuzzy: (query: string) => new TextFilter(field, query, "fuzzy"), + }; + } + + toString(): string { + if (!this.query || this.query.trim() === "") { + return "*"; // Return wildcard for empty queries + } + + let queryStr: string; + switch (this.operator) { + case "exact": + // Exact phrase match using quotes + queryStr = `"${this.query}"`; + break; + case "match": + // Tokenized word matching + queryStr = this.query; + break; + case "wildcard": + // Wildcard matching - wildcards should be included in the query string + queryStr = this.query; + break; + case "fuzzy": + // Fuzzy matching using %% prefix and suffix + queryStr = `%%${this.query}%%`; + break; + default: + queryStr = this.query; + } + + const filter = `@${this.field}:(${queryStr})`; + return this.negate ? `(-${filter})` : filter; + } +} + +/** + * Geographic filter for location-based searches. + * + * Geo fields in Redis support radius-based geographic queries. They store coordinates + * as longitude,latitude pairs and allow filtering based on distance from a point. + * + * Geo fields must be indexed with the GEO type in the metadata schema. + * Coordinates should be stored as "longitude,latitude" strings or [lon, lat] arrays. + * + * @see https://redis.io/docs/latest/develop/ai/search-and-query/query/geo-spatial/ + * + * @example + * ```typescript + * // Find locations within 10km of San Francisco + * const filter = new GeoFilter("location", -122.4194, 37.7749, 10, "km"); + * // Generates: @location:[-122.4194 37.7749 10 km] + * + * // Find locations within 5 miles of New York + * const filter = new GeoFilter("store_location", -74.0060, 40.7128, 5, "mi"); + * // Generates: @store_location:[-74.0060 40.7128 5 mi] + * + * // Find locations outside a radius (negation) + * const filter = new GeoFilter("location", -122.4194, 37.7749, 50, "km", true); + * // Generates: (-@location:[-122.4194 37.7749 50 km]) + * + * // Using convenience methods + * const filter = Geo("location").within(-122.4194, 37.7749, 10, "km"); + * const filter2 = Geo("location").outside(-74.0060, 40.7128, 100, "mi"); + * ``` + */ +export class GeoFilter extends FilterExpression { + readonly filterType = "geo" as const; + + constructor( + private field: string, + private longitude: number, + private latitude: number, + private radius: number, + private unit: "km" | "mi" | "m" | "ft" = "km", + private negate: boolean = false + ) { + super(); + } + + /** + * Creates a builder object for constructing geographic filters. + * + * @param field - The name of the geo field to filter on + * @returns An object with `within` and `outside` methods for creating geo filters + */ + static create(field: string) { + return { + within: ( + longitude: number, + latitude: number, + radius: number, + unit: "km" | "mi" | "m" | "ft" = "km" + ) => new GeoFilter(field, longitude, latitude, radius, unit), + outside: ( + longitude: number, + latitude: number, + radius: number, + unit: "km" | "mi" | "m" | "ft" = "km" + ) => new GeoFilter(field, longitude, latitude, radius, unit, true), + }; + } + + toString(): string { + const filter = `@${this.field}:[${this.longitude} ${this.latitude} ${this.radius} ${this.unit}]`; + return this.negate ? `(-${filter})` : filter; + } +} + +/** + * Custom filter for providing raw RediSearch query syntax. + * + * This filter allows you to provide a custom RediSearch query string that will be used + * as-is without any modification. This is useful when you need to use advanced RediSearch + * features that are not covered by the other filter types, or when you want complete + * control over the query syntax. + * + * **Warning**: When using custom filters, you are responsible for ensuring the query + * syntax is valid RediSearch syntax. Invalid syntax will cause search queries to fail. + * + * @see https://redis.io/docs/stack/search/reference/query_syntax/ + * + * @example + * ```typescript + * // Simple custom filter + * const filter = new CustomFilter("@category:{electronics}"); + * + * // Complex custom filter with multiple conditions + * const filter = new CustomFilter("(@category:{electronics} @price:[0 100])"); + * + * // Using advanced RediSearch features + * const filter = new CustomFilter("@title:(wireless|bluetooth) @price:[50 200]"); + * + * // Combining with other filters + * const filter = new CustomFilter("@category:{electronics}") + * .and(Num("price").lt(100)); + * + * // Using the convenience function + * const filter = Custom("(@brand:{Apple} @year:[2020 +inf])"); + * ``` + */ +export class CustomFilter extends FilterExpression { + readonly filterType = "custom" as const; + + constructor(private query: string) { + super(); + } + + toString(): string { + return this.query; + } +} + +/** + * Timestamp filter for date/time-based searches. + * + * **Important**: In Redis, there is no separate "timestamp" field type. Timestamps are stored + * as NUMERIC fields containing Unix epoch timestamps (seconds since Jan 1, 1970 UTC). + * + * This filter class is a convenience wrapper that: + * - Automatically converts JavaScript Date objects to Unix epoch timestamps + * - Provides a fluent API for date/time comparisons + * - Generates the correct numeric range queries for Redis + * + * When defining your metadata schema, use `type: "numeric"` for timestamp fields: + * ```typescript + * const schema: MetadataFieldSchema[] = [ + * { name: "created_at", type: "numeric", options: { sortable: true } } + * ]; + * ``` + * + * @see https://redis.io/docs/latest/develop/ai/search-and-query/indexing/field-and-type-options/#numeric-fields + * + * @example + * ```typescript + * // Filter by exact date + * const filter = new TimestampFilter("created_at", "eq", new Date("2023-01-01")); + * // Generates: @created_at:[1672531200 1672531200] + * + * // Filter for dates after a specific time + * const filter = new TimestampFilter("created_at", "gt", new Date("2023-06-01")); + * // Generates: @created_at:[(1685577600 +inf] + * + * // Filter for dates in a range + * const filter = new TimestampFilter( + * "created_at", + * "between", + * [new Date("2023-01-01"), new Date("2023-12-31")] + * ); + * // Generates: @created_at:[1672531200 1703980800] + * + * // Using epoch timestamps directly + * const filter = new TimestampFilter("updated_at", "gte", 1672531200); + * + * // Using convenience methods + * const filter = Timestamp("created_at").gt(new Date("2023-01-01")); + * const filter2 = Timestamp("updated_at").between( + * new Date("2023-01-01"), + * new Date("2023-12-31") + * ); + * ``` + */ +export class TimestampFilter extends FilterExpression { + readonly filterType = "timestamp" as const; + + constructor( + private field: string, + private operator: "eq" | "ne" | "gt" | "gte" | "lt" | "lte" | "between", + private value: Date | number | [Date | number, Date | number], + private negate: boolean = false + ) { + super(); + } + + /** + * Creates a builder object for constructing timestamp filters. + * + * @param field - The name of the timestamp field to filter on + * @returns An object with comparison methods (eq, ne, gt, gte, lt, lte, between) + */ + static create(field: string) { + return { + eq: (value: Date | number) => new TimestampFilter(field, "eq", value), + ne: (value: Date | number) => new TimestampFilter(field, "ne", value), + gt: (value: Date | number) => new TimestampFilter(field, "gt", value), + gte: (value: Date | number) => new TimestampFilter(field, "gte", value), + lt: (value: Date | number) => new TimestampFilter(field, "lt", value), + lte: (value: Date | number) => new TimestampFilter(field, "lte", value), + between: (start: Date | number, end: Date | number) => + new TimestampFilter(field, "between", [start, end]), + }; + } + + /** + * Converts a Date object or number to Unix epoch timestamp (seconds). + * + * @param value - Date object or epoch timestamp + * @returns Unix epoch timestamp in seconds + */ + private toEpoch(value: Date | number): number { + return typeof value === "object" && value && "getTime" in value + ? Math.floor(value.getTime() / 1000) + : (value as number); + } + + toString(): string { + let rangeStr: string; + + switch (this.operator) { + case "eq": { + const eqValue = this.toEpoch(this.value as Date | number); + rangeStr = `[${eqValue} ${eqValue}]`; + break; + } + case "ne": { + const neValue = this.toEpoch(this.value as Date | number); + return `(-@${this.field}:[${neValue} ${neValue}])`; + } + case "gt": { + const gtValue = this.toEpoch(this.value as Date | number); + rangeStr = `[(${gtValue} +inf]`; + break; + } + case "gte": { + const gteValue = this.toEpoch(this.value as Date | number); + rangeStr = `[${gteValue} +inf]`; + break; + } + case "lt": { + const ltValue = this.toEpoch(this.value as Date | number); + rangeStr = `[-inf (${ltValue}]`; + break; + } + case "lte": { + const lteValue = this.toEpoch(this.value as Date | number); + rangeStr = `[-inf ${lteValue}]`; + break; + } + case "between": { + if (Array.isArray(this.value)) { + const startValue = this.toEpoch(this.value[0]); + const endValue = this.toEpoch(this.value[1]); + rangeStr = `[${startValue} ${endValue}]`; + } else { + throw new Error("Between operator requires array of two values"); + } + break; + } + default: + throw new Error(`Unknown timestamp operator: ${this.operator}`); + } + + const filter = `@${this.field}:${rangeStr}`; + return this.negate ? `(-${filter})` : filter; + } +} + +// Convenience functions for creating filters (similar to Python RedisVL) + +/** + * Create a tag filter for exact matching on tag fields. + * + * This is a convenience function that provides a fluent API for building tag filters. + * Tag filters are used for exact-match categorical filtering. + * + * @param field - The name of the tag field to filter on + * @returns An object with `eq` and `ne` methods for creating tag filters + * + * @see https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/tags/ + * + * @example + * ```typescript + * // Single value match + * const filter = Tag("category").eq("electronics"); + * + * // Multiple values (OR logic) + * const filter = Tag("category").eq(["electronics", "books"]); + * + * // Negation + * const filter = Tag("status").ne("archived"); + * + * // Combine with other filters + * const complexFilter = Tag("category").eq("electronics") + * .and(Num("price").lt(100)); + * ``` + */ +export function Tag(field: string) { + return TagFilter.create(field); +} + +/** + * Create a numeric filter for range and exact matching on numeric fields. + * + * This is a convenience function that provides a fluent API for building numeric filters. + * Numeric filters support range queries and exact matching on numerical values. + * + * @param field - The name of the numeric field to filter on + * @returns An object with comparison methods (eq, ne, gt, gte, lt, lte, between) + * + * @see https://redis.io/docs/latest/develop/ai/search-and-query/query/exact-match/#numeric-field + * @see https://redis.io/docs/latest/develop/ai/search-and-query/query/range/ + * + * @example + * ```typescript + * // Exact match + * const filter = Num("price").eq(99.99); + * + * // Range queries + * const filter = Num("price").between(50, 200); + * const filter2 = Num("rating").gte(4.5); + * const filter3 = Num("stock").gt(0); + * + * // Combine with other filters + * const complexFilter = Num("price").between(50, 200) + * .and(Tag("category").eq("electronics")); + * ``` + */ +export function Num(field: string) { + return NumericFilter.create(field); +} + +/** + * Create a text filter for full-text search on text fields. + * + * This is a convenience function that provides a fluent API for building text filters. + * Text filters support exact phrases, wildcard patterns, fuzzy matching, and tokenized search. + * + * @param field - The name of the text field to filter on + * @returns An object with text search methods (eq, ne, match, wildcard, fuzzy) + * + * @see https://redis.io/docs/latest/develop/ai/search-and-query/query/full-text/ + * + * @example + * ```typescript + * // Exact phrase + * const filter = Text("title").eq("wireless headphones"); + * + * // Wildcard search + * const filter = Text("title").wildcard("*phone*"); + * + * // Fuzzy search (tolerates typos) + * const filter = Text("description").fuzzy("blutooth"); + * + * // Tokenized word matching + * const filter = Text("description").match("wireless bluetooth"); + * ``` + */ +export function Text(field: string) { + return TextFilter.create(field); +} + +/** + * Create a geographic filter for location-based searches. + * + * This is a convenience function that provides a fluent API for building geo filters. + * Geo filters support radius-based geographic queries using longitude/latitude coordinates. + * + * @param field - The name of the geo field to filter on + * @returns An object with `within` and `outside` methods for creating geo filters + * + * @see https://redis.io/docs/latest/develop/ai/search-and-query/query/geo-spatial/ + * + * @example + * ```typescript + * // Find locations within radius + * const filter = Geo("location").within(-122.4194, 37.7749, 10, "km"); + * + * // Find locations outside radius + * const filter = Geo("location").outside(-74.0060, 40.7128, 50, "mi"); + * + * // Combine with other filters + * const complexFilter = Geo("store_location").within(-122.4194, 37.7749, 5, "km") + * .and(Tag("store_type").eq("retail")); + * ``` + */ +export function Geo(field: string) { + return GeoFilter.create(field); +} + +/** + * Create a timestamp filter for date/time-based searches. + * + * This is a convenience function that provides a fluent API for building timestamp filters. + * Timestamp filters work with Date objects or Unix epoch timestamps and support range queries. + * + * **Important**: Timestamps are stored as NUMERIC fields in Redis (Unix epoch timestamps). + * When defining your schema, use `type: "numeric"` for timestamp fields. + * + * @param field - The name of the numeric field containing timestamp data + * @returns An object with comparison methods (eq, ne, gt, gte, lt, lte, between) + * + * @see https://redis.io/docs/latest/develop/ai/search-and-query/indexing/field-and-type-options/#numeric-fields + * + * @example + * ```typescript + * // Define schema with numeric type for timestamps + * const schema: MetadataFieldSchema[] = [ + * { name: "created_at", type: "numeric", options: { sortable: true } } + * ]; + * + * // Filter by date + * const filter = Timestamp("created_at").gt(new Date("2023-01-01")); + * + * // Date range + * const filter = Timestamp("created_at").between( + * new Date("2023-01-01"), + * new Date("2023-12-31") + * ); + * + * // Using epoch timestamps + * const filter = Timestamp("updated_at").gte(1672531200); + * + * // Combine with other filters + * const complexFilter = Timestamp("created_at").gt(new Date("2023-01-01")) + * .and(Tag("status").eq("published")); + * ``` + */ +export function Timestamp(field: string) { + return TimestampFilter.create(field); +} + +/** + * Create a custom filter with raw RediSearch query syntax. + * + * This is a convenience function for creating custom filters that use raw RediSearch + * query syntax. The provided query string will be used as-is without any modification. + * + * Use this when you need advanced RediSearch features not covered by the other filter + * types, or when you want complete control over the query syntax. + * + * **Warning**: You are responsible for ensuring the query syntax is valid RediSearch syntax. + * + * @param query - The raw RediSearch query string + * @returns A CustomFilter instance + * + * @see https://redis.io/docs/latest/develop/ai/search-and-query/query/ + * + * @example + * ```typescript + * // Simple custom query + * const filter = Custom("@category:{electronics}"); + * + * // Complex query with multiple conditions + * const filter = Custom("(@category:{electronics} @price:[0 100])"); + * + * // Advanced RediSearch features + * const filter = Custom("@title:(wireless|bluetooth) @price:[50 200]"); + * + * // Combine with other filters + * const complexFilter = Custom("@category:{electronics}") + * .and(Num("price").lt(100)); + * ``` + */ +export function Custom(query: string): CustomFilter { + return new CustomFilter(query); +} diff --git a/libs/providers/langchain-redis/src/index.ts b/libs/providers/langchain-redis/src/index.ts index d5564f7ea878..eaa5b92ff4af 100644 --- a/libs/providers/langchain-redis/src/index.ts +++ b/libs/providers/langchain-redis/src/index.ts @@ -1,3 +1,5 @@ export * from "./chat_histories.js"; export * from "./vectorstores.js"; export * from "./caches.js"; +export * from "./filters.js"; +export * from "./schema.js"; diff --git a/libs/providers/langchain-redis/src/schema.ts b/libs/providers/langchain-redis/src/schema.ts new file mode 100644 index 000000000000..760e379367b9 --- /dev/null +++ b/libs/providers/langchain-redis/src/schema.ts @@ -0,0 +1,623 @@ +/** + * Schema definitions and utilities for Redis vector store indexing. + * + * This module provides types and utilities for defining metadata schemas, + * vector field configurations, and index options for Redis vector stores. + * + * @see https://redis.io/docs/latest/develop/ai/search-and-query/vectors/ + */ + +import type { createClient, RediSearchSchema } from "redis"; +import { SchemaFieldTypes, VectorAlgorithms } from "redis"; +import type { Document } from "@langchain/core/documents"; + +/** + * Default separator character for Redis TAG fields in HASH documents. + * @see https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/tags/ + */ +export const DEFAULT_TAG_SEPARATOR = ","; + +// Adapted from internal redis types which aren't exported +/** + * Type for creating a schema vector field. It includes the algorithm, + * distance metric, and initial capacity. + * + * @see https://redis.io/docs/latest/develop/ai/search-and-query/vectors/#search-with-vectors + */ +export type CreateSchemaVectorField< + T extends VectorAlgorithms, + A extends Record +> = { + /** The vector indexing algorithm to use */ + ALGORITHM: T; + /** The distance metric for similarity calculations */ + DISTANCE_METRIC: "L2" | "IP" | "COSINE"; + /** Initial capacity for the vector index */ + INITIAL_CAP?: number; +} & A; + +/** + * Type for creating a flat schema vector field. + * + * FLAT indexing performs brute-force search, which is accurate but slower for large datasets. + * Best for smaller datasets or when exact results are required. + * + * @example + * ```typescript + * const flatIndex: CreateSchemaFlatVectorField = { + * ALGORITHM: VectorAlgorithms.FLAT, + * DISTANCE_METRIC: "COSINE", + * BLOCK_SIZE: 1000 + * }; + * ``` + */ +export type CreateSchemaFlatVectorField = CreateSchemaVectorField< + VectorAlgorithms.FLAT, + { + /** Block size for the flat index */ + BLOCK_SIZE?: number; + } +>; + +/** + * Type for creating a HNSW schema vector field. + * + * HNSW (Hierarchical Navigable Small World) is an approximate nearest neighbor algorithm + * that provides fast search with good recall. Best for large datasets. + * + * @example + * ```typescript + * const hnswIndex: CreateSchemaHNSWVectorField = { + * ALGORITHM: VectorAlgorithms.HNSW, + * DISTANCE_METRIC: "COSINE", + * M: 16, + * EF_CONSTRUCTION: 200, + * EF_RUNTIME: 10 + * }; + * ``` + */ +export type CreateSchemaHNSWVectorField = CreateSchemaVectorField< + VectorAlgorithms.HNSW, + { + /** Number of outgoing edges per node (default: 16) */ + M?: number; + /** Number of neighbors to explore during construction (default: 200) */ + EF_CONSTRUCTION?: number; + /** Number of neighbors to explore during search (default: 10) */ + EF_RUNTIME?: number; + } +>; + +/** + * Internal type for Redis index creation options. + * Extracted from the Redis client's ft.create method signature. + */ +export type CreateIndexOptions = NonNullable< + Parameters["ft"]["create"]>[3] +>; + +/** + * Supported languages for RediSearch text indexing. + */ +export type RedisSearchLanguages = `${NonNullable< + CreateIndexOptions["LANGUAGE"] +>}`; + +/** + * Options for creating a Redis vector store index. + * + * These options control various aspects of index creation including + * language settings, stopwords, and index behavior. + */ +export type RedisVectorStoreIndexOptions = Omit< + CreateIndexOptions, + "LANGUAGE" +> & { LANGUAGE?: RedisSearchLanguages }; + +/** + * Metadata field schema definition for proper indexing. + * + * Defines how individual metadata fields should be indexed in Redis. + * Each field can have a specific type (tag, text, numeric, geo) + * and type-specific options. + * + * Note: For timestamp fields, use type "numeric" and store values as Unix epoch timestamps. + * The serialization/deserialization utilities will automatically handle Date object conversion. + * + * @see https://redis.io/docs/latest/develop/ai/search-and-query/indexing/field-and-type-options/ + * + * @example + * ```typescript + * const schema: MetadataFieldSchema[] = [ + * { name: "category", type: "tag", options: { separator: "," } }, + * { name: "price", type: "numeric", options: { sortable: true } }, + * { name: "description", type: "text", options: { weight: 2.0 } }, + * { name: "location", type: "geo" }, + * { name: "created_at", type: "numeric", options: { sortable: true } } // For timestamps + * ]; + * ``` + */ +export interface MetadataFieldSchema { + /** Field name in the metadata */ + name: string; + /** + * Field type for indexing. + * - tag: For categorical data with low cardinality (e.g., categories, labels) + * - text: For full-text search on human language text + * - numeric: For numeric values and timestamps (use Unix epoch for timestamps) + * - geo: For geographical coordinates (longitude, latitude) + */ + type: "tag" | "text" | "numeric" | "geo"; + /** Additional field options */ + options?: { + /** For tag fields: separator character (default: DEFAULT_TAG_SEPARATOR which is ",") */ + separator?: string; + /** For tag fields: case-sensitive matching (default: false) */ + caseSensitive?: boolean; + /** For text fields: weight for scoring (default: 1.0) */ + weight?: number; + /** For text fields: disable stemming (default: false) */ + noStem?: boolean; + /** For numeric fields: whether to enable sorting (default: false) */ + sortable?: boolean; + /** For all fields: whether to index the field (default: true, set to true to disable indexing) */ + noindex?: boolean; + }; +} + +/** + * Builds a RediSearch schema from metadata field definitions. + * + * This function builds up a schema based on the metadata field schema definitions. + * + * @param metadataSchema - Array of metadata field schema definitions + * @param defaultSchema - the default RediSearchSchema without considering metadata fields + * @returns a new RediSearchSchema with metadata fields added + * + * @see https://redis.io/docs/latest/develop/ai/search-and-query/indexing/schema-definition/ + * + * @example + * ```typescript + * const schema: RediSearchSchema = { + * content_vector: { type: SchemaFieldTypes.VECTOR, ... }, + * content: SchemaFieldTypes.TEXT + * }; + * + * const metadataSchema: MetadataFieldSchema[] = [ + * { name: "category", type: "tag" }, + * { name: "price", type: "numeric", options: { sortable: true } } + * ]; + * + * const updatedSchema = buildMetadataSchema(metadataSchema, schema); + * // updatedSchema includes category and price fields + * ``` + */ +export function buildMetadataSchema( + metadataSchema: MetadataFieldSchema[], + defaultSchema: RediSearchSchema +): RediSearchSchema { + // Create a new schema object to avoid mutating the input parameter + const updatedSchema = { ...defaultSchema }; + + for (const fieldSchema of metadataSchema) { + switch (fieldSchema.type) { + case "tag": { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const tagOptions: any = { + type: SchemaFieldTypes.TAG, + SEPARATOR: fieldSchema.options?.separator || DEFAULT_TAG_SEPARATOR, + }; + if (fieldSchema.options?.caseSensitive) { + tagOptions.CASESENSITIVE = true; + } + if (fieldSchema.options?.noindex) { + tagOptions.NOINDEX = true; + } + updatedSchema[fieldSchema.name] = tagOptions; + break; + } + case "text": { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const textOptions: any = { + type: SchemaFieldTypes.TEXT, + }; + if (fieldSchema.options?.weight !== undefined) { + textOptions.WEIGHT = fieldSchema.options.weight; + } + if (fieldSchema.options?.noStem) { + textOptions.NOSTEM = true; + } + if (fieldSchema.options?.noindex) { + textOptions.NOINDEX = true; + } + updatedSchema[fieldSchema.name] = textOptions; + break; + } + case "numeric": { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const numericOptions: any = { + type: SchemaFieldTypes.NUMERIC, + }; + if (fieldSchema.options?.sortable) { + numericOptions.SORTABLE = true; + } + if (fieldSchema.options?.noindex) { + numericOptions.NOINDEX = true; + } + updatedSchema[fieldSchema.name] = numericOptions; + break; + } + case "geo": { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const geoOptions: any = { + type: SchemaFieldTypes.GEO, + }; + if (fieldSchema.options?.noindex) { + geoOptions.NOINDEX = true; + } + updatedSchema[fieldSchema.name] = geoOptions; + break; + } + default: + // Default to text for unknown types + updatedSchema[fieldSchema.name] = { + type: SchemaFieldTypes.TEXT, + }; + } + } + + return updatedSchema; +} + +/** + * Serializes metadata field values for storage in Redis based on field type. + * + * Converts JavaScript values to the appropriate format for Redis storage: + * - Tag fields: Arrays joined with separator, or string values + * - Text fields: String values + * - Numeric fields: Number values (Date objects are automatically converted to Unix epoch timestamps in seconds) + * - Geo fields: "longitude,latitude" string format + * - Geoshape fields: WKT (Well-Known Text) format strings + * + * @param fieldSchema - The metadata field schema definition + * @param fieldValue - The value to serialize + * @returns The serialized value ready for Redis storage + * + * @see https://redis.io/docs/latest/develop/ai/search-and-query/indexing/field-and-type-options/ + * + * @example + * ```typescript + * const tagSchema = { name: "category", type: "tag" as const }; + * serializeMetadataField(tagSchema, ["electronics", "gadgets"]); + * // Returns: "electronics|gadgets" + * + * const geoSchema = { name: "location", type: "geo" as const }; + * serializeMetadataField(geoSchema, [-122.4194, 37.7749]); + * // Returns: "-122.4194,37.7749" + * + * const geoshapeSchema = { name: "area", type: "geoshape" as const }; + * serializeMetadataField(geoshapeSchema, "POLYGON((0 0, 0 10, 10 10, 10 0, 0 0))"); + * // Returns: "POLYGON((0 0, 0 10, 10 10, 10 0, 0 0))" + * + * const numericSchema = { name: "created_at", type: "numeric" as const }; + * serializeMetadataField(numericSchema, new Date("2023-01-01")); + * // Returns: 1672531200 (Unix epoch timestamp in seconds) + * + * serializeMetadataField(numericSchema, 42); + * // Returns: 42 + * ``` + */ +export function serializeMetadataField( + fieldSchema: MetadataFieldSchema, + fieldValue: unknown +): string | number { + switch (fieldSchema.type) { + case "tag": + return Array.isArray(fieldValue) + ? fieldValue.join( + fieldSchema.options?.separator || DEFAULT_TAG_SEPARATOR + ) + : String(fieldValue); + case "text": + return String(fieldValue); + case "numeric": { + // Convert Date objects to Unix epoch timestamps (seconds) + // Check if it's a Date by checking for getTime method + if ( + fieldValue && + typeof fieldValue === "object" && + "getTime" in fieldValue && + typeof (fieldValue as Date).getTime === "function" + ) { + return Math.floor((fieldValue as Date).getTime() / 1000); + } + return Number(fieldValue); + } + case "geo": + // Expect geo values as "longitude,latitude" string or [lon, lat] array + if (Array.isArray(fieldValue) && fieldValue.length === 2) { + return `${fieldValue[0]},${fieldValue[1]}`; + } + return String(fieldValue); + default: + return String(fieldValue); + } +} + +/** + * Deserializes metadata field values from Redis storage based on field type. + * + * Converts Redis-stored values back to JavaScript types: + * - Tag fields: Splits separator-delimited strings back to arrays (if separator is present) + * - Numeric fields: Converts to numbers (keeps as number, does NOT convert to Date) + * - Geo fields: Converts "longitude,latitude" strings to [lon, lat] arrays + * - Text fields: Returns as-is + * + * Note: Numeric fields are returned as numbers. If you stored a Date object as a Unix epoch + * timestamp, you'll need to manually convert it back to a Date object if needed: + * `new Date(numericValue * 1000)` + * + * @param fieldSchema - The metadata field schema definition + * @param fieldValue - The value from Redis to deserialize + * @returns The deserialized value in JavaScript format + * + * @see https://redis.io/docs/latest/develop/ai/search-and-query/indexing/field-and-type-options/ + * + * @example + * ```typescript + * const tagSchema = { name: "category", type: "tag" as const }; + * deserializeMetadataField(tagSchema, "electronics|gadgets"); + * // Returns: ["electronics", "gadgets"] + * + * const geoSchema = { name: "location", type: "geo" as const }; + * deserializeMetadataField(geoSchema, "-122.4194,37.7749"); + * // Returns: [-122.4194, 37.7749] + * + * const geoshapeSchema = { name: "area", type: "geoshape" as const }; + * deserializeMetadataField(geoshapeSchema, "POLYGON((0 0, 0 10, 10 10, 10 0, 0 0))"); + * // Returns: "POLYGON((0 0, 0 10, 10 10, 10 0, 0 0))" + * + * const numericSchema = { name: "created_at", type: "numeric" as const }; + * deserializeMetadataField(numericSchema, "1672531200"); + * // Returns: 1672531200 (number) + * // To convert to Date: new Date(1672531200 * 1000) + * + * deserializeMetadataField(numericSchema, "42"); + * // Returns: 42 + * ``` + */ +export function deserializeMetadataField( + fieldSchema: MetadataFieldSchema, + fieldValue: unknown +): unknown { + if (fieldValue === undefined || fieldValue === null) { + return fieldValue; + } + + switch (fieldSchema.type) { + case "tag": { + // Convert back from separator-delimited string if needed + const separator = fieldSchema.options?.separator || DEFAULT_TAG_SEPARATOR; + if (typeof fieldValue === "string" && fieldValue.includes(separator)) { + return fieldValue.split(separator); + } + return fieldValue; + } + case "numeric": + // Return as number (do not convert to Date automatically) + return Number(fieldValue); + case "geo": + // Convert back to [longitude, latitude] array if it's a string + if (typeof fieldValue === "string" && fieldValue.includes(",")) { + const [lon, lat] = fieldValue.split(",").map(Number); + return [lon, lat]; + } + return fieldValue; + default: + return fieldValue; + } +} + +/** + * Infers metadata schema from a collection of documents by analyzing their metadata fields. + * + * This function examines the metadata of all provided documents and attempts to infer + * the appropriate field type for each metadata key based on the values found. + * + * Type inference rules: + * - Strings in "lon,lat" format → geo + * - Numbers or Date objects → numeric + * - Array of any type → tag + * - All other types → text + * + * @param documents - Array of documents to analyze + * @returns Array of inferred metadata field schemas + * + * @example + * ```typescript + * const documents = [ + * { pageContent: "...", metadata: { category: "electronics", price: 99 } }, + * { pageContent: "...", metadata: { category: "books", price: 15 } }, + * ]; + * + * const schema = inferMetadataSchema(documents); + * // Returns: [ + * // { name: "text", type: "tag" }, + * // { name: "price", type: "numeric" } + * // ] + * ``` + */ +export function inferMetadataSchema( + documents: Document[] +): MetadataFieldSchema[] { + if (!documents || documents.length === 0) { + return []; + } + + // Collect all metadata fields and their values (preserve duplicates for cardinality analysis) + const fieldValues = new Map(); + + for (const doc of documents) { + if (!doc.metadata || typeof doc.metadata !== "object") { + continue; + } + + for (const [key, value] of Object.entries(doc.metadata)) { + if (value === undefined || value === null) { + continue; + } + + if (!fieldValues.has(key)) { + fieldValues.set(key, []); + } + fieldValues.get(key)!.push(value); + } + } + + // Infer type for each field + const schema: MetadataFieldSchema[] = []; + + for (const [fieldName, values] of fieldValues.entries()) { + const fieldType = inferFieldType(values); + schema.push({ name: fieldName, type: fieldType }); + } + + return schema; +} + +/** + * Checks if two metadata schemas have a mismatch. + * + * This function compares two metadata schema arrays to determine if they contain + * the same fields with matching types. The comparison is order-independent and + * only considers the non-optional properties (name and type) of each field. + * + * @param customSchema - The custom metadata schema to compare + * @param inferredSchema - The inferred metadata schema to compare against + * @returns `true` if there is a mismatch, `false` if the schemas match + * + * @example + * ```typescript + * const customSchema = [ + * { name: "category", type: "tag" }, + * { name: "price", type: "numeric" } + * ]; + * + * const inferredSchema = [ + * { name: "price", type: "numeric" }, + * { name: "category", type: "tag" } + * ]; + * + * checkForSchemaMismatch(customSchema, inferredSchema); + * // Returns: false (schemas match, order doesn't matter) + * + * const mismatchedSchema = [ + * { name: "category", type: "text" }, // Different type + * { name: "price", type: "numeric" } + * ]; + * + * checkForSchemaMismatch(customSchema, mismatchedSchema); + * // Returns: true (type mismatch for "category") + * ``` + */ +export function checkForSchemaMismatch( + customSchema: MetadataFieldSchema[], + inferredSchema: MetadataFieldSchema[] +): boolean { + // If lengths differ, there's a mismatch + if (customSchema.length !== inferredSchema.length) { + return true; + } + + // Create a map of inferred schema fields by name for quick lookup + const inferredMap = new Map(); + for (const field of inferredSchema) { + inferredMap.set(field.name, field); + } + + // Check if all custom schema fields exist in inferred schema with matching type + for (const customField of customSchema) { + const inferredField = inferredMap.get(customField.name); + + // If field doesn't exist in inferred schema, there's a mismatch + if (!inferredField) { + return true; + } + + // Compare non-optional properties: name and type + if (customField.type !== inferredField.type) { + return true; + } + } + + // All fields match + return false; +} + +/** + * Infers the appropriate field type for a metadata field based on its values. + * + * @param values - Array of sample values for this field + * @returns The inferred field type + */ +function inferFieldType(values: unknown[]): "tag" | "text" | "numeric" | "geo" { + if (values.length === 0) { + return "text"; // Default fallback + } + + // Check if all values are geo coordinates + const allGeo = values.every((value) => isGeoCoordinate(value)); + if (allGeo) { + return "geo"; + } + + // Check if all values are numeric or dates + const allNumeric = values.every((value) => isNumberOrDate(value)); + if (allNumeric) { + return "numeric"; + } + + const allArrays = values.every((value) => Array.isArray(value)); + if (allArrays) { + return "tag"; + } + + // Default to text for all other types + return "text"; +} + +/** + * Checks if a value represents a geo coordinate. + * + * @param value - The value to check + * @returns True if the value is a geo coordinate + */ +function isGeoCoordinate(value: unknown): boolean { + // Check for "longitude,latitude" string format + if (typeof value === "string") { + const parts = value.split(","); + if (parts.length === 2) { + const lon = parseFloat(parts[0].trim()); + const lat = parseFloat(parts[1].trim()); + return !Number.isNaN(lon) && !Number.isNaN(lat); + } + } + + return false; +} + +/** + * Checks if a value is a number or a Date object. + * + * @param value - The value to check + * @returns True if the value is a number or Date + */ +function isNumberOrDate(value: unknown): boolean { + return ( + typeof value === "number" || + (typeof value === "object" && + value !== null && + "getTime" in value && + typeof (value as Date).getTime === "function") + ); +} diff --git a/libs/providers/langchain-redis/src/tests/filters.test.ts b/libs/providers/langchain-redis/src/tests/filters.test.ts new file mode 100644 index 000000000000..35a8c8754995 --- /dev/null +++ b/libs/providers/langchain-redis/src/tests/filters.test.ts @@ -0,0 +1,580 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ +import { describe, test, expect } from "vitest"; +import { + FilterExpression, + AndFilter, + OrFilter, + TagFilter, + NumericFilter, + TextFilter, + GeoFilter, + TimestampFilter, + CustomFilter, + Tag, + Num, + Text, + Geo, + Timestamp, + Custom, +} from "../filters.js"; + +describe("TagFilter", () => { + test("creates correct query string for single value", () => { + const filter = new TagFilter("category", "electronics"); + expect(filter.toString()).toBe("@category:{electronics}"); + expect(filter.filterType).toBe("tag"); + }); + + test("creates correct query string for array values", () => { + const filter = new TagFilter("category", ["electronics", "books"]); + expect(filter.toString()).toBe("@category:{electronics|books}"); + }); + + test("creates correct query string for Set values", () => { + const filter = new TagFilter("category", new Set(["electronics", "books"])); + expect(filter.toString()).toBe("@category:{electronics|books}"); + }); + + test("creates correct query string for negation", () => { + const filter = new TagFilter("category", "electronics", true); + expect(filter.toString()).toBe("(-@category:{electronics})"); + }); + + test("returns wildcard for empty array", () => { + const filter = new TagFilter("category", []); + expect(filter.toString()).toBe("*"); + }); + + test("returns wildcard for empty Set", () => { + const filter = new TagFilter("category", new Set()); + expect(filter.toString()).toBe("*"); + }); + + test("Tag convenience function works", () => { + const filter = Tag("category").eq("electronics"); + expect(filter.toString()).toBe("@category:{electronics}"); + expect(filter).toBeInstanceOf(TagFilter); + }); + + test("Tag convenience function with ne works", () => { + const filter = Tag("category").ne("archived"); + expect(filter.toString()).toBe("(-@category:{archived})"); + }); +}); + +describe("NumericFilter", () => { + test("creates correct query string for eq", () => { + const filter = new NumericFilter("price", "eq", 100); + expect(filter.toString()).toBe("@price:[100 100]"); + expect(filter.filterType).toBe("numeric"); + }); + + test("creates correct query string for ne", () => { + const filter = new NumericFilter("price", "ne", 100); + expect(filter.toString()).toBe("(-@price:[100 100])"); + }); + + test("creates correct query string for gt", () => { + const filter = new NumericFilter("price", "gt", 50); + expect(filter.toString()).toBe("@price:[(50 +inf]"); + }); + + test("creates correct query string for gte", () => { + const filter = new NumericFilter("price", "gte", 50); + expect(filter.toString()).toBe("@price:[50 +inf]"); + }); + + test("creates correct query string for lt", () => { + const filter = new NumericFilter("price", "lt", 200); + expect(filter.toString()).toBe("@price:[-inf (200]"); + }); + + test("creates correct query string for lte", () => { + const filter = new NumericFilter("price", "lte", 200); + expect(filter.toString()).toBe("@price:[-inf 200]"); + }); + + test("creates correct query string for between", () => { + const filter = new NumericFilter("price", "between", [50, 200]); + expect(filter.toString()).toBe("@price:[50 200]"); + }); + + test("throws error for between without array", () => { + const filter = new NumericFilter("price", "between", 100 as any); + expect(() => filter.toString()).toThrow( + "Between operator requires array of two numbers" + ); + }); + + test("Num convenience function works", () => { + const eqFilter = Num("price").eq(100); + expect(eqFilter.toString()).toBe("@price:[100 100]"); + expect(eqFilter).toBeInstanceOf(NumericFilter); + + const betweenFilter = Num("price").between(50, 200); + expect(betweenFilter.toString()).toBe("@price:[50 200]"); + }); +}); + +describe("TextFilter", () => { + test("creates correct query string for exact match", () => { + const filter = new TextFilter("title", "wireless headphones", "exact"); + expect(filter.toString()).toBe('@title:("wireless headphones")'); + expect(filter.filterType).toBe("text"); + }); + + test("creates correct query string for match", () => { + const filter = new TextFilter("title", "wireless bluetooth", "match"); + expect(filter.toString()).toBe("@title:(wireless bluetooth)"); + }); + + test("creates correct query string for wildcard", () => { + const filter = new TextFilter("title", "head*", "wildcard"); + expect(filter.toString()).toBe("@title:(head*)"); + }); + + test("creates correct query string for fuzzy", () => { + const filter = new TextFilter("title", "headphone", "fuzzy"); + expect(filter.toString()).toBe("@title:(%%headphone%%)"); + }); + + test("creates correct query string for negation", () => { + const filter = new TextFilter("title", "laptop", "exact", true); + expect(filter.toString()).toBe('(-@title:("laptop"))'); + }); + + test("returns wildcard for empty query", () => { + const filter = new TextFilter("title", "", "exact"); + expect(filter.toString()).toBe("*"); + }); + + test("returns wildcard for whitespace-only query", () => { + const filter = new TextFilter("title", " ", "exact"); + expect(filter.toString()).toBe("*"); + }); + + test("Text convenience function works", () => { + const exactFilter = Text("title").eq("wireless headphones"); + expect(exactFilter.toString()).toBe('@title:("wireless headphones")'); + expect(exactFilter).toBeInstanceOf(TextFilter); + + const wildcardFilter = Text("title").wildcard("*phone*"); + expect(wildcardFilter.toString()).toBe("@title:(*phone*)"); + + const fuzzyFilter = Text("description").fuzzy("blutooth"); + expect(fuzzyFilter.toString()).toBe("@description:(%%blutooth%%)"); + + const neFilter = Text("title").ne("archived"); + expect(neFilter.toString()).toBe('(-@title:("archived"))'); + }); +}); + +describe("GeoFilter", () => { + test("creates correct query string for within", () => { + const filter = new GeoFilter("location", -122.4194, 37.7749, 10, "km"); + expect(filter.toString()).toBe("@location:[-122.4194 37.7749 10 km]"); + expect(filter.filterType).toBe("geo"); + }); + + test("creates correct query string for different units", () => { + const kmFilter = new GeoFilter("location", -122.4194, 37.7749, 10, "km"); + expect(kmFilter.toString()).toBe("@location:[-122.4194 37.7749 10 km]"); + + const miFilter = new GeoFilter("location", -122.4194, 37.7749, 5, "mi"); + expect(miFilter.toString()).toBe("@location:[-122.4194 37.7749 5 mi]"); + + const mFilter = new GeoFilter("location", -122.4194, 37.7749, 1000, "m"); + expect(mFilter.toString()).toBe("@location:[-122.4194 37.7749 1000 m]"); + + const ftFilter = new GeoFilter("location", -122.4194, 37.7749, 5000, "ft"); + expect(ftFilter.toString()).toBe("@location:[-122.4194 37.7749 5000 ft]"); + }); + + test("creates correct query string for outside (negation)", () => { + const filter = new GeoFilter( + "location", + -122.4194, + 37.7749, + 10, + "km", + true + ); + expect(filter.toString()).toBe("(-@location:[-122.4194 37.7749 10 km])"); + }); + + test("Geo convenience function works", () => { + const withinFilter = Geo("location").within(-122.4194, 37.7749, 10, "km"); + expect(withinFilter.toString()).toBe("@location:[-122.4194 37.7749 10 km]"); + expect(withinFilter).toBeInstanceOf(GeoFilter); + + const outsideFilter = Geo("location").outside(-74.006, 40.7128, 50, "mi"); + expect(outsideFilter.toString()).toBe( + "(-@location:[-74.006 40.7128 50 mi])" + ); + }); +}); + +describe("TimestampFilter", () => { + const testDate = new Date("2023-01-01T00:00:00Z"); + const testEpoch = Math.floor(testDate.getTime() / 1000); + + test("creates correct query string for eq with Date", () => { + const filter = new TimestampFilter("created_at", "eq", testDate); + expect(filter.toString()).toBe(`@created_at:[${testEpoch} ${testEpoch}]`); + expect(filter.filterType).toBe("timestamp"); + }); + + test("creates correct query string for eq with epoch", () => { + const filter = new TimestampFilter("created_at", "eq", testEpoch); + expect(filter.toString()).toBe(`@created_at:[${testEpoch} ${testEpoch}]`); + }); + + test("creates correct query string for ne", () => { + const filter = new TimestampFilter("created_at", "ne", testDate); + expect(filter.toString()).toBe( + `(-@created_at:[${testEpoch} ${testEpoch}])` + ); + }); + + test("creates correct query string for gt", () => { + const filter = new TimestampFilter("created_at", "gt", testDate); + expect(filter.toString()).toBe(`@created_at:[(${testEpoch} +inf]`); + }); + + test("creates correct query string for gte", () => { + const filter = new TimestampFilter("created_at", "gte", testDate); + expect(filter.toString()).toBe(`@created_at:[${testEpoch} +inf]`); + }); + + test("creates correct query string for lt", () => { + const filter = new TimestampFilter("created_at", "lt", testDate); + expect(filter.toString()).toBe(`@created_at:[-inf (${testEpoch}]`); + }); + + test("creates correct query string for lte", () => { + const filter = new TimestampFilter("created_at", "lte", testDate); + expect(filter.toString()).toBe(`@created_at:[-inf ${testEpoch}]`); + }); + + test("creates correct query string for between with Dates", () => { + const endDate = new Date("2023-12-31T23:59:59Z"); + const endEpoch = Math.floor(endDate.getTime() / 1000); + const filter = new TimestampFilter("created_at", "between", [ + testDate, + endDate, + ]); + expect(filter.toString()).toBe(`@created_at:[${testEpoch} ${endEpoch}]`); + }); + + test("creates correct query string for between with epochs", () => { + const endEpoch = 1703980799; + const filter = new TimestampFilter("created_at", "between", [ + testEpoch, + endEpoch, + ]); + expect(filter.toString()).toBe(`@created_at:[${testEpoch} ${endEpoch}]`); + }); + + test("throws error for between without array", () => { + const filter = new TimestampFilter( + "created_at", + "between", + testDate as any + ); + expect(() => filter.toString()).toThrow( + "Between operator requires array of two values" + ); + }); + + test("Timestamp convenience function works", () => { + const gtFilter = Timestamp("created_at").gt(testDate); + expect(gtFilter.toString()).toBe(`@created_at:[(${testEpoch} +inf]`); + expect(gtFilter).toBeInstanceOf(TimestampFilter); + + const betweenFilter = Timestamp("created_at").between( + testDate, + new Date("2023-12-31") + ); + expect(betweenFilter).toBeInstanceOf(TimestampFilter); + }); +}); + +describe("CustomFilter", () => { + test("returns query string unmodified", () => { + const filter = new CustomFilter("@category:{electronics}"); + expect(filter.toString()).toBe("@category:{electronics}"); + expect(filter.filterType).toBe("custom"); + }); + + test("handles complex custom query", () => { + const filter = new CustomFilter("(@category:{electronics} @price:[0 100])"); + expect(filter.toString()).toBe("(@category:{electronics} @price:[0 100])"); + }); + + test("handles advanced RediSearch syntax", () => { + const filter = new CustomFilter( + "@title:(wireless|bluetooth) @price:[50 200]" + ); + expect(filter.toString()).toBe( + "@title:(wireless|bluetooth) @price:[50 200]" + ); + }); + + test("handles empty string", () => { + const filter = new CustomFilter(""); + expect(filter.toString()).toBe(""); + }); + + test("handles wildcard", () => { + const filter = new CustomFilter("*"); + expect(filter.toString()).toBe("*"); + }); + + test("Custom convenience function works", () => { + const filter = Custom("@brand:{Apple}"); + expect(filter.toString()).toBe("@brand:{Apple}"); + expect(filter).toBeInstanceOf(CustomFilter); + }); + + test("can be combined with other filters using and()", () => { + const customFilter = Custom("@category:{electronics}"); + const priceFilter = Num("price").lt(100); + const combined = customFilter.and(priceFilter); + + expect(combined).toBeInstanceOf(AndFilter); + expect(combined.toString()).toBe( + "(@category:{electronics} @price:[-inf (100])" + ); + }); + + test("can be combined with other filters using or()", () => { + const customFilter = Custom("@category:{electronics}"); + const tagFilter = Tag("brand").eq("Apple"); + const combined = customFilter.or(tagFilter); + + expect(combined).toBeInstanceOf(OrFilter); + expect(combined.toString()).toBe( + "(@category:{electronics}|@brand:{Apple})" + ); + }); + + test("can be used in complex combinations", () => { + const customFilter = Custom("(@brand:{Apple} @year:[2020 +inf])"); + const priceFilter = Num("price").between(500, 2000); + const combined = customFilter.and(priceFilter); + + expect(combined.toString()).toBe( + "((@brand:{Apple} @year:[2020 +inf]) @price:[500 2000])" + ); + }); +}); + +describe("AndFilter", () => { + test("creates correct query string for AND combination", () => { + const tagFilter = new TagFilter("category", "electronics"); + const priceFilter = new NumericFilter("price", "lt", 100); + const andFilter = new AndFilter([tagFilter, priceFilter]); + + expect(andFilter.toString()).toBe( + "(@category:{electronics} @price:[-inf (100])" + ); + expect(andFilter.filterType).toBe("and"); + }); + + test("handles wildcard on left side", () => { + const wildcardFilter = new TagFilter("empty", []); + const priceFilter = new NumericFilter("price", "lt", 100); + const andFilter = new AndFilter([wildcardFilter, priceFilter]); + + expect(andFilter.toString()).toBe("@price:[-inf (100]"); + }); + + test("handles wildcard on right side", () => { + const tagFilter = new TagFilter("category", "electronics"); + const wildcardFilter = new TagFilter("empty", []); + const andFilter = new AndFilter([tagFilter, wildcardFilter]); + + expect(andFilter.toString()).toBe("@category:{electronics}"); + }); + + test("and() method works on FilterExpression", () => { + const tagFilter = Tag("category").eq("electronics"); + const priceFilter = Num("price").lt(100); + const combined = tagFilter.and(priceFilter); + + expect(combined).toBeInstanceOf(AndFilter); + expect(combined.toString()).toBe( + "(@category:{electronics} @price:[-inf (100])" + ); + }); + + test("chaining multiple and() calls works", () => { + const filter1 = Tag("category").eq("electronics"); + const filter2 = Num("price").lt(100); + const filter3 = Num("rating").gte(4); + const combined = filter1.and(filter2).and(filter3); + + expect(combined.toString()).toBe( + "((@category:{electronics} @price:[-inf (100]) @rating:[4 +inf])" + ); + }); +}); + +describe("OrFilter", () => { + test("creates correct query string for OR combination", () => { + const tagFilter = new TagFilter("category", "electronics"); + const priceFilter = new NumericFilter("price", "gt", 500); + const orFilter = new OrFilter([tagFilter, priceFilter]); + + expect(orFilter.toString()).toBe( + "(@category:{electronics}|@price:[(500 +inf])" + ); + expect(orFilter.filterType).toBe("or"); + }); + + test("handles wildcard on left side", () => { + const wildcardFilter = new TagFilter("empty", []); + const priceFilter = new NumericFilter("price", "lt", 100); + const orFilter = new OrFilter([wildcardFilter, priceFilter]); + + expect(orFilter.toString()).toBe("*"); + }); + + test("handles wildcard on right side", () => { + const tagFilter = new TagFilter("category", "electronics"); + const wildcardFilter = new TagFilter("empty", []); + const orFilter = new OrFilter([tagFilter, wildcardFilter]); + + expect(orFilter.toString()).toBe("*"); + }); + + test("or() method works on FilterExpression", () => { + const tagFilter = Tag("category").eq("electronics"); + const priceFilter = Num("price").gt(500); + const combined = tagFilter.or(priceFilter); + + expect(combined).toBeInstanceOf(OrFilter); + expect(combined.toString()).toBe( + "(@category:{electronics}|@price:[(500 +inf])" + ); + }); + + test("chaining multiple or() calls works", () => { + const filter1 = Tag("category").eq("electronics"); + const filter2 = Tag("category").eq("books"); + const filter3 = Tag("category").eq("clothing"); + const combined = filter1.or(filter2).or(filter3); + + expect(combined.toString()).toBe( + "((@category:{electronics}|@category:{books})|@category:{clothing})" + ); + }); +}); + +describe("Complex Filter Combinations", () => { + test("combines AND and OR filters", () => { + const categoryFilter = Tag("category").eq("electronics"); + const priceFilter = Num("price").lt(100); + const ratingFilter = Num("rating").gte(4); + + // (category=electronics AND price<100) OR rating>=4 + const combined = categoryFilter.and(priceFilter).or(ratingFilter); + + expect(combined.toString()).toBe( + "((@category:{electronics} @price:[-inf (100])|@rating:[4 +inf])" + ); + }); + + test("combines all filter types", () => { + const tagFilter = Tag("category").eq("electronics"); + const textFilter = Text("title").match("wireless"); + const numFilter = Num("price").between(50, 200); + const geoFilter = Geo("location").within(-122.4194, 37.7749, 10, "km"); + const tsFilter = Timestamp("created_at").gt(new Date("2023-01-01")); + + const combined = tagFilter + .and(textFilter) + .and(numFilter) + .and(geoFilter) + .and(tsFilter); + + expect(combined).toBeInstanceOf(AndFilter); + expect(combined.toString()).toContain("@category:{electronics}"); + expect(combined.toString()).toContain("@title:(wireless)"); + expect(combined.toString()).toContain("@price:[50 200]"); + expect(combined.toString()).toContain( + "@location:[-122.4194 37.7749 10 km]" + ); + expect(combined.toString()).toContain("@created_at:"); + }); + + test("nested combinations work correctly", () => { + // (category=electronics OR category=books) AND price<100 + const electronicsFilter = Tag("category").eq("electronics"); + const booksFilter = Tag("category").eq("books"); + const priceFilter = Num("price").lt(100); + + const categoryOr = electronicsFilter.or(booksFilter); + const combined = categoryOr.and(priceFilter); + + expect(combined.toString()).toBe( + "((@category:{electronics}|@category:{books}) @price:[-inf (100])" + ); + }); + + test("complex nested structure", () => { + // ((category=electronics AND price<100) OR (category=books AND price<20)) AND rating>=4 + const electronics = Tag("category") + .eq("electronics") + .and(Num("price").lt(100)); + const books = Tag("category").eq("books").and(Num("price").lt(20)); + const rating = Num("rating").gte(4); + + const combined = electronics.or(books).and(rating); + + expect(combined.toString()).toContain("@category:{electronics}"); + expect(combined.toString()).toContain("@category:{books}"); + expect(combined.toString()).toContain("@rating:[4 +inf]"); + }); +}); + +describe("FilterExpression base class", () => { + test("all filter types extend FilterExpression", () => { + const tagFilter = new TagFilter("category", "electronics"); + const numFilter = new NumericFilter("price", "eq", 100); + const textFilter = new TextFilter("title", "laptop", "exact"); + const geoFilter = new GeoFilter("location", -122.4194, 37.7749, 10, "km"); + const tsFilter = new TimestampFilter("created_at", "eq", new Date()); + const customFilter = new CustomFilter("@field:{value}"); + const andFilter = new AndFilter([tagFilter, numFilter]); + const orFilter = new OrFilter([tagFilter, numFilter]); + + expect(tagFilter).toBeInstanceOf(FilterExpression); + expect(numFilter).toBeInstanceOf(FilterExpression); + expect(textFilter).toBeInstanceOf(FilterExpression); + expect(geoFilter).toBeInstanceOf(FilterExpression); + expect(tsFilter).toBeInstanceOf(FilterExpression); + expect(customFilter).toBeInstanceOf(FilterExpression); + expect(andFilter).toBeInstanceOf(FilterExpression); + expect(orFilter).toBeInstanceOf(FilterExpression); + }); + + test("all filters have filterType property", () => { + expect(new TagFilter("f", "v").filterType).toBe("tag"); + expect(new NumericFilter("f", "eq", 1).filterType).toBe("numeric"); + expect(new TextFilter("f", "v", "exact").filterType).toBe("text"); + expect(new GeoFilter("f", 0, 0, 1, "km").filterType).toBe("geo"); + expect(new TimestampFilter("f", "eq", new Date()).filterType).toBe( + "timestamp" + ); + expect(new CustomFilter("@f:{v}").filterType).toBe("custom"); + expect( + new AndFilter([new TagFilter("f", "v"), new TagFilter("f2", "v2")]) + .filterType + ).toBe("and"); + expect( + new OrFilter([new TagFilter("f", "v"), new TagFilter("f2", "v2")]) + .filterType + ).toBe("or"); + }); +}); diff --git a/libs/providers/langchain-redis/src/tests/schema.test.ts b/libs/providers/langchain-redis/src/tests/schema.test.ts new file mode 100644 index 000000000000..29e624728504 --- /dev/null +++ b/libs/providers/langchain-redis/src/tests/schema.test.ts @@ -0,0 +1,832 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ +import { describe, test, expect } from "vitest"; +import { SchemaFieldTypes } from "redis"; +import { Document } from "@langchain/core/documents"; +import { + buildMetadataSchema, + serializeMetadataField, + deserializeMetadataField, + inferMetadataSchema, + checkForSchemaMismatch, + MetadataFieldSchema, + DEFAULT_TAG_SEPARATOR, +} from "../schema.js"; + +describe("buildMetadataSchema", () => { + test("builds schema for tag field with default options", () => { + const metadataSchema: MetadataFieldSchema[] = [ + { name: "category", type: "tag" }, + ]; + const schema: any = {}; + + const result = buildMetadataSchema(metadataSchema, schema); + + expect(result.category).toEqual({ + type: SchemaFieldTypes.TAG, + SEPARATOR: DEFAULT_TAG_SEPARATOR, + }); + }); + + test("builds schema for tag field with custom separator", () => { + const metadataSchema: MetadataFieldSchema[] = [ + { name: "category", type: "tag", options: { separator: "," } }, + ]; + const schema: any = {}; + + const result = buildMetadataSchema(metadataSchema, schema); + + expect(result.category).toEqual({ + type: SchemaFieldTypes.TAG, + SEPARATOR: ",", + }); + }); + + test("builds schema for tag field with case sensitive option", () => { + const metadataSchema: MetadataFieldSchema[] = [ + { name: "category", type: "tag", options: { caseSensitive: true } }, + ]; + const schema: any = {}; + + const result = buildMetadataSchema(metadataSchema, schema); + + expect(result.category).toEqual({ + type: SchemaFieldTypes.TAG, + SEPARATOR: DEFAULT_TAG_SEPARATOR, + CASESENSITIVE: true, + }); + }); + + test("builds schema for tag field with noindex option", () => { + const metadataSchema: MetadataFieldSchema[] = [ + { name: "category", type: "tag", options: { noindex: true } }, + ]; + const schema: any = {}; + + const result = buildMetadataSchema(metadataSchema, schema); + + expect(result.category).toEqual({ + type: SchemaFieldTypes.TAG, + SEPARATOR: DEFAULT_TAG_SEPARATOR, + NOINDEX: true, + }); + }); + + test("builds schema for text field with default options", () => { + const metadataSchema: MetadataFieldSchema[] = [ + { name: "description", type: "text" }, + ]; + const schema: any = {}; + + const result = buildMetadataSchema(metadataSchema, schema); + + expect(result.description).toEqual({ + type: SchemaFieldTypes.TEXT, + }); + }); + + test("builds schema for text field with weight option", () => { + const metadataSchema: MetadataFieldSchema[] = [ + { name: "description", type: "text", options: { weight: 2.0 } }, + ]; + const schema: any = {}; + + const result = buildMetadataSchema(metadataSchema, schema); + + expect(result.description).toEqual({ + type: SchemaFieldTypes.TEXT, + WEIGHT: 2.0, + }); + }); + + test("builds schema for text field with noStem option", () => { + const metadataSchema: MetadataFieldSchema[] = [ + { name: "description", type: "text", options: { noStem: true } }, + ]; + const schema: any = {}; + + const result = buildMetadataSchema(metadataSchema, schema); + + expect(result.description).toEqual({ + type: SchemaFieldTypes.TEXT, + NOSTEM: true, + }); + }); + + test("builds schema for text field with noindex option", () => { + const metadataSchema: MetadataFieldSchema[] = [ + { name: "description", type: "text", options: { noindex: true } }, + ]; + const schema: any = {}; + + const result = buildMetadataSchema(metadataSchema, schema); + + expect(result.description).toEqual({ + type: SchemaFieldTypes.TEXT, + NOINDEX: true, + }); + }); + + test("builds schema for numeric field with default options", () => { + const metadataSchema: MetadataFieldSchema[] = [ + { name: "price", type: "numeric" }, + ]; + const schema: any = {}; + + const result = buildMetadataSchema(metadataSchema, schema); + + expect(result.price).toEqual({ + type: SchemaFieldTypes.NUMERIC, + }); + }); + + test("builds schema for numeric field with sortable option", () => { + const metadataSchema: MetadataFieldSchema[] = [ + { name: "price", type: "numeric", options: { sortable: true } }, + ]; + const schema: any = {}; + + const result = buildMetadataSchema(metadataSchema, schema); + + expect(result.price).toEqual({ + type: SchemaFieldTypes.NUMERIC, + SORTABLE: true, + }); + }); + + test("builds schema for numeric field with noindex option", () => { + const metadataSchema: MetadataFieldSchema[] = [ + { name: "price", type: "numeric", options: { noindex: true } }, + ]; + const schema: any = {}; + + const result = buildMetadataSchema(metadataSchema, schema); + + expect(result.price).toEqual({ + type: SchemaFieldTypes.NUMERIC, + NOINDEX: true, + }); + }); + + test("builds schema for geo field", () => { + const metadataSchema: MetadataFieldSchema[] = [ + { name: "location", type: "geo" }, + ]; + const schema: any = {}; + + const result = buildMetadataSchema(metadataSchema, schema); + + expect(result.location).toEqual({ + type: SchemaFieldTypes.GEO, + }); + }); + + test("builds schema for geo field with noindex option", () => { + const metadataSchema: MetadataFieldSchema[] = [ + { name: "location", type: "geo", options: { noindex: true } }, + ]; + const schema: any = {}; + + const result = buildMetadataSchema(metadataSchema, schema); + + expect(result.location).toEqual({ + type: SchemaFieldTypes.GEO, + NOINDEX: true, + }); + }); + + test("builds schema for multiple fields", () => { + const metadataSchema: MetadataFieldSchema[] = [ + { name: "category", type: "tag" }, + { name: "price", type: "numeric", options: { sortable: true } }, + { name: "description", type: "text", options: { weight: 2.0 } }, + { name: "location", type: "geo" }, + ]; + const schema: any = {}; + + const result = buildMetadataSchema(metadataSchema, schema); + + expect(result.category).toBeDefined(); + expect(result.price).toBeDefined(); + expect(result.description).toBeDefined(); + expect(result.location).toBeDefined(); + expect(Object.keys(result)).toHaveLength(4); + }); + + test("handles unknown field type by defaulting to text", () => { + const metadataSchema: MetadataFieldSchema[] = [ + { name: "unknown", type: "unknown" as any }, + ]; + const schema: any = {}; + + const result = buildMetadataSchema(metadataSchema, schema); + + expect(result.unknown).toEqual({ + type: SchemaFieldTypes.TEXT, + }); + }); + + test("preserves existing schema fields", () => { + const metadataSchema: MetadataFieldSchema[] = [ + { name: "category", type: "tag" }, + ]; + const schema: any = { + existingField: { type: SchemaFieldTypes.TEXT }, + }; + + const result = buildMetadataSchema(metadataSchema, schema); + + expect(result.existingField).toBeDefined(); + expect(result.category).toBeDefined(); + expect(Object.keys(result)).toHaveLength(2); + }); +}); + +describe("serializeMetadataField", () => { + test("serializes tag field with string value", () => { + const schema: MetadataFieldSchema = { name: "category", type: "tag" }; + const result = serializeMetadataField(schema, "electronics"); + expect(result).toBe("electronics"); + }); + + test("serializes tag field with array value using default separator", () => { + const schema: MetadataFieldSchema = { name: "category", type: "tag" }; + const result = serializeMetadataField(schema, ["electronics", "gadgets"]); + expect(result).toBe("electronics,gadgets"); + }); + + test("serializes tag field with array value using custom separator", () => { + const schema: MetadataFieldSchema = { + name: "category", + type: "tag", + options: { separator: "|" }, + }; + const result = serializeMetadataField(schema, ["electronics", "gadgets"]); + expect(result).toBe("electronics|gadgets"); + }); + + test("serializes text field", () => { + const schema: MetadataFieldSchema = { name: "description", type: "text" }; + const result = serializeMetadataField(schema, "A great product"); + expect(result).toBe("A great product"); + }); + + test("serializes numeric field with number", () => { + const schema: MetadataFieldSchema = { name: "price", type: "numeric" }; + const result = serializeMetadataField(schema, 99.99); + expect(result).toBe(99.99); + }); + + test("serializes numeric field with Date object", () => { + const schema: MetadataFieldSchema = { name: "created_at", type: "numeric" }; + const date = new Date("2023-01-01T00:00:00Z"); + const result = serializeMetadataField(schema, date); + expect(result).toBe(Math.floor(date.getTime() / 1000)); + }); + + test("serializes geo field with array", () => { + const schema: MetadataFieldSchema = { name: "location", type: "geo" }; + const result = serializeMetadataField(schema, [-122.4194, 37.7749]); + expect(result).toBe("-122.4194,37.7749"); + }); + + test("serializes geo field with string", () => { + const schema: MetadataFieldSchema = { name: "location", type: "geo" }; + const result = serializeMetadataField(schema, "-122.4194,37.7749"); + expect(result).toBe("-122.4194,37.7749"); + }); + + test("serializes unknown type as string", () => { + const schema: MetadataFieldSchema = { + name: "unknown", + type: "unknown" as any, + }; + const result = serializeMetadataField(schema, "some value"); + expect(result).toBe("some value"); + }); +}); + +describe("deserializeMetadataField", () => { + test("deserializes tag field with simple string", () => { + const schema: MetadataFieldSchema = { name: "category", type: "tag" }; + const result = deserializeMetadataField(schema, "electronics"); + expect(result).toBe("electronics"); + }); + + test("deserializes tag field with separator-delimited string using default separator", () => { + const schema: MetadataFieldSchema = { name: "category", type: "tag" }; + const result = deserializeMetadataField(schema, "electronics,gadgets"); + expect(result).toEqual(["electronics", "gadgets"]); + }); + + test("deserializes tag field with separator-delimited string using custom separator", () => { + const schema: MetadataFieldSchema = { + name: "category", + type: "tag", + options: { separator: "|" }, + }; + const result = deserializeMetadataField(schema, "electronics|gadgets"); + expect(result).toEqual(["electronics", "gadgets"]); + }); + + test("deserializes tag field without separator as-is", () => { + const schema: MetadataFieldSchema = { name: "category", type: "tag" }; + const result = deserializeMetadataField(schema, "electronics"); + expect(result).toBe("electronics"); + }); + + test("deserializes text field", () => { + const schema: MetadataFieldSchema = { name: "description", type: "text" }; + const result = deserializeMetadataField(schema, "A great product"); + expect(result).toBe("A great product"); + }); + + test("deserializes numeric field from string", () => { + const schema: MetadataFieldSchema = { name: "price", type: "numeric" }; + const result = deserializeMetadataField(schema, "99.99"); + expect(result).toBe(99.99); + }); + + test("deserializes numeric field from number", () => { + const schema: MetadataFieldSchema = { name: "price", type: "numeric" }; + const result = deserializeMetadataField(schema, 99.99); + expect(result).toBe(99.99); + }); + + test("deserializes numeric field (timestamp) as number, not Date", () => { + const schema: MetadataFieldSchema = { name: "created_at", type: "numeric" }; + const epoch = 1672531200; + const result = deserializeMetadataField(schema, epoch.toString()); + expect(result).toBe(epoch); + expect(typeof result).toBe("number"); + }); + + test("deserializes geo field from string to array", () => { + const schema: MetadataFieldSchema = { name: "location", type: "geo" }; + const result = deserializeMetadataField(schema, "-122.4194,37.7749"); + expect(result).toEqual([-122.4194, 37.7749]); + }); + + test("deserializes geo field without comma as-is", () => { + const schema: MetadataFieldSchema = { name: "location", type: "geo" }; + const result = deserializeMetadataField(schema, "invalid"); + expect(result).toBe("invalid"); + }); + + test("handles undefined value", () => { + const schema: MetadataFieldSchema = { name: "category", type: "tag" }; + const result = deserializeMetadataField(schema, undefined); + expect(result).toBeUndefined(); + }); + + test("handles null value", () => { + const schema: MetadataFieldSchema = { name: "category", type: "tag" }; + const result = deserializeMetadataField(schema, null); + expect(result).toBeNull(); + }); + + test("deserializes unknown type as-is", () => { + const schema: MetadataFieldSchema = { + name: "unknown", + type: "unknown" as any, + }; + const result = deserializeMetadataField(schema, "some value"); + expect(result).toBe("some value"); + }); +}); + +describe("Serialization and Deserialization Round-trip", () => { + test("tag field round-trip with array", () => { + const schema: MetadataFieldSchema = { name: "category", type: "tag" }; + const original = ["electronics", "gadgets"]; + const serialized = serializeMetadataField(schema, original); + const deserialized = deserializeMetadataField(schema, serialized); + expect(deserialized).toEqual(original); + }); + + test("tag field round-trip with custom separator", () => { + const schema: MetadataFieldSchema = { + name: "category", + type: "tag", + options: { separator: "|" }, + }; + const original = ["electronics", "gadgets", "tech"]; + const serialized = serializeMetadataField(schema, original); + const deserialized = deserializeMetadataField(schema, serialized); + expect(deserialized).toEqual(original); + }); + + test("numeric field round-trip with Date", () => { + const schema: MetadataFieldSchema = { name: "created_at", type: "numeric" }; + const originalDate = new Date("2023-01-01T00:00:00Z"); + const serialized = serializeMetadataField(schema, originalDate); + const deserialized = deserializeMetadataField(schema, serialized); + + // Deserialized value is a number (epoch timestamp) + expect(typeof deserialized).toBe("number"); + expect(deserialized).toBe(Math.floor(originalDate.getTime() / 1000)); + + // Can be converted back to Date + const reconstructedDate = new Date((deserialized as number) * 1000); + expect(reconstructedDate.getTime()).toBe(originalDate.getTime()); + }); + + test("geo field round-trip with array", () => { + const schema: MetadataFieldSchema = { name: "location", type: "geo" }; + const original = [-122.4194, 37.7749]; + const serialized = serializeMetadataField(schema, original); + const deserialized = deserializeMetadataField(schema, serialized); + expect(deserialized).toEqual(original); + }); + + test("text field round-trip", () => { + const schema: MetadataFieldSchema = { name: "description", type: "text" }; + const original = "A great product with amazing features"; + const serialized = serializeMetadataField(schema, original); + const deserialized = deserializeMetadataField(schema, serialized); + expect(deserialized).toBe(original); + }); + + test("numeric field round-trip with number", () => { + const schema: MetadataFieldSchema = { name: "price", type: "numeric" }; + const original = 99.99; + const serialized = serializeMetadataField(schema, original); + const deserialized = deserializeMetadataField(schema, serialized); + expect(deserialized).toBe(original); + }); +}); + +describe("Edge Cases", () => { + test("handles empty array for tag field", () => { + const schema: MetadataFieldSchema = { name: "category", type: "tag" }; + const serialized = serializeMetadataField(schema, []); + expect(serialized).toBe(""); + }); + + test("handles single-element array for tag field", () => { + const schema: MetadataFieldSchema = { name: "category", type: "tag" }; + const serialized = serializeMetadataField(schema, ["electronics"]); + expect(serialized).toBe("electronics"); + }); + + test("handles zero for numeric field", () => { + const schema: MetadataFieldSchema = { name: "price", type: "numeric" }; + const serialized = serializeMetadataField(schema, 0); + expect(serialized).toBe(0); + const deserialized = deserializeMetadataField(schema, serialized); + expect(deserialized).toBe(0); + }); + + test("handles negative numbers for numeric field", () => { + const schema: MetadataFieldSchema = { + name: "temperature", + type: "numeric", + }; + const serialized = serializeMetadataField(schema, -10.5); + expect(serialized).toBe(-10.5); + }); + + test("handles negative coordinates for geo field", () => { + const schema: MetadataFieldSchema = { name: "location", type: "geo" }; + const serialized = serializeMetadataField(schema, [-122.4194, -37.7749]); + expect(serialized).toBe("-122.4194,-37.7749"); + const deserialized = deserializeMetadataField(schema, serialized); + expect(deserialized).toEqual([-122.4194, -37.7749]); + }); + + test("handles empty string for text field", () => { + const schema: MetadataFieldSchema = { name: "description", type: "text" }; + const serialized = serializeMetadataField(schema, ""); + expect(serialized).toBe(""); + }); +}); + +describe("inferMetadataSchema", () => { + test("infers numeric type for number fields", () => { + const documents = [ + new Document({ pageContent: "doc1", metadata: { price: 99 } }), + new Document({ pageContent: "doc2", metadata: { price: 150 } }), + new Document({ pageContent: "doc3", metadata: { price: 75 } }), + ]; + + const schema = inferMetadataSchema(documents); + + expect(schema).toHaveLength(1); + expect(schema[0]).toEqual({ name: "price", type: "numeric" }); + }); + + test("infers tag type for short categorical strings", () => { + const documents = [ + new Document({ pageContent: "doc1", metadata: { category: ["tech"] } }), + new Document({ pageContent: "doc2", metadata: { category: ["books"] } }), + new Document({ pageContent: "doc3", metadata: { category: ["tech"] } }), + new Document({ pageContent: "doc4", metadata: { category: ["books"] } }), + new Document({ pageContent: "doc5", metadata: { category: ["tech"] } }), + new Document({ + pageContent: "doc6", + metadata: { category: ["books", "tech"] }, + }), + ]; + + const schema = inferMetadataSchema(documents); + + expect(schema).toHaveLength(1); + expect(schema[0]).toEqual({ name: "category", type: "tag" }); + }); + + test("infers text type for long strings", () => { + const documents = [ + new Document({ + pageContent: "doc1", + metadata: { + description: + "This is a very long description that should be indexed as text for full-text search", + }, + }), + new Document({ + pageContent: "doc2", + metadata: { + description: + "Another long description with different content for testing purposes", + }, + }), + ]; + + const schema = inferMetadataSchema(documents); + + expect(schema).toHaveLength(1); + expect(schema[0]).toEqual({ name: "description", type: "text" }); + }); + + test("infers geo type for coordinate arrays", () => { + const documents = [ + new Document({ + pageContent: "doc1", + metadata: { location: "-122.4194, 37.7749" }, + }), + new Document({ + pageContent: "doc2", + metadata: { location: "-118.2437, 34.0522" }, + }), + ]; + + const schema = inferMetadataSchema(documents); + + expect(schema).toHaveLength(1); + expect(schema[0]).toEqual({ name: "location", type: "geo" }); + }); + + test("infers geo type for coordinate strings", () => { + const documents = [ + new Document({ + pageContent: "doc1", + metadata: { location: "-122.4194,37.7749" }, + }), + new Document({ + pageContent: "doc2", + metadata: { location: "-118.2437,34.0522" }, + }), + ]; + + const schema = inferMetadataSchema(documents); + + expect(schema).toHaveLength(1); + expect(schema[0]).toEqual({ name: "location", type: "geo" }); + }); + + test("infers numeric type for Date objects", () => { + const documents = [ + new Document({ + pageContent: "doc1", + metadata: { created_at: new Date("2023-01-01") }, + }), + new Document({ + pageContent: "doc2", + metadata: { created_at: new Date("2023-06-15") }, + }), + ]; + + const schema = inferMetadataSchema(documents); + + expect(schema).toHaveLength(1); + expect(schema[0]).toEqual({ name: "created_at", type: "numeric" }); + }); + + test("infers schema for multiple fields", () => { + const documents = [ + new Document({ + pageContent: "doc1", + metadata: { + category: ["tech"], + price: 99, + description: "A great product with many features", + location: "-122.4194, 37.7749", + }, + }), + new Document({ + pageContent: "doc2", + metadata: { + category: ["books"], + price: 15, + description: "An interesting book about technology", + location: "-118.2437, 34.0522", + }, + }), + new Document({ + pageContent: "doc3", + metadata: { + category: ["tech"], + price: 50, + description: "Another tech product for testing", + location: "-73.935242, 40.730610", + }, + }), + new Document({ + pageContent: "doc4", + metadata: { + category: ["books"], + price: 25, + description: "A comprehensive guide to programming", + location: "-0.127758, 51.507351]", + }, + }), + ]; + + const schema = inferMetadataSchema(documents); + + expect(schema).toHaveLength(4); + expect(schema).toContainEqual({ name: "category", type: "tag" }); + expect(schema).toContainEqual({ name: "price", type: "numeric" }); + expect(schema).toContainEqual({ name: "description", type: "text" }); + expect(schema).toContainEqual({ name: "location", type: "geo" }); + }); + + test("handles empty documents array", () => { + const schema = inferMetadataSchema([]); + expect(schema).toEqual([]); + }); + + test("handles documents with no metadata", () => { + const documents = [ + new Document({ pageContent: "doc1" }), + new Document({ pageContent: "doc2" }), + ]; + + const schema = inferMetadataSchema(documents); + expect(schema).toEqual([]); + }); + + test("handles documents with null/undefined metadata values", () => { + const documents = [ + new Document({ pageContent: "doc1", metadata: { status: null } }), + new Document({ pageContent: "doc2", metadata: { status: undefined } }), + new Document({ pageContent: "doc3", metadata: { status: "new" } }), + new Document({ pageContent: "doc4", metadata: { status: "new" } }), + new Document({ pageContent: "doc5", metadata: { status: "old" } }), + new Document({ pageContent: "doc6", metadata: { status: "new" } }), + ]; + + const schema = inferMetadataSchema(documents); + + expect(schema).toHaveLength(1); + expect(schema[0]).toEqual({ name: "status", type: "text" }); + }); +}); + +describe("checkForSchemaMismatch", () => { + test("returns false when schemas match exactly", () => { + const customSchema: MetadataFieldSchema[] = [ + { name: "category", type: "tag" }, + { name: "price", type: "numeric" }, + ]; + + const inferredSchema: MetadataFieldSchema[] = [ + { name: "category", type: "tag" }, + { name: "price", type: "numeric" }, + ]; + + const result = checkForSchemaMismatch(customSchema, inferredSchema); + expect(result).toBe(false); + }); + + test("returns false when schemas match in different order", () => { + const customSchema: MetadataFieldSchema[] = [ + { name: "price", type: "numeric" }, + { name: "category", type: "tag" }, + ]; + + const inferredSchema: MetadataFieldSchema[] = [ + { name: "category", type: "tag" }, + { name: "price", type: "numeric" }, + ]; + + const result = checkForSchemaMismatch(customSchema, inferredSchema); + expect(result).toBe(false); + }); + + test("returns true when field types differ", () => { + const customSchema: MetadataFieldSchema[] = [ + { name: "category", type: "text" }, // Different type + { name: "price", type: "numeric" }, + ]; + + const inferredSchema: MetadataFieldSchema[] = [ + { name: "category", type: "tag" }, + { name: "price", type: "numeric" }, + ]; + + const result = checkForSchemaMismatch(customSchema, inferredSchema); + expect(result).toBe(true); + }); + + test("returns true when custom schema has extra fields", () => { + const customSchema: MetadataFieldSchema[] = [ + { name: "category", type: "tag" }, + { name: "price", type: "numeric" }, + { name: "extra", type: "text" }, // Extra field + ]; + + const inferredSchema: MetadataFieldSchema[] = [ + { name: "category", type: "tag" }, + { name: "price", type: "numeric" }, + ]; + + const result = checkForSchemaMismatch(customSchema, inferredSchema); + expect(result).toBe(true); + }); + + test("returns true when custom schema is missing fields", () => { + const customSchema: MetadataFieldSchema[] = [ + { name: "category", type: "tag" }, + // Missing price field + ]; + + const inferredSchema: MetadataFieldSchema[] = [ + { name: "category", type: "tag" }, + { name: "price", type: "numeric" }, + ]; + + const result = checkForSchemaMismatch(customSchema, inferredSchema); + expect(result).toBe(true); + }); + + test("ignores optional properties when comparing schemas", () => { + const customSchema: MetadataFieldSchema[] = [ + { + name: "category", + type: "tag", + options: { separator: "|", caseSensitive: true }, + }, + { name: "price", type: "numeric", options: { sortable: true } }, + ]; + + const inferredSchema: MetadataFieldSchema[] = [ + { name: "category", type: "tag" }, + { name: "price", type: "numeric" }, + ]; + + const result = checkForSchemaMismatch(customSchema, inferredSchema); + expect(result).toBe(false); + }); + + test("handles empty schemas", () => { + const customSchema: MetadataFieldSchema[] = []; + const inferredSchema: MetadataFieldSchema[] = []; + + const result = checkForSchemaMismatch(customSchema, inferredSchema); + expect(result).toBe(false); + }); + + test("returns true when field names differ", () => { + const customSchema: MetadataFieldSchema[] = [ + { name: "category", type: "tag" }, + { name: "price", type: "numeric" }, + ]; + + const inferredSchema: MetadataFieldSchema[] = [ + { name: "category", type: "tag" }, + { name: "cost", type: "numeric" }, // Different name + ]; + + const result = checkForSchemaMismatch(customSchema, inferredSchema); + expect(result).toBe(true); + }); + + test("handles schemas with all field types", () => { + const customSchema: MetadataFieldSchema[] = [ + { name: "category", type: "tag" }, + { name: "description", type: "text" }, + { name: "price", type: "numeric" }, + { name: "location", type: "geo" }, + ]; + + const inferredSchema: MetadataFieldSchema[] = [ + { name: "category", type: "tag" }, + { name: "description", type: "text" }, + { name: "price", type: "numeric" }, + { name: "location", type: "geo" }, + ]; + + const result = checkForSchemaMismatch(customSchema, inferredSchema); + expect(result).toBe(false); + }); +}); diff --git a/libs/providers/langchain-redis/src/tests/vectorstores.int.test.ts b/libs/providers/langchain-redis/src/tests/vectorstores.int.test.ts index 206e5e773434..a0b9432240d6 100644 --- a/libs/providers/langchain-redis/src/tests/vectorstores.int.test.ts +++ b/libs/providers/langchain-redis/src/tests/vectorstores.int.test.ts @@ -1,4 +1,4 @@ -import { test, expect, afterAll, beforeAll, describe } from "vitest"; +import { test, expect, describe } from "vitest"; import { RedisClientType, createClient } from "redis"; import { v4 as uuidv4 } from "uuid"; @@ -7,99 +7,1243 @@ import { Document } from "@langchain/core/documents"; import { SyntheticEmbeddings } from "@langchain/core/utils/testing"; import { RedisVectorStore } from "../vectorstores.js"; +import { Geo, Tag, Num, Text, Timestamp } from "../filters.js"; describe("RedisVectorStore", () => { - let vectorStore: RedisVectorStore; - let client: RedisClientType; + test("auto-generated ids", async () => { + const client = createClient({ url: process.env.REDIS_URL }); + await client.connect(); + + const vectorStore = new RedisVectorStore(new SyntheticEmbeddings(), { + redisClient: client as RedisClientType, + indexName: "test-auto-ids", + keyPrefix: "test-auto:", + }); + + try { + const pageContent = faker.lorem.sentence(5); - beforeAll(async () => { - client = createClient({ url: process.env.REDIS_URL }); + await vectorStore.addDocuments([ + { pageContent, metadata: { foo: "bar" } }, + ]); + + const results = await vectorStore.similaritySearch(pageContent, 1); + + expect(results).toEqual([ + new Document({ metadata: { foo: "bar" }, pageContent }), + ]); + } finally { + await vectorStore.delete({ deleteAll: true }); + await client.quit(); + } + }); + + test("user-provided keys", async () => { + const client = createClient({ url: process.env.REDIS_URL }); await client.connect(); - vectorStore = new RedisVectorStore(new SyntheticEmbeddings(), { + const vectorStore = new RedisVectorStore(new SyntheticEmbeddings(), { redisClient: client as RedisClientType, - indexName: "test-index", - keyPrefix: "test:", + indexName: "test-user-keys", + keyPrefix: "test-user:", }); + + try { + const documentKey = `test-user:${uuidv4()}`; + const pageContent = faker.lorem.sentence(5); + + await vectorStore.addDocuments([{ pageContent, metadata: {} }], { + keys: [documentKey], + }); + + const results = await vectorStore.similaritySearch(pageContent, 1); + + expect(results).toEqual([new Document({ metadata: {}, pageContent })]); + } finally { + await vectorStore.delete({ deleteAll: true }); + await client.quit(); + } }); - afterAll(async () => { - await vectorStore.delete({ deleteAll: true }); - await client.quit(); + test("(legacy) metadata filtering", async () => { + const client = createClient({ url: process.env.REDIS_URL }); + await client.connect(); + + const vectorStore = new RedisVectorStore(new SyntheticEmbeddings(), { + redisClient: client as RedisClientType, + indexName: "test-metadata-filter", + keyPrefix: "test-filter:", + filter: ["sentence"], + }); + + try { + const pageContent = faker.lorem.sentence(5); + + await vectorStore.addDocuments([ + { pageContent, metadata: { foo: "bar" } }, + { pageContent, metadata: { foo: "filter by this sentence" } }, + { pageContent, metadata: { foo: "qux" } }, + ]); + + // If the filter wasn't working, we'd get all 3 documents back + const results = await vectorStore.similaritySearch(pageContent, 3); + + expect(results).toEqual([ + new Document({ + metadata: { foo: "filter by this sentence" }, + pageContent, + }), + ]); + } finally { + await vectorStore.delete({ deleteAll: true }); + await client.quit(); + } }); - test("auto-generated ids", async () => { - const pageContent = faker.lorem.sentence(5); + test("delete documents by ids", async () => { + const client = createClient({ url: process.env.REDIS_URL }); + await client.connect(); - await vectorStore.addDocuments([{ pageContent, metadata: { foo: "bar" } }]); + const vectorStore = new RedisVectorStore(new SyntheticEmbeddings(), { + redisClient: client as RedisClientType, + indexName: "test-delete-ids", + keyPrefix: "test-delete:", + }); - const results = await vectorStore.similaritySearch(pageContent, 1); + try { + const documentIds = ["doc1", "doc2"]; + const documentKeys = documentIds.map((id) => `test-delete:${id}`); + const pageContent = faker.lorem.sentence(5); - expect(results).toEqual([ - new Document({ metadata: { foo: "bar" }, pageContent }), - ]); + const documents = documentKeys.map((key) => ({ + pageContent, + metadata: { + id: key, + }, + })); + + await vectorStore.addDocuments(documents, { + keys: documentKeys, + }); + + const results = await vectorStore.similaritySearch(pageContent, 2); + expect(results).toHaveLength(2); + expect(results.map((result) => result.metadata.id)).toEqual(documentKeys); + + await vectorStore.delete({ ids: [documentIds[0]] }); + + const results2 = await vectorStore.similaritySearch(pageContent, 2); + expect(results2).toHaveLength(1); + expect(results2.map((result) => result.metadata.id)).toEqual( + documentKeys.slice(1) + ); + } finally { + await vectorStore.delete({ deleteAll: true }); + await client.quit(); + } }); - test("user-provided keys", async () => { - const documentKey = `test:${uuidv4()}`; - const pageContent = faker.lorem.sentence(5); + test("geo metadata filtering with vector search", async () => { + const geoClient = createClient({ url: process.env.REDIS_URL }); + await geoClient.connect(); - await vectorStore.addDocuments([{ pageContent, metadata: {} }], { - keys: [documentKey], + const geoVectorStore = new RedisVectorStore(new SyntheticEmbeddings(), { + redisClient: geoClient as RedisClientType, + indexName: "test-geo-index", + keyPrefix: "test-geo:", + customSchema: [ + { name: "location", type: "geo" }, + { name: "name", type: "text" }, + { name: "category", type: "tag" }, + ], }); - const results = await vectorStore.similaritySearch(pageContent, 1); + try { + const pageContent = "A great restaurant with amazing food"; + + // Add documents with geo coordinates + // San Francisco: -122.4194, 37.7749 + // New York: -74.0060, 40.7128 + // Los Angeles: -118.2437, 34.0522 + // Seattle: -122.3321, 47.6062 + await geoVectorStore.addDocuments([ + { + pageContent, + metadata: { + name: "Restaurant in San Francisco", + location: [-122.4194, 37.7749], // Array format + category: "restaurant", + }, + }, + { + pageContent, + metadata: { + name: "Restaurant in New York", + location: "-74.0060,40.7128", // String format + category: "restaurant", + }, + }, + { + pageContent, + metadata: { + name: "Restaurant in Los Angeles", + location: [-118.2437, 34.0522], + category: "restaurant", + }, + }, + { + pageContent, + metadata: { + name: "Restaurant in Seattle", + location: [-122.3321, 47.6062], + category: "restaurant", + }, + }, + ]); + + // First, verify all documents were indexed + const allDocs = await geoVectorStore.similaritySearch(pageContent, 10); + expect(allDocs.length).toBe(4); + + // Test 1: Find restaurants within 100km of San Francisco + // Should find: San Francisco only + const sfFilter = Geo("location").within(-122.4194, 37.7749, 100, "km"); + const sfResults = await geoVectorStore.similaritySearch( + pageContent, + 10, + sfFilter + ); + + expect(sfResults.length).toBe(1); + expect(sfResults[0].metadata.name).toContain("San Francisco"); + expect(sfResults.some((r) => r.metadata.name.includes("New York"))).toBe( + false + ); + + // Test 2: Find restaurants within 1000km of San Francisco + // Note: Actual distances from SF: + // - Los Angeles: ~559 km (within range) + // - Seattle: ~1094 km (outside 1000km range) + // - New York: ~4139 km (far outside range) + const westCoastFilter = Geo("location").within( + -122.4194, + 37.7749, + 1000, + "km" + ); + const westCoastResults = await geoVectorStore.similaritySearch( + pageContent, + 10, + westCoastFilter + ); + + expect(westCoastResults.length).toBe(2); + expect( + westCoastResults.some((r) => r.metadata.name.includes("San Francisco")) + ).toBe(true); + expect( + westCoastResults.some((r) => r.metadata.name.includes("Los Angeles")) + ).toBe(true); + expect( + westCoastResults.some((r) => r.metadata.name.includes("Seattle")) + ).toBe(false); + expect( + westCoastResults.some((r) => r.metadata.name.includes("New York")) + ).toBe(false); + + // Test 3: Find restaurants within 5000km of San Francisco + // Should find: All restaurants + const allFilter = Geo("location").within(-122.4194, 37.7749, 5000, "km"); + const allResults = await geoVectorStore.similaritySearch( + pageContent, + 10, + allFilter + ); + + expect(allResults.length).toBe(4); + + // Test 4: Find restaurants outside 100km of San Francisco + // Should find: New York, Los Angeles (not San Francisco, possibly not Seattle) + const outsideFilter = Geo("location").outside( + -122.4194, + 37.7749, + 100, + "km" + ); + const outsideResults = await geoVectorStore.similaritySearch( + pageContent, + 10, + outsideFilter + ); + + expect(outsideResults.length).toBeGreaterThanOrEqual(2); + expect( + outsideResults.some((r) => r.metadata.name.includes("San Francisco")) + ).toBe(false); + expect( + outsideResults.some((r) => r.metadata.name.includes("New York")) + ).toBe(true); - expect(results).toEqual([new Document({ metadata: {}, pageContent })]); + // Test 5: Verify geo coordinates are returned correctly + const firstResult = allResults[0]; + expect(firstResult.metadata.location).toBeDefined(); + // Location should be returned as [lon, lat] array after deserialization + expect(Array.isArray(firstResult.metadata.location)).toBe(true); + expect(firstResult.metadata.location).toHaveLength(2); + } finally { + // Clean up + await geoVectorStore.delete({ deleteAll: true }); + await geoClient.quit(); + } }); - test("metadata filtering", async () => { - await vectorStore.dropIndex(); - const pageContent = faker.lorem.sentence(5); - const uuid = uuidv4(); + test("tag filter - single value eq", async () => { + const tagClient = createClient({ url: process.env.REDIS_URL }); + await tagClient.connect(); - await vectorStore.addDocuments([ - { pageContent, metadata: { foo: "bar" } }, - { pageContent, metadata: { foo: uuid } }, - { pageContent, metadata: { foo: "qux" } }, - ]); + const tagVectorStore = new RedisVectorStore(new SyntheticEmbeddings(), { + redisClient: tagClient as RedisClientType, + indexName: "test-tag-index", + keyPrefix: "test-tag:", + customSchema: [ + { name: "category", type: "tag" }, + { name: "brand", type: "tag" }, + ], + }); + + try { + const pageContent = "Product description"; + + await tagVectorStore.addDocuments([ + { pageContent, metadata: { category: "electronics", brand: "Apple" } }, + { pageContent, metadata: { category: "books", brand: "Penguin" } }, + { + pageContent, + metadata: { category: "electronics", brand: "Samsung" }, + }, + { pageContent, metadata: { category: "clothing", brand: "Nike" } }, + ]); - // If the filter wasn't working, we'd get all 3 documents back - const results = await vectorStore.similaritySearch(pageContent, 3, [ - `${uuid}`, - ]); + // Test single value eq + const filter = Tag("category").eq("electronics"); + const results = await tagVectorStore.similaritySearch( + pageContent, + 10, + filter + ); - expect(results).toEqual([ - new Document({ metadata: { foo: uuid }, pageContent }), - ]); + expect(results.length).toBe(2); + expect(results.every((r) => r.metadata.category === "electronics")).toBe( + true + ); + } finally { + await tagVectorStore.delete({ deleteAll: true }); + await tagClient.quit(); + } }); - test("delete documents by ids", async () => { - const documentIds = ["doc1", "doc2"]; - const documentKeys = documentIds.map((id) => `test:${id}`); - const pageContent = faker.lorem.sentence(5); + test("tag filter - multiple values eq (OR logic)", async () => { + const tagClient = createClient({ url: process.env.REDIS_URL }); + await tagClient.connect(); + + const tagVectorStore = new RedisVectorStore(new SyntheticEmbeddings(), { + redisClient: tagClient as RedisClientType, + indexName: "test-tag-multi-index", + keyPrefix: "test-tag-multi:", + customSchema: [{ name: "category", type: "tag" }], + }); + + try { + const pageContent = "Product description"; + + await tagVectorStore.addDocuments([ + { pageContent, metadata: { category: "electronics" } }, + { pageContent, metadata: { category: "books" } }, + { pageContent, metadata: { category: "clothing" } }, + { pageContent, metadata: { category: "sports" } }, + ]); + + // Test multiple values (OR logic) + const filter = Tag("category").eq(["electronics", "books"]); + const results = await tagVectorStore.similaritySearch( + pageContent, + 10, + filter + ); + + expect(results.length).toBe(2); + expect( + results.every( + (r) => + r.metadata.category === "electronics" || + r.metadata.category === "books" + ) + ).toBe(true); + } finally { + await tagVectorStore.delete({ deleteAll: true }); + await tagClient.quit(); + } + }); + + test("tag filter - ne (negation)", async () => { + const tagClient = createClient({ url: process.env.REDIS_URL }); + await tagClient.connect(); + + const tagVectorStore = new RedisVectorStore(new SyntheticEmbeddings(), { + redisClient: tagClient as RedisClientType, + indexName: "test-tag-ne-index", + keyPrefix: "test-tag-ne:", + customSchema: [{ name: "category", type: "tag" }], + }); + + try { + const pageContent = "Product description"; + + await tagVectorStore.addDocuments([ + { pageContent, metadata: { category: "electronics" } }, + { pageContent, metadata: { category: "books" } }, + { pageContent, metadata: { category: "clothing" } }, + ]); + + // Test negation + const filter = Tag("category").ne("electronics"); + const results = await tagVectorStore.similaritySearch( + pageContent, + 10, + filter + ); + + expect(results.length).toBe(2); + expect(results.every((r) => r.metadata.category !== "electronics")).toBe( + true + ); + } finally { + await tagVectorStore.delete({ deleteAll: true }); + await tagClient.quit(); + } + }); + + test("numeric filter - eq, gt, gte, lt, lte", async () => { + const numClient = createClient({ url: process.env.REDIS_URL }); + await numClient.connect(); + + const numVectorStore = new RedisVectorStore(new SyntheticEmbeddings(), { + redisClient: numClient as RedisClientType, + indexName: "test-num-index", + keyPrefix: "test-num:", + customSchema: [ + { name: "price", type: "numeric" }, + { name: "rating", type: "numeric" }, + ], + }); + + try { + const pageContent = "Product description"; + + await numVectorStore.addDocuments([ + { pageContent, metadata: { price: 50, rating: 4.5 } }, + { pageContent, metadata: { price: 100, rating: 4.0 } }, + { pageContent, metadata: { price: 150, rating: 4.8 } }, + { pageContent, metadata: { price: 200, rating: 3.5 } }, + ]); + + // Test eq + const eqFilter = Num("price").eq(100); + const eqResults = await numVectorStore.similaritySearch( + pageContent, + 10, + eqFilter + ); + expect(eqResults.length).toBe(1); + expect(eqResults[0].metadata.price).toBe(100); + + // Test gt + const gtFilter = Num("price").gt(100); + const gtResults = await numVectorStore.similaritySearch( + pageContent, + 10, + gtFilter + ); + expect(gtResults.length).toBe(2); + expect(gtResults.every((r) => r.metadata.price > 100)).toBe(true); + + // Test gte + const gteFilter = Num("price").gte(100); + const gteResults = await numVectorStore.similaritySearch( + pageContent, + 10, + gteFilter + ); + expect(gteResults.length).toBe(3); + expect(gteResults.every((r) => r.metadata.price >= 100)).toBe(true); - const documents = documentKeys.map((key) => ({ - pageContent, - metadata: { - id: key, - }, - })); + // Test lt + const ltFilter = Num("price").lt(150); + const ltResults = await numVectorStore.similaritySearch( + pageContent, + 10, + ltFilter + ); + expect(ltResults.length).toBe(2); + expect(ltResults.every((r) => r.metadata.price < 150)).toBe(true); - await vectorStore.addDocuments(documents, { - keys: documentKeys, + // Test lte + const lteFilter = Num("price").lte(150); + const lteResults = await numVectorStore.similaritySearch( + pageContent, + 10, + lteFilter + ); + expect(lteResults.length).toBe(3); + expect(lteResults.every((r) => r.metadata.price <= 150)).toBe(true); + } finally { + await numVectorStore.delete({ deleteAll: true }); + await numClient.quit(); + } + }); + + test("numeric filter - between and ne", async () => { + const numClient = createClient({ url: process.env.REDIS_URL }); + await numClient.connect(); + + const numVectorStore = new RedisVectorStore(new SyntheticEmbeddings(), { + redisClient: numClient as RedisClientType, + indexName: "test-num-between-index", + keyPrefix: "test-num-between:", + customSchema: [{ name: "price", type: "numeric" }], }); - const results = await vectorStore.similaritySearch(pageContent, 2); - expect(results).toHaveLength(2); - expect(results.map((result) => result.metadata.id)).toEqual(documentKeys); + try { + const pageContent = "Product description"; - await vectorStore.delete({ ids: [documentIds[0]] }); + await numVectorStore.addDocuments([ + { pageContent, metadata: { price: 50 } }, + { pageContent, metadata: { price: 100 } }, + { pageContent, metadata: { price: 150 } }, + { pageContent, metadata: { price: 200 } }, + ]); - const results2 = await vectorStore.similaritySearch(pageContent, 2); - expect(results2).toHaveLength(1); - expect(results2.map((result) => result.metadata.id)).toEqual( - documentKeys.slice(1) + // Test between + const betweenFilter = Num("price").between(75, 175); + const betweenResults = await numVectorStore.similaritySearch( + pageContent, + 10, + betweenFilter + ); + expect(betweenResults.length).toBe(2); + expect( + betweenResults.every( + (r) => r.metadata.price >= 75 && r.metadata.price <= 175 + ) + ).toBe(true); + + // Test ne + const neFilter = Num("price").ne(100); + const neResults = await numVectorStore.similaritySearch( + pageContent, + 10, + neFilter + ); + expect(neResults.length).toBe(3); + expect(neResults.every((r) => r.metadata.price !== 100)).toBe(true); + } finally { + await numVectorStore.delete({ deleteAll: true }); + await numClient.quit(); + } + }); + + test("text filter - exact match", async () => { + const textClient = createClient({ url: process.env.REDIS_URL }); + await textClient.connect(); + + const textVectorStore = new RedisVectorStore(new SyntheticEmbeddings(), { + redisClient: textClient as RedisClientType, + indexName: "test-text-index", + keyPrefix: "test-text:", + customSchema: [ + { name: "title", type: "text" }, + { name: "description", type: "text" }, + ], + }); + + try { + const pageContent = "Product description"; + + await textVectorStore.addDocuments([ + { + pageContent, + metadata: { + title: "wireless headphones", + description: "bluetooth audio", + }, + }, + { + pageContent, + metadata: { title: "wired headphones", description: "audio cable" }, + }, + { + pageContent, + metadata: { title: "wireless speaker", description: "bluetooth" }, + }, + ]); + + // Test exact match + const exactFilter = Text("title").eq("wireless headphones"); + const exactResults = await textVectorStore.similaritySearch( + pageContent, + 10, + exactFilter + ); + expect(exactResults.length).toBe(1); + expect(exactResults[0].metadata.title).toBe("wireless headphones"); + } finally { + await textVectorStore.delete({ deleteAll: true }); + await textClient.quit(); + } + }); + + test("text filter - wildcard and match", async () => { + const textClient = createClient({ url: process.env.REDIS_URL }); + await textClient.connect(); + + const textVectorStore = new RedisVectorStore(new SyntheticEmbeddings(), { + redisClient: textClient as RedisClientType, + indexName: "test-text-wildcard-index", + keyPrefix: "test-text-wildcard:", + customSchema: [{ name: "title", type: "text" }], + }); + + try { + const pageContent = "Product description"; + + await textVectorStore.addDocuments([ + { pageContent, metadata: { title: "wireless headphones" } }, + { pageContent, metadata: { title: "wired headphones" } }, + { pageContent, metadata: { title: "wireless speaker" } }, + { pageContent, metadata: { title: "bluetooth earbuds" } }, + ]); + + // Test wildcard + const wildcardFilter = Text("title").wildcard("*headphones*"); + const wildcardResults = await textVectorStore.similaritySearch( + pageContent, + 10, + wildcardFilter + ); + expect(wildcardResults.length).toBe(2); + expect( + wildcardResults.every((r) => r.metadata.title.includes("headphones")) + ).toBe(true); + + // Test match (tokenized search) + const matchFilter = Text("title").match("wireless"); + const matchResults = await textVectorStore.similaritySearch( + pageContent, + 10, + matchFilter + ); + expect(matchResults.length).toBe(2); + expect( + matchResults.every((r) => r.metadata.title.includes("wireless")) + ).toBe(true); + } finally { + await textVectorStore.delete({ deleteAll: true }); + await textClient.quit(); + } + }); + + test("text filter - fuzzy and ne", async () => { + const textClient = createClient({ url: process.env.REDIS_URL }); + await textClient.connect(); + + const textVectorStore = new RedisVectorStore(new SyntheticEmbeddings(), { + redisClient: textClient as RedisClientType, + indexName: "test-text-fuzzy-index", + keyPrefix: "test-text-fuzzy:", + customSchema: [{ name: "title", type: "text" }], + }); + + try { + const pageContent = "Product description"; + + await textVectorStore.addDocuments([ + { pageContent, metadata: { title: "bluetooth speaker" } }, + { pageContent, metadata: { title: "wireless headphones" } }, + { pageContent, metadata: { title: "wired earbuds" } }, + ]); + + // Test fuzzy (allows typos) + const fuzzyFilter = Text("title").fuzzy("blutooth"); + const fuzzyResults = await textVectorStore.similaritySearch( + pageContent, + 10, + fuzzyFilter + ); + // Fuzzy search should find "bluetooth" even with typo + expect(fuzzyResults.length).toBeGreaterThanOrEqual(1); + expect( + fuzzyResults.some((r) => r.metadata.title.includes("bluetooth")) + ).toBe(true); + + // Test ne (negation) + const neFilter = Text("title").ne("bluetooth speaker"); + const neResults = await textVectorStore.similaritySearch( + pageContent, + 10, + neFilter + ); + expect(neResults.length).toBe(2); + expect( + neResults.every((r) => r.metadata.title !== "bluetooth speaker") + ).toBe(true); + } finally { + await textVectorStore.delete({ deleteAll: true }); + await textClient.quit(); + } + }); + + test("timestamp filter - Date objects", async () => { + const tsClient = createClient({ url: process.env.REDIS_URL }); + await tsClient.connect(); + + const tsVectorStore = new RedisVectorStore(new SyntheticEmbeddings(), { + redisClient: tsClient as RedisClientType, + indexName: "test-timestamp-index", + keyPrefix: "test-timestamp:", + customSchema: [ + { name: "created_at", type: "numeric" }, + { name: "updated_at", type: "numeric" }, + ], + }); + + try { + const pageContent = "Document content"; + const date1 = new Date("2023-01-01T00:00:00Z"); + const date2 = new Date("2023-06-01T00:00:00Z"); + const date3 = new Date("2023-09-01T00:00:00Z"); + const date4 = new Date("2023-12-01T00:00:00Z"); + + await tsVectorStore.addDocuments([ + { + pageContent, + metadata: { + created_at: Math.floor(date1.getTime() / 1000), + updated_at: Math.floor(date1.getTime() / 1000), + }, + }, + { + pageContent, + metadata: { + created_at: Math.floor(date2.getTime() / 1000), + updated_at: Math.floor(date2.getTime() / 1000), + }, + }, + { + pageContent, + metadata: { + created_at: Math.floor(date3.getTime() / 1000), + updated_at: Math.floor(date3.getTime() / 1000), + }, + }, + { + pageContent, + metadata: { + created_at: Math.floor(date4.getTime() / 1000), + updated_at: Math.floor(date4.getTime() / 1000), + }, + }, + ]); + + // Test gt with Date + const gtFilter = Timestamp("created_at").gt(date2); + const gtResults = await tsVectorStore.similaritySearch( + pageContent, + 10, + gtFilter + ); + expect(gtResults.length).toBe(2); + expect( + gtResults.every( + (r) => r.metadata.created_at > Math.floor(date2.getTime() / 1000) + ) + ).toBe(true); + + // Test gte with Date + const gteFilter = Timestamp("created_at").gte(date2); + const gteResults = await tsVectorStore.similaritySearch( + pageContent, + 10, + gteFilter + ); + expect(gteResults.length).toBe(3); + + // Test lt with Date + const ltFilter = Timestamp("created_at").lt(date3); + const ltResults = await tsVectorStore.similaritySearch( + pageContent, + 10, + ltFilter + ); + expect(ltResults.length).toBe(2); + + // Test between with Date + const betweenFilter = Timestamp("created_at").between(date2, date3); + const betweenResults = await tsVectorStore.similaritySearch( + pageContent, + 10, + betweenFilter + ); + expect(betweenResults.length).toBe(2); + } finally { + await tsVectorStore.delete({ deleteAll: true }); + await tsClient.quit(); + } + }); + + test("timestamp filter - epoch timestamps", async () => { + const tsClient = createClient({ url: process.env.REDIS_URL }); + await tsClient.connect(); + + const tsVectorStore = new RedisVectorStore(new SyntheticEmbeddings(), { + redisClient: tsClient as RedisClientType, + indexName: "test-timestamp-epoch-index", + keyPrefix: "test-timestamp-epoch:", + customSchema: [{ name: "created_at", type: "numeric" }], + }); + + try { + const pageContent = "Document content"; + const epoch1 = 1672531200; // 2023-01-01 + const epoch2 = 1685577600; // 2023-06-01 + const epoch3 = 1693526400; // 2023-09-01 + + await tsVectorStore.addDocuments([ + { pageContent, metadata: { created_at: epoch1 } }, + { pageContent, metadata: { created_at: epoch2 } }, + { pageContent, metadata: { created_at: epoch3 } }, + ]); + + // Test eq with epoch + const eqFilter = Timestamp("created_at").eq(epoch2); + const eqResults = await tsVectorStore.similaritySearch( + pageContent, + 10, + eqFilter + ); + expect(eqResults.length).toBe(1); + expect(eqResults[0].metadata.created_at).toBe(epoch2); + + // Test ne with epoch + const neFilter = Timestamp("created_at").ne(epoch2); + const neResults = await tsVectorStore.similaritySearch( + pageContent, + 10, + neFilter + ); + expect(neResults.length).toBe(2); + expect(neResults.every((r) => r.metadata.created_at !== epoch2)).toBe( + true + ); + } finally { + await tsVectorStore.delete({ deleteAll: true }); + await tsClient.quit(); + } + }); + + test("combined filters - AND operation", async () => { + const combinedClient = createClient({ url: process.env.REDIS_URL }); + await combinedClient.connect(); + + const combinedVectorStore = new RedisVectorStore( + new SyntheticEmbeddings(), + { + redisClient: combinedClient as RedisClientType, + indexName: "test-combined-and-index", + keyPrefix: "test-combined-and:", + customSchema: [ + { name: "category", type: "tag" }, + { name: "price", type: "numeric" }, + { name: "rating", type: "numeric" }, + ], + } + ); + + try { + const pageContent = "Product description"; + + await combinedVectorStore.addDocuments([ + { + pageContent, + metadata: { category: "electronics", price: 50, rating: 4.5 }, + }, + { + pageContent, + metadata: { category: "electronics", price: 150, rating: 4.8 }, + }, + { + pageContent, + metadata: { category: "books", price: 20, rating: 4.2 }, + }, + { + pageContent, + metadata: { category: "electronics", price: 200, rating: 3.5 }, + }, + ]); + + // Test AND: category=electronics AND price < 100 + const andFilter = Tag("category") + .eq("electronics") + .and(Num("price").lt(100)); + const andResults = await combinedVectorStore.similaritySearch( + pageContent, + 10, + andFilter + ); + expect(andResults.length).toBe(1); + expect(andResults[0].metadata.category).toBe("electronics"); + expect(andResults[0].metadata.price).toBe(50); + + // Test multiple AND: category=electronics AND price >= 100 AND rating > 4 + const multiAndFilter = Tag("category") + .eq("electronics") + .and(Num("price").gte(100)) + .and(Num("rating").gt(4)); + const multiAndResults = await combinedVectorStore.similaritySearch( + pageContent, + 10, + multiAndFilter + ); + expect(multiAndResults.length).toBe(1); + expect(multiAndResults[0].metadata.price).toBe(150); + expect(multiAndResults[0].metadata.rating).toBe(4.8); + } finally { + await combinedVectorStore.delete({ deleteAll: true }); + await combinedClient.quit(); + } + }); + + test("combined filters - OR operation", async () => { + const combinedClient = createClient({ url: process.env.REDIS_URL }); + await combinedClient.connect(); + + const combinedVectorStore = new RedisVectorStore( + new SyntheticEmbeddings(), + { + redisClient: combinedClient as RedisClientType, + indexName: "test-combined-or-index", + keyPrefix: "test-combined-or:", + customSchema: [ + { name: "category", type: "tag" }, + { name: "price", type: "numeric" }, + ], + } + ); + + try { + const pageContent = "Product description"; + + await combinedVectorStore.addDocuments([ + { pageContent, metadata: { category: "electronics", price: 50 } }, + { pageContent, metadata: { category: "books", price: 20 } }, + { pageContent, metadata: { category: "clothing", price: 80 } }, + { pageContent, metadata: { category: "electronics", price: 200 } }, + ]); + + // Test OR: category=books OR price > 150 + const orFilter = Tag("category").eq("books").or(Num("price").gt(150)); + const orResults = await combinedVectorStore.similaritySearch( + pageContent, + 10, + orFilter + ); + expect(orResults.length).toBe(2); + expect( + orResults.some((r) => r.metadata.category === "books") && + orResults.some((r) => r.metadata.price > 150) + ).toBe(true); + } finally { + await combinedVectorStore.delete({ deleteAll: true }); + await combinedClient.quit(); + } + }); + + test("combined filters - complex nested AND/OR", async () => { + const combinedClient = createClient({ url: process.env.REDIS_URL }); + await combinedClient.connect(); + + const combinedVectorStore = new RedisVectorStore( + new SyntheticEmbeddings(), + { + redisClient: combinedClient as RedisClientType, + indexName: "test-combined-complex-index", + keyPrefix: "test-combined-complex:", + customSchema: [ + { name: "category", type: "tag" }, + { name: "brand", type: "tag" }, + { name: "price", type: "numeric" }, + ], + } ); + + try { + const pageContent = "Product description"; + + await combinedVectorStore.addDocuments([ + { + pageContent, + metadata: { category: "electronics", brand: "Apple", price: 999 }, + }, + { + pageContent, + metadata: { category: "electronics", brand: "Samsung", price: 799 }, + }, + { + pageContent, + metadata: { category: "books", brand: "Penguin", price: 15 }, + }, + { + pageContent, + metadata: { category: "electronics", brand: "Sony", price: 599 }, + }, + ]); + + // Test complex: (category=electronics AND price < 800) OR brand=Penguin + const complexFilter = Tag("category") + .eq("electronics") + .and(Num("price").lt(800)) + .or(Tag("brand").eq("Penguin")); + const complexResults = await combinedVectorStore.similaritySearch( + pageContent, + 10, + complexFilter + ); + expect(complexResults.length).toBe(3); + expect( + complexResults.some((r) => r.metadata.brand === "Samsung") && + complexResults.some((r) => r.metadata.brand === "Sony") && + complexResults.some((r) => r.metadata.brand === "Penguin") + ).toBe(true); + } finally { + await combinedVectorStore.delete({ deleteAll: true }); + await combinedClient.quit(); + } + }); + + test("combined filters - mixing all filter types", async () => { + const mixedClient = createClient({ url: process.env.REDIS_URL }); + await mixedClient.connect(); + + const mixedVectorStore = new RedisVectorStore(new SyntheticEmbeddings(), { + redisClient: mixedClient as RedisClientType, + indexName: "test-mixed-filters-index", + keyPrefix: "test-mixed-filters:", + customSchema: [ + { name: "category", type: "tag" }, + { name: "title", type: "text" }, + { name: "price", type: "numeric" }, + { name: "created_at", type: "numeric" }, + ], + }); + + try { + const pageContent = "Product description"; + const date1 = new Date("2023-01-01T00:00:00Z"); + const date2 = new Date("2023-06-01T00:00:00Z"); + + await mixedVectorStore.addDocuments([ + { + pageContent, + metadata: { + category: "electronics", + title: "wireless headphones", + price: 99, + created_at: Math.floor(date1.getTime() / 1000), + }, + }, + { + pageContent, + metadata: { + category: "electronics", + title: "bluetooth speaker", + price: 149, + created_at: Math.floor(date2.getTime() / 1000), + }, + }, + { + pageContent, + metadata: { + category: "books", + title: "wireless networking guide", + price: 29, + created_at: Math.floor(date2.getTime() / 1000), + }, + }, + ]); + + // Test mixing Tag, Text, Numeric, and Timestamp filters + // Find: category=electronics AND title contains "wireless" AND price < 120 AND created after 2022 + const mixedFilter = Tag("category") + .eq("electronics") + .and(Text("title").match("wireless")) + .and(Num("price").lt(120)) + .and(Timestamp("created_at").gt(new Date("2022-01-01T00:00:00Z"))); + + const mixedResults = await mixedVectorStore.similaritySearch( + pageContent, + 10, + mixedFilter + ); + + expect(mixedResults.length).toBe(1); + expect(mixedResults[0].metadata.category).toBe("electronics"); + expect(mixedResults[0].metadata.title).toBe("wireless headphones"); + expect(mixedResults[0].metadata.price).toBe(99); + } finally { + await mixedVectorStore.delete({ deleteAll: true }); + await mixedClient.quit(); + } + }); + + test("tag filter - Set values", async () => { + const tagClient = createClient({ url: process.env.REDIS_URL }); + await tagClient.connect(); + + const tagVectorStore = new RedisVectorStore(new SyntheticEmbeddings(), { + redisClient: tagClient as RedisClientType, + indexName: "test-tag-set-index", + keyPrefix: "test-tag-set:", + customSchema: [{ name: "category", type: "tag" }], + }); + + try { + const pageContent = "Product description"; + + await tagVectorStore.addDocuments([ + { pageContent, metadata: { category: "electronics" } }, + { pageContent, metadata: { category: "books" } }, + { pageContent, metadata: { category: "clothing" } }, + { pageContent, metadata: { category: "sports" } }, + ]); + + // Test with Set values + const filter = Tag("category").eq(new Set(["electronics", "books"])); + const results = await tagVectorStore.similaritySearch( + pageContent, + 10, + filter + ); + + expect(results.length).toBe(2); + expect( + results.every( + (r) => + r.metadata.category === "electronics" || + r.metadata.category === "books" + ) + ).toBe(true); + } finally { + await tagVectorStore.delete({ deleteAll: true }); + await tagClient.quit(); + } + }); + + test("legacy metadata format detection and compatibility", async () => { + const client = createClient({ url: process.env.REDIS_URL }); + await client.connect(); + + const indexName = "test-legacy-metadata-index"; + const keyPrefix = "test-legacy:"; + + try { + // Step 1: Create a vector store with legacy metadata format (single TEXT field for metadata) + // This simulates an old index created before the new metadata schema feature + const legacyVectorStore = new RedisVectorStore( + new SyntheticEmbeddings(), + { + redisClient: client as RedisClientType, + indexName, + keyPrefix, + customSchema: [{ name: "metadata", type: "text" }], + } + ); + + const pageContent = faker.lorem.sentence(5); + + // Add documents with metadata stored as JSON in a single field + await legacyVectorStore.addDocuments([ + { pageContent, metadata: { category: "electronics", price: 99 } }, + { pageContent, metadata: { category: "books", price: 15 } }, + { pageContent, metadata: { category: "electronics", price: 149 } }, + ]); + + // Verify data was added + const legacyResults = await legacyVectorStore.similaritySearch( + pageContent, + 3 + ); + expect(legacyResults.length).toBe(3); + expect(legacyResults[0].metadata.category).toBeDefined(); + expect(legacyResults[0].metadata.price).toBeDefined(); + + // Step 2: Create a new vector store instance accessing the same index + // This simulates opening an existing legacy index with the latest version + const newVectorStore = new RedisVectorStore(new SyntheticEmbeddings(), { + redisClient: client as RedisClientType, + indexName, + keyPrefix, + // No customSchema provided - should detect legacy format + }); + + // Step 3: Verify the index state is detected as "legacy" + const indexState = await newVectorStore.checkIndexState(); + expect(indexState).toBe("legacy"); + + // Ensure the schema is set up by calling createIndex (this happens automatically on addDocuments) + // but we need to call it explicitly here since we're not adding documents yet + await newVectorStore.createIndex([], 1536); + + // Step 4: Verify we can still read the data correctly + const newResults = await newVectorStore.similaritySearch(pageContent, 3); + expect(newResults.length).toBe(3); + + // Metadata should be correctly deserialized from the JSON field + expect(newResults[0].metadata.category).toBeDefined(); + expect(newResults[0].metadata.price).toBeDefined(); + expect(typeof newResults[0].metadata.category).toBe("string"); + expect(typeof newResults[0].metadata.price).toBe("number"); + + // Step 5: Verify we can add new documents to the legacy index + await newVectorStore.addDocuments([ + { pageContent, metadata: { category: "clothing", price: 49 } }, + ]); + + const updatedResults = await newVectorStore.similaritySearch( + pageContent, + 4 + ); + expect(updatedResults.length).toBe(4); + + // Step 6: Verify legacy filter still works + const filteredResults = await newVectorStore.similaritySearch( + pageContent, + 3, + "electronics" + ); + expect(filteredResults.length).toBeGreaterThan(0); + expect( + filteredResults.every((r) => + JSON.stringify(r.metadata).includes("electronics") + ) + ).toBe(true); + } finally { + // Clean up + const cleanupStore = new RedisVectorStore(new SyntheticEmbeddings(), { + redisClient: client as RedisClientType, + indexName, + keyPrefix, + }); + await cleanupStore.delete({ deleteAll: true }); + await client.quit(); + } }); }); diff --git a/libs/providers/langchain-redis/src/tests/vectorstores.test.ts b/libs/providers/langchain-redis/src/tests/vectorstores.test.ts index 5adcca9d21a6..249ed1cc30ca 100644 --- a/libs/providers/langchain-redis/src/tests/vectorstores.test.ts +++ b/libs/providers/langchain-redis/src/tests/vectorstores.test.ts @@ -1,9 +1,22 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ import { vi, test, expect, describe } from "vitest"; import { FakeEmbeddings } from "@langchain/core/utils/testing"; +import { Document } from "@langchain/core/documents"; import { SchemaFieldTypes } from "redis"; -import { RedisVectorStore, RedisVectorStoreConfig } from "../vectorstores.js"; +import { + RedisVectorStore, + TagFilter, + NumericFilter, + TextFilter, + GeoFilter, + TimestampFilter, + Tag, + Num, + Text, + Geo, + Timestamp, +} from "../vectorstores.js"; const createRedisClientMockup = () => { const hSetMock = vi.fn(); @@ -40,6 +53,7 @@ test("RedisVectorStore with external keys", async () => { const store = new RedisVectorStore(embeddings, { redisClient: client as any, indexName: "documents", + filter: "1", }); expect(store).toBeDefined(); @@ -61,7 +75,7 @@ test("RedisVectorStore with external keys", async () => { expect(client.hSet).toHaveBeenCalledWith("id1", { content_vector: Buffer.from(new Float32Array([0.1, 0.2, 0.3, 0.4]).buffer), content: "hello", - metadata: `{\\"a\\"\\:1,\\"b\\"\\:{\\"nested\\"\\:[1,{\\"a\\"\\:4}]}}`, + metadata: `{"a":1,"b":{"nested":[1,{"a":4}]}}`, }); const results = await store.similaritySearch("goodbye", 1); @@ -105,7 +119,11 @@ test("RedisVectorStore with TTL", async () => { expect(client.hSet).toHaveBeenCalledTimes(1); expect(client.expire).toHaveBeenCalledTimes(1); - expect(client.expire).toHaveBeenCalledWith("doc:documents:0", ttl); + + // Verify expire was called with the correct TTL, regardless of the UUID + const expireCall = (client.expire as any).mock.calls[0]; + expect(expireCall[0]).toMatch(/^doc:documents:/); // Key starts with the expected prefix + expect(expireCall[1]).toBe(ttl); // TTL value is correct }); test("RedisVectorStore with filters", async () => { @@ -123,12 +141,12 @@ test("RedisVectorStore with filters", async () => { expect(client.ft.search).toHaveBeenCalledWith( "documents", - "@metadata:(a|b|c) => [KNN 1 @content_vector $vector AS vector_score]", + "(@metadata: a,b,c) => [KNN 1 @content_vector $vector AS vector_score]", { PARAMS: { vector: Buffer.from(new Float32Array([0.1, 0.2, 0.3, 0.4]).buffer), }, - RETURN: ["metadata", "content", "vector_score"], + RETURN: ["content", "vector_score"], SORTBY: "vector_score", DIALECT: 2, LIMIT: { @@ -154,12 +172,12 @@ test("RedisVectorStore with raw filter", async () => { expect(client.ft.search).toHaveBeenCalledWith( "documents", - "@metadata:(a b c) => [KNN 1 @content_vector $vector AS vector_score]", + "(@metadata: a b c) => [KNN 1 @content_vector $vector AS vector_score]", { PARAMS: { vector: Buffer.from(new Float32Array([0.1, 0.2, 0.3, 0.4]).buffer), }, - RETURN: ["metadata", "content", "vector_score"], + RETURN: ["content", "vector_score"], SORTBY: "vector_score", DIALECT: 2, LIMIT: { @@ -216,7 +234,7 @@ describe("RedisVectorStore createIndex when index does not exist", () => { redisClient: client as any, indexName: "documents", }); - store.checkIndexExists = vi.fn().mockResolvedValue(false); + store.checkIndexState = vi.fn().mockResolvedValue("none"); await store.createIndex(); @@ -251,7 +269,7 @@ describe("RedisVectorStore createIndex when index does not exist", () => { LANGUAGE: "German", }, }); - store.checkIndexExists = vi.fn().mockResolvedValue(false); + store.checkIndexState = vi.fn().mockResolvedValue("none"); await store.createIndex(); @@ -310,548 +328,285 @@ describe("RedisVectorStore delete", () => { }); }); -describe("RedisVectorStore with Custom Schema", () => { - const createRedisClientWithCustomSchema = () => { - const hSetMock = vi.fn(); - const expireMock = vi.fn(); - const delMock = vi.fn().mockResolvedValue(1); - - return { - ft: { - info: vi.fn().mockResolvedValue({ - numDocs: 0, - }), - create: vi.fn(), - search: vi.fn().mockResolvedValue({ - total: 2, - documents: [ - { - value: { - content_vector: Buffer.from( - new Float32Array([0.1, 0.2, 0.3, 0.4]).buffer - ), - content: "Test document 1", - metadata: - '{"category":"tech","score":95,"tags":"javascript,nodejs"}', - "metadata.userId": "user123", - "metadata.category": "tech", - "metadata.score": 95, - "metadata.tags": "javascript,nodejs", - vector_score: 0.1, - }, - }, - { - value: { - content_vector: Buffer.from( - new Float32Array([0.2, 0.3, 0.4, 0.5]).buffer - ), - content: "Test document 2", - metadata: - '{"category":"science","score":87,"tags":"python,ai"}', - "metadata.userId": "user456", - "metadata.category": "science", - "metadata.score": 87, - "metadata.tags": "python,ai", - vector_score: 0.2, - }, - }, - ], - }), - dropIndex: vi.fn(), - }, - hSet: hSetMock, - expire: expireMock, - del: delMock, - multi: vi.fn().mockImplementation(() => ({ - exec: vi.fn(), - hSet: hSetMock, - expire: expireMock, - })), - }; - }; - - test("creates index with custom schema fields", async () => { - const client = createRedisClientWithCustomSchema(); - const embeddings = new FakeEmbeddings(); - - const customSchema: RedisVectorStoreConfig["customSchema"] = { - userId: { type: SchemaFieldTypes.TEXT, required: true, SORTABLE: true }, - category: { type: SchemaFieldTypes.TAG, SORTABLE: true, SEPARATOR: "," }, - score: { type: SchemaFieldTypes.NUMERIC, SORTABLE: true }, - tags: { type: SchemaFieldTypes.TAG, SEPARATOR: ",", CASESENSITIVE: true }, - description: { type: SchemaFieldTypes.TEXT, NOSTEM: true, WEIGHT: 2.0 }, - }; - - const store = new RedisVectorStore(embeddings, { - redisClient: client as any, - indexName: "test-custom-schema", - customSchema, - }); - - store.checkIndexExists = vi.fn().mockResolvedValue(false); - await store.createIndex(); - - expect(client.ft.create).toHaveBeenCalledWith( - "test-custom-schema", - expect.objectContaining({ - content_vector: expect.any(Object), - content: "TEXT", - metadata: "TEXT", - "metadata.userId": { - type: SchemaFieldTypes.TEXT, - SORTABLE: true, - }, - "metadata.category": { - type: SchemaFieldTypes.TAG, - SORTABLE: true, - SEPARATOR: ",", - }, - "metadata.score": { - type: SchemaFieldTypes.NUMERIC, - SORTABLE: true, - }, - "metadata.tags": { - type: SchemaFieldTypes.TAG, - SORTABLE: undefined, - SEPARATOR: ",", - }, - "metadata.description": { - type: SchemaFieldTypes.TEXT, - SORTABLE: undefined, - }, - }), - expect.any(Object) - ); - }); - - test("validates metadata against custom schema - success", async () => { - const client = createRedisClientWithCustomSchema(); - const embeddings = new FakeEmbeddings(); - - const customSchema: RedisVectorStoreConfig["customSchema"] = { - userId: { type: SchemaFieldTypes.TEXT, required: true }, - category: { type: SchemaFieldTypes.TAG }, - score: { type: SchemaFieldTypes.NUMERIC }, - tags: { type: SchemaFieldTypes.TAG }, - }; - - const store = new RedisVectorStore(embeddings, { - redisClient: client as any, - indexName: "test-validation", - customSchema, - }); - - const validDocument = { - pageContent: "Valid document", - metadata: { - userId: "user123", - category: "tech", - score: 95, - tags: ["javascript", "nodejs"], - }, - }; - - // Should not throw - await store.addDocuments([validDocument]); - expect(client.hSet).toHaveBeenCalled(); - }); - - test("validates metadata against custom schema - missing required field", async () => { - const client = createRedisClientWithCustomSchema(); - const embeddings = new FakeEmbeddings(); - - const customSchema: RedisVectorStoreConfig["customSchema"] = { - userId: { type: SchemaFieldTypes.TEXT, required: true }, - category: { type: SchemaFieldTypes.TAG }, - }; - - const store = new RedisVectorStore(embeddings, { - redisClient: client as any, - indexName: "test-validation-error", - customSchema, - }); - - const invalidDocument = { - pageContent: "Invalid document", - metadata: { - category: "tech", - // Missing required userId - }, - }; - - await expect(store.addDocuments([invalidDocument])).rejects.toThrow( - "Required metadata field 'userId' is missing" - ); - }); - - test("validates metadata against custom schema - wrong type", async () => { - const client = createRedisClientWithCustomSchema(); - const embeddings = new FakeEmbeddings(); - - const customSchema: RedisVectorStoreConfig["customSchema"] = { - score: { type: SchemaFieldTypes.NUMERIC, required: true }, - }; - - const store = new RedisVectorStore(embeddings, { - redisClient: client as any, - indexName: "test-type-validation", - customSchema, - }); - - const invalidDocument = { - pageContent: "Invalid document", - metadata: { - score: "not-a-number", // Should be number - }, - }; - - await expect(store.addDocuments([invalidDocument])).rejects.toThrow( - "Metadata field 'score' must be a number, got string" - ); - }); - - test("stores individual metadata fields for indexing", async () => { - const client = createRedisClientWithCustomSchema(); +describe("Metadata Schema Tests", () => { + test("RedisVectorStore with metadata schema", async () => { + const client = createRedisClientMockup(); const embeddings = new FakeEmbeddings(); - const customSchema: RedisVectorStoreConfig["customSchema"] = { - userId: { type: SchemaFieldTypes.TEXT }, - category: { type: SchemaFieldTypes.TAG }, - score: { type: SchemaFieldTypes.NUMERIC }, - tags: { type: SchemaFieldTypes.TAG, SEPARATOR: "," }, - }; - const store = new RedisVectorStore(embeddings, { redisClient: client as any, - indexName: "test-indexing", - customSchema, + indexName: "documents", + customSchema: [ + { name: "category", type: "tag" }, + { name: "price", type: "numeric" }, + { name: "title", type: "text" }, + { name: "location", type: "geo" }, + { name: "created_at", type: "numeric" }, // Timestamps are stored as numeric fields + ], }); - const document = { - pageContent: "Test document", - metadata: { - userId: "user123", - category: "tech", - score: 95, - tags: ["javascript", "nodejs"], - }, - }; - - await store.addDocuments([document], { keys: ["doc1"] }); - - expect(client.hSet).toHaveBeenCalledWith("doc1", { - content_vector: expect.any(Buffer), - content: "Test document", - metadata: expect.any(String), - "metadata.userId": "user123", - "metadata.category": "tech", - "metadata.score": 95, - "metadata.tags": "javascript,nodejs", // Array joined with separator - }); + expect(store).toBeDefined(); + expect(store.customSchema).toHaveLength(5); }); - test("similaritySearchVectorWithScoreAndMetadata with custom filtering", async () => { - const client = createRedisClientWithCustomSchema(); + test("Advanced filter with metadata schema", async () => { + const client = createRedisClientMockup(); const embeddings = new FakeEmbeddings(); - const customSchema: RedisVectorStoreConfig["customSchema"] = { - userId: { type: SchemaFieldTypes.TEXT }, - category: { type: SchemaFieldTypes.TAG }, - score: { type: SchemaFieldTypes.NUMERIC }, - }; - const store = new RedisVectorStore(embeddings, { redisClient: client as any, - indexName: "test-custom-search", - customSchema, + indexName: "documents", + customSchema: [ + { name: "category", type: "tag" }, + { name: "price", type: "numeric" }, + ], }); - const metadataFilter = { - category: "tech", - score: { min: 90, max: 100 }, - }; + const complexFilter = Tag("category") + .eq("electronics") + .and(Num("price").between(50, 200)); - const results = await store.similaritySearchVectorWithScoreAndMetadata( - [0.1, 0.2, 0.3, 0.4], - 2, - metadataFilter - ); + await store.similaritySearch("test query", 1, complexFilter); expect(client.ft.search).toHaveBeenCalledWith( - "test-custom-search", - "@metadata.category:{tech} @metadata.score:[90 100] => [KNN 2 @content_vector $vector AS vector_score]", - { + "documents", + "(@category:{electronics} @price:[50 200]) => [KNN 1 @content_vector $vector AS vector_score]", + expect.objectContaining({ PARAMS: { - vector: expect.any(Buffer), + vector: Buffer.from(new Float32Array([0.1, 0.2, 0.3, 0.4]).buffer), }, - RETURN: [ - "metadata", - "content", - "vector_score", - "metadata.userId", - "metadata.category", - "metadata.score", - ], + RETURN: ["content", "vector_score", "category", "price"], SORTBY: "vector_score", DIALECT: 2, LIMIT: { from: 0, - size: 2, + size: 1, }, - } + }) ); - - expect(results).toHaveLength(2); - expect(results[0][0].metadata).toEqual({ - category: "tech", - score: 95, - userId: "user123", - tags: "javascript,nodejs", - }); }); - test("buildCustomQuery with numeric range filters", async () => { - const client = createRedisClientWithCustomSchema(); + test("Backward compatibility with legacy filters", async () => { + const client = createRedisClientMockup(); const embeddings = new FakeEmbeddings(); - const customSchema = { - score: { type: SchemaFieldTypes.NUMERIC }, - price: { type: SchemaFieldTypes.NUMERIC }, - }; - const store = new RedisVectorStore(embeddings, { redisClient: client as any, - indexName: "test-numeric-filters", - customSchema, + indexName: "documents", }); - // Test different numeric filter formats - const [query1] = store.buildCustomQuery([0.1, 0.2, 0.3, 0.4], 5, { - score: { min: 80, max: 100 }, - price: { min: 50 }, // Only minimum - }); + // Test legacy array filter + await store.similaritySearch("test query", 1, ["electronics", "books"]); - expect(query1).toBe( - "@metadata.score:[80 100] @metadata.price:[50 +inf] => [KNN 5 @content_vector $vector AS vector_score]" + expect(client.ft.search).toHaveBeenCalledWith( + "documents", + "(@metadata: electronics,books) => [KNN 1 @content_vector $vector AS vector_score]", + expect.objectContaining({ + RETURN: ["content", "vector_score"], + }) ); - // Test exact numeric match - const [query2] = store.buildCustomQuery([0.1, 0.2, 0.3, 0.4], 5, { - score: 95, // Exact match - }); + // Test legacy string filter + await store.similaritySearch("test query", 1, "electronics"); - expect(query2).toBe( - "@metadata.score:[95 95] => [KNN 5 @content_vector $vector AS vector_score]" + expect(client.ft.search).toHaveBeenCalledWith( + "documents", + "(@metadata: electronics) => [KNN 1 @content_vector $vector AS vector_score]", + expect.any(Object) ); }); - test("buildCustomQuery with tag filters", async () => { - const client = createRedisClientWithCustomSchema(); + test("Schema generation with metadata schema and no legacy filter", async () => { + const client = createRedisClientMockup(); const embeddings = new FakeEmbeddings(); - const customSchema = { - category: { type: SchemaFieldTypes.TAG }, - tags: { type: SchemaFieldTypes.TAG }, - }; - const store = new RedisVectorStore(embeddings, { redisClient: client as any, - indexName: "test-tag-filters", - customSchema, - }); - - // Test single tag - const [query1] = store.buildCustomQuery([0.1, 0.2, 0.3, 0.4], 5, { - category: "tech", + indexName: "documents", + customSchema: [ + { name: "category", type: "tag" }, + { name: "price", type: "numeric" }, + ], }); - expect(query1).toBe( - "@metadata.category:{tech} => [KNN 5 @content_vector $vector AS vector_score]" - ); + store.checkIndexState = vi.fn().mockResolvedValue("none"); - // Test multiple tags (OR operation) - const [query2] = store.buildCustomQuery([0.1, 0.2, 0.3, 0.4], 5, { - category: ["tech", "science"], - }); + await store.createIndex(); - expect(query2).toBe( - "@metadata.category:({tech}|{science}) => [KNN 5 @content_vector $vector AS vector_score]" + // Verify that ft.create was called with the correct schema + expect(client.ft.create).toHaveBeenCalledWith( + "documents", + expect.objectContaining({ + content_vector: expect.any(Object), + content: expect.any(String), + // metadata field should NOT be in the schema when using customSchema without legacy filter + category: expect.any(Object), + price: expect.any(Object), + }), + expect.any(Object) ); + + // Verify metadata field is NOT in the schema + const schemaArg = (client.ft.create as any).mock.calls[0][1]; + expect(schemaArg.metadata).toBeUndefined(); }); - test("buildCustomQuery with text filters", async () => { - const client = createRedisClientWithCustomSchema(); + test("Schema generation with metadata schema and legacy string filter", async () => { + const client = createRedisClientMockup(); const embeddings = new FakeEmbeddings(); - const customSchema = { - title: { type: SchemaFieldTypes.TEXT }, - description: { type: SchemaFieldTypes.TEXT }, - }; - const store = new RedisVectorStore(embeddings, { redisClient: client as any, - indexName: "test-text-filters", - customSchema, + indexName: "documents", + customSchema: [ + { name: "category", type: "tag" }, + { name: "price", type: "numeric" }, + ], + filter: "electronics", // Legacy string filter }); - const [query] = store.buildCustomQuery([0.1, 0.2, 0.3, 0.4], 5, { - title: "javascript tutorial", - }); + store.checkIndexState = vi.fn().mockResolvedValue("none"); - expect(query).toBe( - "@metadata.title:(javascript tutorial) => [KNN 5 @content_vector $vector AS vector_score]" - ); + await store.createIndex(); + + // Verify that ft.create was called with the correct schema including metadata field + const schemaArg = (client.ft.create as any).mock.calls[0][1]; + expect(schemaArg.category).toBeDefined(); + expect(schemaArg.price).toBeDefined(); }); - test("buildCustomQuery with mixed filter types", async () => { - const client = createRedisClientWithCustomSchema(); + test("Schema generation with metadata schema and legacy array filter", async () => { + const client = createRedisClientMockup(); const embeddings = new FakeEmbeddings(); - const customSchema = { - category: { type: SchemaFieldTypes.TAG }, - score: { type: SchemaFieldTypes.NUMERIC }, - title: { type: SchemaFieldTypes.TEXT }, - }; - const store = new RedisVectorStore(embeddings, { redisClient: client as any, - indexName: "test-mixed-filters", - customSchema, + indexName: "documents", + customSchema: [ + { name: "category", type: "tag" }, + { name: "price", type: "numeric" }, + ], + filter: ["electronics", "books"], // Legacy array filter }); - const [query] = store.buildCustomQuery([0.1, 0.2, 0.3, 0.4], 5, { - category: "tech", - score: { min: 90 }, - title: "javascript", - }); + store.checkIndexState = vi.fn().mockResolvedValue("none"); - expect(query).toBe( - "@metadata.category:{tech} @metadata.score:[90 +inf] @metadata.title:(javascript) => [KNN 5 @content_vector $vector AS vector_score]" - ); + await store.createIndex(); + + // Verify that ft.create was called with the correct schema including metadata field + const schemaArg = (client.ft.create as any).mock.calls[0][1]; + expect(schemaArg.category).toBeDefined(); + expect(schemaArg.price).toBeDefined(); }); - test("includes custom schema fields in search return fields", async () => { - const client = createRedisClientWithCustomSchema(); + test("Schema generation without metadata schema includes metadata field", async () => { + const client = createRedisClientMockup(); const embeddings = new FakeEmbeddings(); - const customSchema = { - userId: { type: SchemaFieldTypes.TEXT }, - category: { type: SchemaFieldTypes.TAG }, - score: { type: SchemaFieldTypes.NUMERIC }, - }; - const store = new RedisVectorStore(embeddings, { redisClient: client as any, - indexName: "test-return-fields", - customSchema, + indexName: "documents", }); - await store.similaritySearch("test query", 2); + store.checkIndexState = vi.fn().mockResolvedValue("none"); - expect(client.ft.search).toHaveBeenCalledWith( - "test-return-fields", - expect.any(String), - expect.objectContaining({ - RETURN: [ - "metadata", - "content", - "vector_score", - "metadata.userId", - "metadata.category", - "metadata.score", - ], - }) - ); + await store.createIndex(); + + // Verify that ft.create was called with the correct schema including metadata field + const schemaArg = (client.ft.create as any).mock.calls[0][1]; + expect(schemaArg.metadata).toBeDefined(); }); - test("handles optional metadata fields correctly", async () => { - const client = createRedisClientWithCustomSchema(); + test("Automatic schema inference from documents", async () => { + const client = createRedisClientMockup(); const embeddings = new FakeEmbeddings(); - const customSchema = { - userId: { type: SchemaFieldTypes.TEXT, required: true }, - category: { type: SchemaFieldTypes.TAG, required: false }, // Optional - score: { type: SchemaFieldTypes.NUMERIC }, // Optional (default) - }; - const store = new RedisVectorStore(embeddings, { redisClient: client as any, - indexName: "test-optional-fields", - customSchema, + indexName: "test-auto-infer", + // No customSchema provided, no legacy filter }); - const documentWithPartialMetadata = { - pageContent: "Test document", - metadata: { - userId: "user123", - // category and score are optional and not provided - }, - }; + store.checkIndexState = vi.fn().mockResolvedValue("none"); - // Should not throw for missing optional fields - await store.addDocuments([documentWithPartialMetadata]); - expect(client.hSet).toHaveBeenCalled(); - }); + const documents = [ + new Document({ + pageContent: "doc1", + metadata: { category: "tech", price: 99, location: "-122.4, 37.7]" }, + }), + new Document({ + pageContent: "doc2", + metadata: { category: "books", price: 15, location: "-118.2, 34.0" }, + }), + new Document({ + pageContent: "doc3", + metadata: { category: "tech", price: 50, location: "-73.9, 40.7" }, + }), + new Document({ + pageContent: "doc4", + metadata: { category: "tech", price: 75, location: "-0.1, 51.5" }, + }), + new Document({ + pageContent: "doc5", + metadata: { category: "books", price: 20, location: "2.3, 48.9" }, + }), + new Document({ + pageContent: "doc6", + metadata: { category: "tech", price: 120, location: "139.7, 35.7" }, + }), + ]; - test("ignores unknown schema fields in metadata filter", async () => { - const client = createRedisClientWithCustomSchema(); - const embeddings = new FakeEmbeddings(); + await store.createIndex(documents, 1536); - const customSchema = { - category: { type: SchemaFieldTypes.TAG }, - }; + const schemaArg = (client.ft.create as any).mock.calls[0][1]; - const store = new RedisVectorStore(embeddings, { - redisClient: client as any, - indexName: "test-unknown-fields", - customSchema, - }); + // Should NOT include metadata field (using inferred schema instead) + expect(schemaArg.metadata).toBeUndefined(); - const [query] = store.buildCustomQuery([0.1, 0.2, 0.3, 0.4], 5, { - category: "tech", - unknownField: "ignored", // This should be ignored - }); + // Should include inferred metadata fields + expect(schemaArg.category).toBeDefined(); + expect(schemaArg.category.type).toBe(SchemaFieldTypes.TEXT); - // Should only include the known schema field - expect(query).toBe( - "@metadata.category:{tech} => [KNN 5 @content_vector $vector AS vector_score]" - ); + expect(schemaArg.price).toBeDefined(); + expect(schemaArg.price.type).toBe(SchemaFieldTypes.NUMERIC); + + expect(schemaArg.location).toBeDefined(); + expect(schemaArg.location.type).toBe(SchemaFieldTypes.GEO); }); - test("works without custom schema (backward compatibility)", async () => { + test("Automatic schema inference with legacy filter still includes metadata field", async () => { const client = createRedisClientMockup(); const embeddings = new FakeEmbeddings(); const store = new RedisVectorStore(embeddings, { redisClient: client as any, - indexName: "test-no-schema", - // No customSchema provided + indexName: "test-auto-infer-legacy", + filter: "tech", // Legacy string filter }); - const document = { - pageContent: "Test document", - metadata: { any: "field" }, - }; + store.checkIndexState = vi.fn().mockResolvedValue("none"); - // Should work normally without custom schema - await store.addDocuments([document]); - expect(client.hSet).toHaveBeenCalledWith( - expect.any(String), - expect.objectContaining({ - content: "Test document", - metadata: expect.any(String), - // Should not have individual metadata fields - }) - ); + const documents = [ + new Document({ + pageContent: "doc1", + metadata: { category: "tech", price: 99 }, + }), + new Document({ + pageContent: "doc2", + metadata: { category: "books", price: 15 }, + }), + ]; - // Should not include custom schema fields in return - await store.similaritySearch("test", 1); - expect(client.ft.search).toHaveBeenCalledWith( - expect.any(String), - expect.any(String), - expect.objectContaining({ - RETURN: ["metadata", "content", "vector_score"], // No custom fields - }) - ); + await store.createIndex(documents, 1536); + + const schemaArg = (client.ft.create as any).mock.calls[0][1]; + + // Should include metadata field for legacy compatibility + expect(schemaArg.metadata).toBeDefined(); + expect(schemaArg.metadata.type).toBe(SchemaFieldTypes.TEXT); + + // Should NOT include inferred fields (legacy mode) + expect(schemaArg.category).toBeUndefined(); + expect(schemaArg.price).toBeUndefined(); }); }); diff --git a/libs/providers/langchain-redis/src/vectorstores.ts b/libs/providers/langchain-redis/src/vectorstores.ts index 98d77a5cb6c0..40a9caeb32f0 100644 --- a/libs/providers/langchain-redis/src/vectorstores.ts +++ b/libs/providers/langchain-redis/src/vectorstores.ts @@ -2,78 +2,76 @@ import { Document } from "@langchain/core/documents"; import type { EmbeddingsInterface } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; import type { - createClient, createCluster, + createClient, RediSearchSchema, SearchOptions, } from "redis"; +import { v4 as uuidv4 } from "uuid"; import { SchemaFieldTypes, VectorAlgorithms } from "redis"; - -// Adapated from internal redis types which aren't exported -/** - * Type for creating a schema vector field. It includes the algorithm, - * distance metric, and initial capacity. - */ -export type CreateSchemaVectorField< - T extends VectorAlgorithms, - A extends Record -> = { - ALGORITHM: T; - DISTANCE_METRIC: "L2" | "IP" | "COSINE"; - INITIAL_CAP?: number; -} & A; -/** - * Type for creating a flat schema vector field. It extends - * CreateSchemaVectorField with a block size property. - */ -export type CreateSchemaFlatVectorField = CreateSchemaVectorField< - VectorAlgorithms.FLAT, - { - BLOCK_SIZE?: number; - } ->; -/** - * Type for creating a HNSW schema vector field. It extends - * CreateSchemaVectorField with M, EF_CONSTRUCTION, and EF_RUNTIME - * properties. - */ -export type CreateSchemaHNSWVectorField = CreateSchemaVectorField< - VectorAlgorithms.HNSW, - { - M?: number; - EF_CONSTRUCTION?: number; - EF_RUNTIME?: number; - } ->; - -type CreateIndexOptions = NonNullable< - Parameters["ft"]["create"]>[3] ->; - -export type RedisSearchLanguages = `${NonNullable< - CreateIndexOptions["LANGUAGE"] ->}`; - -export type RedisVectorStoreIndexOptions = Omit< +import { + FilterExpression, + AndFilter, + OrFilter, + TagFilter, + NumericFilter, + TextFilter, + GeoFilter, + TimestampFilter, + Custom, + CustomFilter, + Tag, + Num, + Text, + Geo, + Timestamp, +} from "./filters.js"; +import type { + CreateSchemaVectorField, + CreateSchemaFlatVectorField, + CreateSchemaHNSWVectorField, CreateIndexOptions, - "LANGUAGE" -> & { - LANGUAGE?: RedisSearchLanguages; + RedisSearchLanguages, + RedisVectorStoreIndexOptions, + MetadataFieldSchema, +} from "./schema.js"; +import { + buildMetadataSchema, + serializeMetadataField, + deserializeMetadataField, + inferMetadataSchema, + checkForSchemaMismatch, +} from "./schema.js"; + +// Re-export filter classes and functions for backward compatibility +export { + FilterExpression, + AndFilter, + OrFilter, + TagFilter, + NumericFilter, + TextFilter, + GeoFilter, + TimestampFilter, + Tag, + Num, + Text, + Geo, + Timestamp, + Custom, + CustomFilter, }; -/** - * Interface for custom schema field definitions - */ -export interface CustomSchemaField { - type: SchemaFieldTypes; - required?: boolean; - SORTABLE?: boolean | "UNF"; - NOINDEX?: boolean; - SEPARATOR?: string; // For TAG fields - CASESENSITIVE?: true; // For TAG fields (Redis expects true, not boolean) - NOSTEM?: true; // For TEXT fields (Redis expects true, not boolean) - WEIGHT?: number; // For TEXT fields -} +// Re-export schema types and utilities for backward compatibility +export type { + CreateSchemaVectorField, + CreateSchemaFlatVectorField, + CreateSchemaHNSWVectorField, + CreateIndexOptions, + RedisSearchLanguages, + RedisVectorStoreIndexOptions, + MetadataFieldSchema, +}; /** * Interface for the configuration of the RedisVectorStore. It includes @@ -93,7 +91,7 @@ export interface RedisVectorStoreConfig { vectorKey?: string; filter?: RedisVectorStoreFilterType; ttl?: number; // ttl in second - customSchema?: Record; // Custom schema fields for metadata + customSchema?: MetadataFieldSchema[]; } /** @@ -106,12 +104,12 @@ export interface RedisAddOptions { } /** - * Type for the filter used in the RedisVectorStore. It is an array of - * strings. - * If a string is passed instead of an array the value is used directly, this - * allows custom filters to be passed. + * Type for the filter used in the RedisVectorStore. Supports multiple formats: + * - string[]: Array of strings for simple OR filtering (legacy format) + * - string: Raw Redis query string for custom filters (legacy format) + * - FilterExpression: Advanced filter expressions (recommended approach) */ -export type RedisVectorStoreFilterType = string[] | string; +export type RedisVectorStoreFilterType = string[] | string | FilterExpression; /** * Class representing a RedisVectorStore. It extends the VectorStore class @@ -143,65 +141,12 @@ export class RedisVectorStore extends VectorStore { ttl?: number; - customSchema?: Record; + customSchema?: MetadataFieldSchema[]; _vectorstoreType(): string { return "redis"; } - /** - * Validates metadata against the custom schema if defined - * @param metadata The metadata object to validate - * @throws Error if validation fails - */ - private validateMetadata(metadata: Record): void { - if (!this.customSchema) { - return; // No schema defined, skip validation - } - - for (const [fieldName, fieldConfig] of Object.entries(this.customSchema)) { - const value = metadata[fieldName]; - - // Check if required field is missing - if (fieldConfig.required && (value === undefined || value === null)) { - throw new Error(`Required metadata field '${fieldName}' is missing`); - } - - // Skip validation for optional fields that are not provided - if (value === undefined || value === null) { - continue; - } - - // Basic type validation based on schema field type - switch (fieldConfig.type) { - case SchemaFieldTypes.NUMERIC: - if (typeof value !== "number") { - throw new Error( - `Metadata field '${fieldName}' must be a number, got ${typeof value}` - ); - } - break; - case SchemaFieldTypes.TAG: - if (typeof value !== "string" && !Array.isArray(value)) { - throw new Error( - `Metadata field '${fieldName}' must be a string or array, got ${typeof value}` - ); - } - break; - case SchemaFieldTypes.TEXT: - if (typeof value !== "string") { - throw new Error( - `Metadata field '${fieldName}' must be a string, got ${typeof value}` - ); - } - break; - default: - // For other field types, skip validation - break; - } - } - } - constructor( embeddings: EmbeddingsInterface, _dbConfig: RedisVectorStoreConfig @@ -263,68 +208,39 @@ export class RedisVectorStore extends VectorStore { throw new Error("No vectors provided"); } // check if the index exists and create it if it doesn't - await this.createIndex(vectors[0].length); - - const info = await this.redisClient.ft.info(this.indexName); - const lastKeyCount = - parseInt( - info.numDocs || - // @ts-expect-error - num_docs is not typed as not used by all redis connectors - info.num_docs, - 10 - ) || 0; - - // Validate all metadata against custom schema first - if (this.customSchema) { - for (let idx = 0; idx < documents.length; idx += 1) { - const metadata = - documents[idx] && documents[idx].metadata - ? documents[idx].metadata - : {}; - this.validateMetadata(metadata); - } - } + await this.createIndex(documents, vectors[0].length); const multi = this.redisClient.multi(); await Promise.all( vectors.map(async (vector, idx) => { const key = - keys && keys.length - ? keys[idx] - : `${this.keyPrefix}${idx + lastKeyCount}`; + keys && keys.length ? keys[idx] : `${this.keyPrefix}${uuidv4()}`; + const metadata = documents[idx] && documents[idx].metadata ? documents[idx].metadata : {}; - // Prepare hash fields - const hashFields: Record = { + const hashFields: Record = { [this.vectorKey]: this.getFloat32Buffer(vector), [this.contentKey]: documents[idx].pageContent, - [this.metadataKey]: this.escapeSpecialChars(JSON.stringify(metadata)), }; - // Add individual metadata fields for indexing if custom schema is defined - if (this.customSchema) { - for (const [fieldName, fieldConfig] of Object.entries( - this.customSchema - )) { - const fieldValue = metadata[fieldName]; - if (fieldValue !== undefined && fieldValue !== null) { - const indexedFieldName = `${this.metadataKey}.${fieldName}`; - - // Handle different field types appropriately - if ( - fieldConfig.type === SchemaFieldTypes.TAG && - Array.isArray(fieldValue) - ) { - // For TAG arrays, join with separator (default comma) - const separator = fieldConfig.SEPARATOR || ","; - hashFields[indexedFieldName] = fieldValue.join(separator); - } else { - // For other types, store as-is - hashFields[indexedFieldName] = fieldValue; + // Handle metadata based on schema configuration + if (this.customSchema && this.customSchema.length > 0) { + if (this.customSchema[0].name === this.metadataKey) { + // handling legacy metadata schema for simple filters (string or array of string) + hashFields[this.metadataKey] = JSON.stringify(metadata); + } else { + // Store individual metadata fields for proper indexing + for (const fieldSchema of this.customSchema) { + const fieldValue = metadata[fieldSchema.name]; + if (fieldValue !== undefined && fieldValue !== null) { + hashFields[fieldSchema.name] = serializeMetadataField( + fieldSchema, + fieldValue + ); } } } @@ -376,67 +292,40 @@ export class RedisVectorStore extends VectorStore { if (res.value) { const document = res.value; if (document.vector_score) { - result.push([ - new Document({ - pageContent: (document[this.contentKey] ?? "") as string, - metadata: JSON.parse( - this.unEscapeSpecialChars( - (document.metadata ?? "{}") as string - ) - ), - }), - Number(document.vector_score), - ]); - } - } - } - } - - return result; - } + // Reconstruct metadata from individual fields if schema is configured + let metadata: Record = {}; - /** - * Method for performing a similarity search with custom metadata filtering. - * Uses the custom schema fields for efficient filtering. - * @param query The query vector. - * @param k The number of nearest neighbors to return. - * @param metadataFilter Object with metadata field filters using custom schema. - * @returns A promise that resolves to an array of documents and their scores. - */ - async similaritySearchVectorWithScoreAndMetadata( - query: number[], - k: number, - metadataFilter?: Record - ): Promise<[Document, number][]> { - const results = await this.redisClient.ft.search( - this.indexName, - ...this.buildCustomQuery(query, k, metadataFilter) - ); - const result: [Document, number][] = []; + if (this.customSchema && this.customSchema.length > 0) { + // Build metadata from individual schema fields + for (const fieldSchema of this.customSchema) { + // Skip the metadata JSON field itself - it's used for legacy filter support + // and will be parsed separately below + if (fieldSchema.name === this.metadataKey) { + continue; + } + const fieldValue = document[fieldSchema.name]; + if (fieldValue !== undefined && fieldValue !== null) { + metadata[fieldSchema.name] = deserializeMetadataField( + fieldSchema, + fieldValue + ); + } + } + } - if (results.total) { - for (const res of results.documents) { - if (res.value) { - const document = res.value; - if (document.vector_score) { - // Reconstruct metadata from both the JSON field and individual fields - let metadata: Record = {}; + // Also try to parse the JSON metadata field for any additional fields try { - metadata = JSON.parse( - this.unEscapeSpecialChars((document.metadata ?? "{}") as string) + const jsonMetadata = JSON.parse( + this.unEscapeSpecialChars( + (document[this.metadataKey] ?? "{}") as string + ) ); - } catch { - // If JSON parsing fails, construct from individual fields - metadata = {}; - } - - // Add individual schema fields to metadata if they exist - if (this.customSchema) { - for (const fieldName of Object.keys(this.customSchema)) { - const fieldKey = `${this.metadataKey}.${fieldName}`; - if (document[fieldKey] !== undefined) { - metadata[fieldName] = document[fieldKey] as unknown; - } + // Merge with schema-based metadata, giving priority to schema fields + metadata = { ...jsonMetadata, ...metadata }; + } catch (error) { + // If JSON parsing fails, use only schema-based metadata + if (!this.customSchema || this.customSchema.length === 0) { + metadata = {}; } } @@ -514,9 +403,14 @@ export class RedisVectorStore extends VectorStore { * Method for checking if an index exists in the RedisVectorStore. * @returns A promise that resolves to a boolean indicating whether the index exists. */ - async checkIndexExists() { + async checkIndexState() { try { - await this.redisClient.ft.info(this.indexName); + const result = await this.redisClient.ft.info(this.indexName); + return result.attributes.some( + (attr) => attr.identifier === this.metadataKey + ) + ? "legacy" + : "default"; } catch (err) { // eslint-disable-next-line @typescript-eslint/no-explicit-any if ((err as any)?.message.includes("unknown command")) { @@ -525,24 +419,33 @@ export class RedisVectorStore extends VectorStore { ); } // index doesn't exist - return false; + return "none"; } - - return true; } /** * Method for creating an index in the RedisVectorStore. If the index * already exists, it does nothing. * @param dimensions The dimensions of the index + * @param documents Optional documents to infer metadata schema from * @returns A promise that resolves when the index has been created. */ - async createIndex(dimensions = 1536): Promise { - if (await this.checkIndexExists()) { - return; - } - - const schema: RediSearchSchema = { + async createIndex(documents?: Document[], dimensions = 1536): Promise { + // at that point we have decided on the metadata schema, which is needed even if the index already existed + // however we only create the index in case it doesn't exist + const indexState = await this.checkIndexState(); + + // Determine if we need compatibility mode for legacy string/string[] filters + const needsLegacyMetadataField = + typeof this.filter === "string" || + Array.isArray(this.filter) || + indexState === "legacy"; + + // at least one document needs to contain metadata so we can + const hasMetadata = documents && documents.some((doc) => doc.metadata); + + // default schema - any customizations or additional fields will be added on top of this + let schema: RediSearchSchema = { [this.vectorKey]: { type: SchemaFieldTypes.VECTOR, TYPE: "FLOAT32", @@ -550,46 +453,51 @@ export class RedisVectorStore extends VectorStore { ...this.indexOptions, }, [this.contentKey]: SchemaFieldTypes.TEXT, - [this.metadataKey]: SchemaFieldTypes.TEXT, }; - // Add custom metadata schema fields for better filtering and searching - if (this.customSchema) { - for (const [fieldName, fieldConfig] of Object.entries( - this.customSchema - )) { - // Create field name with metadata prefix (e.g., metadata.userId) - const indexedFieldName = `${this.metadataKey}.${fieldName}`; - - // Convert CustomSchemaField to proper Redis schema field - if (fieldConfig.type === SchemaFieldTypes.TAG) { - schema[indexedFieldName] = { - type: SchemaFieldTypes.TAG, - SORTABLE: fieldConfig.SORTABLE ? true : undefined, - SEPARATOR: (fieldConfig.SEPARATOR as string) || ",", - }; - } else if (fieldConfig.type === SchemaFieldTypes.NUMERIC) { - schema[indexedFieldName] = { - type: SchemaFieldTypes.NUMERIC, - SORTABLE: fieldConfig.SORTABLE ? true : undefined, - }; - } else if (fieldConfig.type === SchemaFieldTypes.TEXT) { - schema[indexedFieldName] = { - type: SchemaFieldTypes.TEXT, - SORTABLE: fieldConfig.SORTABLE ? true : undefined, - }; - } else { - // Fallback for other types - just use the field type directly - schema[indexedFieldName] = fieldConfig.type; - } + // --- METADATA PROCESSING + + if (this.customSchema && this.customSchema.length !== 0) { + // providing a custom metadata schema takes precedence above all other considerations + // in this case, no matter if the documents have metadata, if the filter is a simple filter, etc. we proceed + // with the custom schema, we do however warn the user if the schema does not match the metadata provided + if ( + !documents || + checkForSchemaMismatch( + this.customSchema, + inferMetadataSchema(documents) + ) + ) { + console.warn( + "The custom schema does not match the metadata schema inferred from the documents. " + + "This is not necessarily an issue, but could indicate an invalid custom schema." + ); } + } else if (!needsLegacyMetadataField && hasMetadata) { + // if we don't have a custom schema, but we have documents with metadata, we can infer the schema + // unless a legacy filter is provided, in which case we need to fall back to the legacy mode + this.customSchema = inferMetadataSchema(documents); + } else { + // If we don't have a custom schema or documents with metadata (needed to infer the metadata), irrespective of + // type of filter, we need to fall back to the legacy mode which would have only one text field for metadata + this.customSchema = [ + { + name: this.metadataKey, + type: "text", + }, + ]; } - await this.redisClient.ft.create( - this.indexName, - schema, - this.createIndexOptions - ); + schema = buildMetadataSchema(this.customSchema, schema); + + if (indexState === "none") { + // we create an index only if it doesn't exist + await this.redisClient.ft.create( + this.indexName, + schema, + this.createIndexOptions + ); + } } /** @@ -656,19 +564,17 @@ export class RedisVectorStore extends VectorStore { let hybridFields = "*"; // if a filter is set, modify the hybrid query - if (filter && filter.length) { - // `filter` is a list of strings, then it's applied using the OR operator in the metadata key - // for example: filter = ['foo', 'bar'] => this will filter all metadata containing either 'foo' OR 'bar' - hybridFields = `@${this.metadataKey}:(${this.prepareFilter(filter)})`; + if (filter) { + hybridFields = this.prepareFilter(filter); } const baseQuery = `${hybridFields} => [KNN ${k} @${this.vectorKey} $vector AS ${vectorScoreField}]`; // Include custom schema fields in return fields for better access - const returnFields = [this.metadataKey, this.contentKey, vectorScoreField]; + const returnFields = [this.contentKey, vectorScoreField]; if (this.customSchema) { - for (const fieldName of Object.keys(this.customSchema)) { - returnFields.push(`${this.metadataKey}.${fieldName}`); + for (const fieldName of this.customSchema) { + returnFields.push(`${fieldName.name}`); } } @@ -688,98 +594,48 @@ export class RedisVectorStore extends VectorStore { return [baseQuery, options]; } - /** - * Builds a query with custom metadata field filtering - * @param query The query vector - * @param k Number of results to return - * @param metadataFilter Object with metadata field filters - * @returns Query string and search options - */ - buildCustomQuery( - query: number[], - k: number, - metadataFilter?: Record - ): [string, SearchOptions] { - const vectorScoreField = "vector_score"; - - let hybridFields = "*"; + private prepareFilter(filter: RedisVectorStoreFilterType): string { + if (!filter) { + return "*"; + } - // Build filter using custom schema fields - if (metadataFilter && this.customSchema) { - const filterClauses: string[] = []; - - for (const [fieldName, value] of Object.entries(metadataFilter)) { - if (this.customSchema[fieldName]) { - const fieldConfig = this.customSchema[fieldName]; - const indexedFieldName = `${this.metadataKey}.${fieldName}`; - - if (fieldConfig.type === SchemaFieldTypes.NUMERIC) { - // Handle numeric range queries - if (typeof value === "object" && value !== null) { - if ("min" in value && "max" in value) { - filterClauses.push( - `@${indexedFieldName}:[${value.min} ${value.max}]` - ); - } else if ("min" in value) { - filterClauses.push(`@${indexedFieldName}:[${value.min} +inf]`); - } else if ("max" in value) { - filterClauses.push(`@${indexedFieldName}:[-inf ${value.max}]`); - } - } else { - // Exact numeric match - filterClauses.push(`@${indexedFieldName}:[${value} ${value}]`); - } - } else if (fieldConfig.type === SchemaFieldTypes.TAG) { - // Handle tag filtering - if (Array.isArray(value)) { - const tagFilter = value.map((v) => `{${v}}`).join("|"); - filterClauses.push(`@${indexedFieldName}:(${tagFilter})`); - } else { - filterClauses.push(`@${indexedFieldName}:{${value}}`); - } - } else if (fieldConfig.type === SchemaFieldTypes.TEXT) { - // Handle text search - filterClauses.push(`@${indexedFieldName}:(${value})`); - } + // Legacy filter support, works with TEXT fields only + if (Array.isArray(filter) || typeof filter === "string") { + let metadataField = this.metadataKey; + if ( + !this.customSchema?.some((field) => field.name === this.metadataKey) + ) { + // a rare case where the user has provided a simple filter, but the custom schema doesn't include the metadata + // field, for example no filter was provided during vector store creation and the index creation logic assumed + // we will be using inferred schema + const firstTextField = this.customSchema?.find( + (field) => field.type === "text" + )?.name; + if (firstTextField) { + metadataField = firstTextField; } } - if (filterClauses.length > 0) { - hybridFields = filterClauses.join(" "); + if (Array.isArray(filter)) { + const escapedFilter = filter + .map((v) => `${this.escapeSpecialChars(v)}`) + .join(","); + return `(@${metadataField}: ${escapedFilter})`; } + return `(@${metadataField}: ${filter})`; } - const baseQuery = `${hybridFields} => [KNN ${k} @${this.vectorKey} $vector AS ${vectorScoreField}]`; - - // Include custom schema fields in return fields - const returnFields = [this.metadataKey, this.contentKey, vectorScoreField]; - if (this.customSchema) { - for (const fieldName of Object.keys(this.customSchema)) { - returnFields.push(`${this.metadataKey}.${fieldName}`); - } + // Check for FilterExpression objects (but not arrays) + if ( + typeof filter === "object" && + "toString" in filter && + typeof filter.toString === "function" + ) { + // Use the filter expression's toString method + return filter.toString(); } - const options: SearchOptions = { - PARAMS: { - vector: this.getFloat32Buffer(query), - }, - RETURN: returnFields, - SORTBY: vectorScoreField, - DIALECT: 2, - LIMIT: { - from: 0, - size: k, - }, - }; - - return [baseQuery, options]; - } - - private prepareFilter(filter: RedisVectorStoreFilterType) { - if (Array.isArray(filter)) { - return filter.map(this.escapeSpecialChars).join("|"); - } - return filter; + return "*"; } /** diff --git a/libs/providers/langchain-redis/vitest.config.ts b/libs/providers/langchain-redis/vitest.config.ts index 84bd12d24080..b8e64a8e898f 100644 --- a/libs/providers/langchain-redis/vitest.config.ts +++ b/libs/providers/langchain-redis/vitest.config.ts @@ -9,6 +9,7 @@ export default defineConfig((env) => { test: { environment: "node", hideSkippedTests: true, + globals: true, testTimeout: 30_000, maxWorkers: 0.5, exclude: ["**/*.int.test.ts", ...configDefaults.exclude], @@ -53,7 +54,7 @@ export default defineConfig((env) => { name: "int", environment: "node", }, - }; + } satisfies UserConfigExport; } return { @@ -63,5 +64,5 @@ export default defineConfig((env) => { include: configDefaults.include, typecheck: { enabled: true }, }, - }; + } satisfies UserConfigExport; }); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 56e5c35e6786..fd757c0b17cd 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -434,31 +434,31 @@ importers: version: 0.25.8 '@rolldown/binding-darwin-arm64': specifier: '*' - version: 1.0.0-beta.44 + version: 1.0.0-beta.45 '@rolldown/binding-darwin-x64': specifier: '*' - version: 1.0.0-beta.44 + version: 1.0.0-beta.45 '@rolldown/binding-linux-arm64-gnu': specifier: '*' - version: 1.0.0-beta.44 + version: 1.0.0-beta.45 '@rolldown/binding-linux-arm64-musl': specifier: '*' - version: 1.0.0-beta.44 + version: 1.0.0-beta.45 '@rolldown/binding-linux-x64-gnu': specifier: '*' - version: 1.0.0-beta.44 + version: 1.0.0-beta.45 '@rolldown/binding-linux-x64-musl': specifier: '*' - version: 1.0.0-beta.44 + version: 1.0.0-beta.45 '@rolldown/binding-win32-arm64-msvc': specifier: '*' - version: 1.0.0-beta.44 + version: 1.0.0-beta.45 '@rolldown/binding-win32-ia32-msvc': specifier: '*' - version: 1.0.0-beta.44 + version: 1.0.0-beta.45 '@rolldown/binding-win32-x64-msvc': specifier: '*' - version: 1.0.0-beta.44 + version: 1.0.0-beta.45 '@swc/core-win32-x64-msvc': specifier: '*' version: 1.13.3 @@ -3289,6 +3289,9 @@ importers: redis: specifier: ^4.6.13 version: 4.7.1 + uuid: + specifier: ^10.0.0 + version: 10.0.0 devDependencies: '@faker-js/faker': specifier: ^8.4.0 @@ -3323,9 +3326,6 @@ importers: typescript: specifier: ~5.8.3 version: 5.8.3 - uuid: - specifier: ^10.0.0 - version: 10.0.0 vitest: specifier: ^3.2.4 version: 3.2.4(@types/debug@4.1.12)(@types/node@24.9.0)(jiti@2.5.1)(jsdom@26.1.0(bufferutil@4.0.9)(utf-8-validate@6.0.5))(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.1) @@ -7264,8 +7264,8 @@ packages: cpu: [arm64] os: [darwin] - '@rolldown/binding-darwin-arm64@1.0.0-beta.44': - resolution: {integrity: sha512-PxAW1PXLPmCzfhfKIS53kwpjLGTUdIfX4Ht+l9mj05C3lYCGaGowcNsYi2rdxWH24vSTmeK+ajDNRmmmrK0M7g==} + '@rolldown/binding-darwin-arm64@1.0.0-beta.45': + resolution: {integrity: sha512-xjCv4CRVsSnnIxTuyH1RDJl5OEQ1c9JYOwfDAHddjJDxCw46ZX9q80+xq7Eok7KC4bRSZudMJllkvOKv0T9SeA==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [darwin] @@ -7275,8 +7275,8 @@ packages: cpu: [x64] os: [darwin] - '@rolldown/binding-darwin-x64@1.0.0-beta.44': - resolution: {integrity: sha512-/CtQqs1oO9uSb5Ju60rZvsdjE7Pzn8EK2ISAdl2jedjMzeD/4neNyCbwyJOAPzU+GIQTZVyrFZJX+t7HXR1R/g==} + '@rolldown/binding-darwin-x64@1.0.0-beta.45': + resolution: {integrity: sha512-ddcO9TD3D/CLUa/l8GO8LHzBOaZqWg5ClMy3jICoxwCuoz47h9dtqPsIeTiB6yR501LQTeDsjA4lIFd7u3Ljfw==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [darwin] @@ -7296,8 +7296,8 @@ packages: cpu: [arm64] os: [linux] - '@rolldown/binding-linux-arm64-gnu@1.0.0-beta.44': - resolution: {integrity: sha512-kRRKGZI4DXWa6ANFr3dLA85aSVkwPdgXaRjfanwY84tfc3LncDiIjyWCb042e3ckPzYhHSZ3LmisO+cdOIYL6Q==} + '@rolldown/binding-linux-arm64-gnu@1.0.0-beta.45': + resolution: {integrity: sha512-LE1gjAwQRrbCOorJJ7LFr10s5vqYf5a00V5Ea9wXcT2+56n5YosJkcp8eQ12FxRBv2YX8dsdQJb+ZTtYJwb6XQ==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [linux] @@ -7307,8 +7307,8 @@ packages: cpu: [arm64] os: [linux] - '@rolldown/binding-linux-arm64-musl@1.0.0-beta.44': - resolution: {integrity: sha512-hMtiN9xX1NhxXBa2U3Up4XkVcsVp2h73yYtMDY59z9CDLEZLrik9RVLhBL5QtoX4zZKJ8HZKJtWuGYvtmkCbIQ==} + '@rolldown/binding-linux-arm64-musl@1.0.0-beta.45': + resolution: {integrity: sha512-tdy8ThO/fPp40B81v0YK3QC+KODOmzJzSUOO37DinQxzlTJ026gqUSOM8tzlVixRbQJltgVDCTYF8HNPRErQTA==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [linux] @@ -7323,8 +7323,8 @@ packages: cpu: [x64] os: [linux] - '@rolldown/binding-linux-x64-gnu@1.0.0-beta.44': - resolution: {integrity: sha512-rd1LzbpXQuR8MTG43JB9VyXDjG7ogSJbIkBpZEHJ8oMKzL6j47kQT5BpIXrg3b5UVygW9QCI2fpFdMocT5Kudg==} + '@rolldown/binding-linux-x64-gnu@1.0.0-beta.45': + resolution: {integrity: sha512-lS082ROBWdmOyVY/0YB3JmsiClaWoxvC+dA8/rbhyB9VLkvVEaihLEOr4CYmrMse151C4+S6hCw6oa1iewox7g==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [linux] @@ -7334,8 +7334,8 @@ packages: cpu: [x64] os: [linux] - '@rolldown/binding-linux-x64-musl@1.0.0-beta.44': - resolution: {integrity: sha512-qI2IiPqmPRW25exXkuQr3TlweCDc05YvvbSDRPCuPsWkwb70dTiSoXn8iFxT4PWqTi71wWHg1Wyta9PlVhX5VA==} + '@rolldown/binding-linux-x64-musl@1.0.0-beta.45': + resolution: {integrity: sha512-Hi73aYY0cBkr1/SvNQqH8Cd+rSV6S9RB5izCv0ySBcRnd/Wfn5plguUoGYwBnhHgFbh6cPw9m2dUVBR6BG1gxA==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [linux] @@ -7350,8 +7350,8 @@ packages: cpu: [arm64] os: [win32] - '@rolldown/binding-win32-arm64-msvc@1.0.0-beta.44': - resolution: {integrity: sha512-cF1LJdDIX02cJrFrX3wwQ6IzFM7I74BYeKFkzdcIA4QZ0+2WA7/NsKIgjvrunupepWb1Y6PFWdRlHSaz5AW1Wg==} + '@rolldown/binding-win32-arm64-msvc@1.0.0-beta.45': + resolution: {integrity: sha512-zyzAjItHPUmxg6Z8SyRhLdXlJn3/D9KL5b9mObUrBHhWS/GwRH4665xCiFqeuktAhhWutqfc+rOV2LjK4VYQGQ==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [win32] @@ -7361,8 +7361,8 @@ packages: cpu: [ia32] os: [win32] - '@rolldown/binding-win32-ia32-msvc@1.0.0-beta.44': - resolution: {integrity: sha512-5uaJonDafhHiMn+iEh7qUp3QQ4Gihv3lEOxKfN8Vwadpy0e+5o28DWI42DpJ9YBYMrVy4JOWJ/3etB/sptpUwA==} + '@rolldown/binding-win32-ia32-msvc@1.0.0-beta.45': + resolution: {integrity: sha512-wODcGzlfxqS6D7BR0srkJk3drPwXYLu7jPHN27ce2c4PUnVVmJnp9mJzUQGT4LpmHmmVdMZ+P6hKvyTGBzc1CA==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [ia32] os: [win32] @@ -7372,8 +7372,8 @@ packages: cpu: [x64] os: [win32] - '@rolldown/binding-win32-x64-msvc@1.0.0-beta.44': - resolution: {integrity: sha512-vsqhWAFJkkmgfBN/lkLCWTXF1PuPhMjfnAyru48KvF7mVh2+K7WkKYHezF3Fjz4X/mPScOcIv+g6cf6wnI6eWg==} + '@rolldown/binding-win32-x64-msvc@1.0.0-beta.45': + resolution: {integrity: sha512-wiU40G1nQo9rtfvF9jLbl79lUgjfaD/LTyUEw2Wg/gdF5OhjzpKMVugZQngO+RNdwYaNj+Fs+kWBWfp4VXPMHA==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [win32] @@ -21360,13 +21360,13 @@ snapshots: '@rolldown/binding-darwin-arm64@1.0.0-beta.30': optional: true - '@rolldown/binding-darwin-arm64@1.0.0-beta.44': + '@rolldown/binding-darwin-arm64@1.0.0-beta.45': optional: true '@rolldown/binding-darwin-x64@1.0.0-beta.30': optional: true - '@rolldown/binding-darwin-x64@1.0.0-beta.44': + '@rolldown/binding-darwin-x64@1.0.0-beta.45': optional: true '@rolldown/binding-freebsd-x64@1.0.0-beta.30': @@ -21378,13 +21378,13 @@ snapshots: '@rolldown/binding-linux-arm64-gnu@1.0.0-beta.30': optional: true - '@rolldown/binding-linux-arm64-gnu@1.0.0-beta.44': + '@rolldown/binding-linux-arm64-gnu@1.0.0-beta.45': optional: true '@rolldown/binding-linux-arm64-musl@1.0.0-beta.30': optional: true - '@rolldown/binding-linux-arm64-musl@1.0.0-beta.44': + '@rolldown/binding-linux-arm64-musl@1.0.0-beta.45': optional: true '@rolldown/binding-linux-arm64-ohos@1.0.0-beta.30': @@ -21393,13 +21393,13 @@ snapshots: '@rolldown/binding-linux-x64-gnu@1.0.0-beta.30': optional: true - '@rolldown/binding-linux-x64-gnu@1.0.0-beta.44': + '@rolldown/binding-linux-x64-gnu@1.0.0-beta.45': optional: true '@rolldown/binding-linux-x64-musl@1.0.0-beta.30': optional: true - '@rolldown/binding-linux-x64-musl@1.0.0-beta.44': + '@rolldown/binding-linux-x64-musl@1.0.0-beta.45': optional: true '@rolldown/binding-wasm32-wasi@1.0.0-beta.30': @@ -21410,19 +21410,19 @@ snapshots: '@rolldown/binding-win32-arm64-msvc@1.0.0-beta.30': optional: true - '@rolldown/binding-win32-arm64-msvc@1.0.0-beta.44': + '@rolldown/binding-win32-arm64-msvc@1.0.0-beta.45': optional: true '@rolldown/binding-win32-ia32-msvc@1.0.0-beta.30': optional: true - '@rolldown/binding-win32-ia32-msvc@1.0.0-beta.44': + '@rolldown/binding-win32-ia32-msvc@1.0.0-beta.45': optional: true '@rolldown/binding-win32-x64-msvc@1.0.0-beta.30': optional: true - '@rolldown/binding-win32-x64-msvc@1.0.0-beta.44': + '@rolldown/binding-win32-x64-msvc@1.0.0-beta.45': optional: true '@rolldown/pluginutils@1.0.0-beta.30': {}