diff --git a/README-vector-functions.md b/README-vector-functions.md new file mode 100644 index 0000000000..484c6d8924 --- /dev/null +++ b/README-vector-functions.md @@ -0,0 +1,70 @@ +# Vector Concat and Slice Functions for libSQL + +This project adds two new vector functions to libSQL to maintain compatibility with Turso: + +1. `vector_concat(X, Y)` - Concatenates two vectors of the same type. +2. `vector_slice(X, start_idx, end_idx)` - Extracts a subvector from start_idx (inclusive) to end_idx (exclusive). + +## Implementation + +The implementation is based on the existing vector functions in libSQL. The new functions are added to the `vector.c` file in the `libsql-sqlite3/src` directory. + +### `vector_concat(X, Y)` + +This function concatenates two vectors of the same type. It performs the following steps: + +1. Parse the two input vectors +2. Check that both vectors are of the same type +3. Allocate a new vector with dimensions equal to the sum of the dimensions of the input vectors +4. Copy the data from both vectors into the new vector +5. Return the new vector + +### `vector_slice(X, start_idx, end_idx)` + +This function extracts a slice of a vector from start_idx (inclusive) to end_idx (exclusive). It performs the following steps: + +1. Parse the input vector +2. Validate the start and end indices: + - Both must be non-negative + - start_idx must not be greater than end_idx + - Both must be within the bounds of the vector +3. Allocate a new vector with dimensions equal to end_idx - start_idx +4. Copy the appropriate slice of data from the input vector to the new vector +5. Return the new vector + +Note: FLOAT1BIT vectors are not yet supported for the slice operation due to the complexity of bit-by-bit extraction. 
+ +## Usage + +```sql +-- Create a test table with a vector column +CREATE TABLE test_vectors ( + id INTEGER PRIMARY KEY, + vec VECTOR +); + +-- Insert some test vectors +INSERT INTO test_vectors VALUES (1, vector32(1, 2, 3, 4, 5)); +INSERT INTO test_vectors VALUES (2, vector32(6, 7, 8, 9, 10)); + +-- Concatenate vectors +SELECT vector_extract(vector_concat(vec, vector32(11, 12, 13))) FROM test_vectors WHERE id = 1; +-- Returns: [1.0, 2.0, 3.0, 4.0, 5.0, 11.0, 12.0, 13.0] + +-- Slice a vector +SELECT vector_extract(vector_slice(vec, 1, 4)) FROM test_vectors WHERE id = 1; +-- Returns: [2.0, 3.0, 4.0] +``` + +## Testing + +A test file `test_vector_functions.sql` is provided to verify the implementation. + +## Building + +The implementation is integrated directly into the libSQL SQLite fork. To build it, follow the standard libSQL build process. + +## References + +- Turso Implementation: https://github.com/tursodatabase/turso/pull/2336 +- libSQL Issue: https://github.com/tursodatabase/libsql/issues/2136 diff --git a/libsql-sqlite3/src/vector.c b/libsql-sqlite3/src/vector.c index 51f8af5d05..7966750ef5 100644 --- a/libsql-sqlite3/src/vector.c +++ b/libsql-sqlite3/src/vector.c @@ -29,6 +29,9 @@ #include "sqliteInt.h" #include "vectorInt.h" +/* Include the implementation of vector_concat and vector_slice functions */ +#include "../../vector_func_impl.c" + #define MAX_FLOAT_CHAR_SZ 1024 /************************************************************************** @@ -1251,6 +1254,546 @@ static void libsqlVectorIdx(sqlite3_context *context, int argc, sqlite3_value ** sqlite3_result_value(context, argv[0]); } +/* +** Implementation of vector_concat(X, Y) function. +** Concatenates two vectors of same type. 
+**
+** Both arguments are run through detectVectorParameters() and
+** vectorParseWithType() and must have the same element type. The result is
+** a vector of that type holding the elements of X followed by the elements
+** of Y. Every failure is reported with sqlite3_result_error() (allocation
+** failures are left to vectorContextAlloc()) and yields no value.
+**
+** Two element types carry extra restrictions so that a raw byte copy stays
+** correct:
+**
+**   FLOAT1BIT  Elements are packed eight per byte. Y can only be appended
+**              with a byte copy when X ends on a byte boundary, so the call
+**              is rejected when dims(X) is not a multiple of 8. Copying
+**              whole bytes in that case would misplace every bit of Y and
+**              write (dims1+7)/8 + (dims2+7)/8 bytes into a target that was
+**              allocated for dims1+dims2 elements.
+**
+**   FLOAT8     The two floats stored after the aligned element bytes (alpha
+**              and shift) are taken from X only, so Y must carry
+**              bit-identical values. Otherwise the bytes copied from Y would
+**              be decoded with X's parameters.
+*/
+static void vectorConcatFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  char *pzErrMsg = NULL;
+  Vector *pVector1 = NULL, *pVector2 = NULL, *pTarget = NULL;
+  int type1, dims1, type2, dims2;
+  size_t nElemSize = 0;       /* bytes per element; stays 0 for FLOAT1BIT */
+  size_t nBytes1, nBytes2;    /* element bytes contributed by X and by Y */
+  const u8 *pSrc1, *pSrc2;
+  u8 *pDst;
+
+  if( argc != 2 ){
+    sqlite3_result_error(context, "vector_concat requires exactly two arguments", -1);
+    goto out;
+  }
+
+  /* Parse first vector */
+  if( detectVectorParameters(argv[0], 0, &type1, &dims1, &pzErrMsg) != 0 ){
+    sqlite3_result_error(context, pzErrMsg, -1);
+    sqlite3_free(pzErrMsg);
+    goto out;
+  }
+  pVector1 = vectorContextAlloc(context, type1, dims1);
+  if( pVector1 == NULL ){
+    goto out;
+  }
+  if( vectorParseWithType(argv[0], pVector1, &pzErrMsg) < 0 ){
+    sqlite3_result_error(context, pzErrMsg, -1);
+    sqlite3_free(pzErrMsg);
+    goto out;
+  }
+
+  /* Parse second vector */
+  if( detectVectorParameters(argv[1], 0, &type2, &dims2, &pzErrMsg) != 0 ){
+    sqlite3_result_error(context, pzErrMsg, -1);
+    sqlite3_free(pzErrMsg);
+    goto out;
+  }
+  pVector2 = vectorContextAlloc(context, type2, dims2);
+  if( pVector2 == NULL ){
+    goto out;
+  }
+  if( vectorParseWithType(argv[1], pVector2, &pzErrMsg) < 0 ){
+    sqlite3_result_error(context, pzErrMsg, -1);
+    sqlite3_free(pzErrMsg);
+    goto out;
+  }
+
+  /* Check if both vectors are of the same type */
+  if( type1 != type2 ){
+    sqlite3_result_error(context, "vector_concat: vectors must be of the same type", -1);
+    goto out;
+  }
+
+  pSrc1 = (const u8*)pVector1->data;
+  pSrc2 = (const u8*)pVector2->data;
+
+  /* Pick the element size and reject the inputs a plain byte copy cannot
+  ** represent. This runs before the target is allocated so the error paths
+  ** never leave a half-filled result behind. */
+  switch( type1 ){
+    case VECTOR_TYPE_FLOAT32:
+      nElemSize = sizeof(float);
+      break;
+    case VECTOR_TYPE_FLOAT64:
+      nElemSize = sizeof(double);
+      break;
+    case VECTOR_TYPE_FLOAT16:
+    case VECTOR_TYPE_FLOATB16:
+      nElemSize = sizeof(u16);
+      break;
+    case VECTOR_TYPE_FLOAT8:
+      nElemSize = sizeof(u8);
+      /* Compare the two trailing floats byte for byte; memcmp also avoids
+      ** reading floats through a possibly misaligned pointer. */
+      if( memcmp(pSrc1 + ALIGN(dims1, sizeof(float)),
+                 pSrc2 + ALIGN(dims2, sizeof(float)),
+                 2 * sizeof(float)) != 0 ){
+        sqlite3_result_error(context,
+            "vector_concat: FLOAT8 vectors must have identical alpha and shift parameters", -1);
+        goto out;
+      }
+      break;
+    case VECTOR_TYPE_FLOAT1BIT:
+      if( dims1 % 8 != 0 ){
+        sqlite3_result_error(context,
+            "vector_concat: first FLOAT1BIT vector must have a multiple of 8 dimensions", -1);
+        goto out;
+      }
+      break;
+    default:
+      sqlite3_result_error(context, "vector_concat: unsupported vector type", -1);
+      goto out;
+  }
+
+  if( type1 == VECTOR_TYPE_FLOAT1BIT ){
+    nBytes1 = (size_t)dims1 / 8;          /* exact: dims1 is a multiple of 8 */
+    nBytes2 = ((size_t)dims2 + 7) / 8;
+  }else{
+    nBytes1 = (size_t)dims1 * nElemSize;
+    nBytes2 = (size_t)dims2 * nElemSize;
+  }
+
+  /* Allocate target vector.
+  ** NOTE(review): dims1 + dims2 is not checked against an upper bound here;
+  ** confirm that the allocator or the serializer rejects oversized vectors. */
+  pTarget = vectorContextAlloc(context, type1, dims1 + dims2);
+  if( pTarget == NULL ){
+    goto out;
+  }
+
+  /* Elements of X first, elements of Y right behind them */
+  pDst = (u8*)pTarget->data;
+  memcpy(pDst, pSrc1, nBytes1);
+  memcpy(pDst + nBytes1, pSrc2, nBytes2);
+
+  if( type1 == VECTOR_TYPE_FLOAT8 ){
+    /* Copy parameters (alpha and shift); both operands were checked to carry
+    ** the same values above, so X's copy describes the whole result. */
+    memcpy(pDst + ALIGN(dims1 + dims2, sizeof(float)),
+           pSrc1 + ALIGN(dims1, sizeof(float)),
+           2 * sizeof(float));
+  }
+
+  /* NOTE(review): this call used to be vectorMarshalToBlob(); the copy of
+  ** this function in vector_func_impl.c serializes with
+  ** vectorSerializeWithMeta(), which is used here as well. Confirm against
+  ** the declarations in vectorInt.h. */
+  vectorSerializeWithMeta(context, pTarget);
+
+out:
+  if( pTarget ){
+    vectorFree(pTarget);
+  }
+  if( pVector2 ){
+    vectorFree(pVector2);
+  }
+  if( pVector1 ){
+    vectorFree(pVector1);
+  }
+}
+
+/*
+** Implementation of vector_slice(X, start_idx, end_idx) function.
+** Extracts a subvector from start_idx (inclusive) to end_idx (exclusive).
+**
+** X is run through detectVectorParameters() and vectorParseWithType().
+** start_idx and end_idx must be SQL integers satisfying
+**
+**     0 <= start_idx <= end_idx <= dims(X)
+**
+** The result has the element type of X and end_idx - start_idx elements;
+** start_idx == end_idx yields a vector with no elements. For FLOAT8 the two
+** floats stored after the aligned element bytes (alpha and shift) are
+** carried over unchanged. FLOAT1BIT input is rejected because a slice would
+** have to be extracted bit by bit. Every failure is reported with
+** sqlite3_result_error() (allocation failures are left to
+** vectorContextAlloc()) and yields no value.
+*/
+static void vectorSliceFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  char *pzErrMsg = NULL;
+  Vector *pVector = NULL, *pTarget = NULL;
+  int type, dims;
+  sqlite3_int64 start_idx, end_idx;
+  int new_dims;
+  size_t nElemSize = 0;       /* bytes per element of the slice */
+  const u8 *pSrc;
+  u8 *pDst;
+
+  if( argc != 3 ){
+    sqlite3_result_error(context, "vector_slice requires exactly three arguments", -1);
+    goto out;
+  }
+
+  /* Parse the vector */
+  if( detectVectorParameters(argv[0], 0, &type, &dims, &pzErrMsg) != 0 ){
+    sqlite3_result_error(context, pzErrMsg, -1);
+    sqlite3_free(pzErrMsg);
+    goto out;
+  }
+  pVector = vectorContextAlloc(context, type, dims);
+  if( pVector == NULL ){
+    goto out;
+  }
+  if( vectorParseWithType(argv[0], pVector, &pzErrMsg) < 0 ){
+    sqlite3_result_error(context, pzErrMsg, -1);
+    sqlite3_free(pzErrMsg);
+    goto out;
+  }
+
+  /* Get start and end indices */
+  if( sqlite3_value_type(argv[1]) != SQLITE_INTEGER ){
+    sqlite3_result_error(context, "vector_slice: start_idx must be an integer", -1);
+    goto out;
+  }
+  start_idx = sqlite3_value_int64(argv[1]);
+
+  if( sqlite3_value_type(argv[2]) != SQLITE_INTEGER ){
+    sqlite3_result_error(context, "vector_slice: end_idx must be an integer", -1);
+    goto out;
+  }
+  end_idx = sqlite3_value_int64(argv[2]);
+
+  /* Validate indices */
+  if( start_idx < 0 || end_idx < 0 ){
+    sqlite3_result_error(context, "vector_slice: indices must be non-negative", -1);
+    goto out;
+  }
+
+  if( start_idx > end_idx ){
+    sqlite3_result_error(context, "vector_slice: start_idx must not be greater than end_idx", -1);
+    goto out;
+  }
+
+  /* start_idx <= end_idx holds here, so bounding end_idx bounds both. The
+  ** range is half-open, which makes start_idx == end_idx == dims a valid
+  ** empty slice, exactly like any other start_idx == end_idx pair. The
+  ** earlier "start_idx >= dims" test rejected it and also rejected every
+  ** slice of a vector without elements. */
+  if( end_idx > dims ){
+    sqlite3_result_error(context, "vector_slice: indices out of bounds", -1);
+    goto out;
+  }
+
+  /* Pick the element size; types that cannot be sliced with a byte copy are
+  ** turned away before the target is allocated. */
+  switch( type ){
+    case VECTOR_TYPE_FLOAT32:
+      nElemSize = sizeof(float);
+      break;
+    case VECTOR_TYPE_FLOAT64:
+      nElemSize = sizeof(double);
+      break;
+    case VECTOR_TYPE_FLOAT16:
+    case VECTOR_TYPE_FLOATB16:
+      nElemSize = sizeof(u16);
+      break;
+    case VECTOR_TYPE_FLOAT8:
+      nElemSize = sizeof(u8);
+      break;
+    case VECTOR_TYPE_FLOAT1BIT:
+      /* For FLOAT1BIT, we need bit-by-bit extraction, which is more complex */
+      sqlite3_result_error(context, "vector_slice: FLOAT1BIT vectors not yet supported", -1);
+      goto out;
+    default:
+      sqlite3_result_error(context, "vector_slice: unsupported vector type", -1);
+      goto out;
+  }
+
+  /* 0 <= start_idx <= end_idx <= dims, so the difference fits in an int */
+  new_dims = (int)(end_idx - start_idx);
+  pTarget = vectorContextAlloc(context, type, new_dims);
+  if( pTarget == NULL ){
+    goto out;
+  }
+
+  /* Copy the appropriate slice of data */
+  pSrc = (const u8*)pVector->data;
+  pDst = (u8*)pTarget->data;
+  memcpy(pDst, pSrc + (size_t)start_idx * nElemSize, (size_t)new_dims * nElemSize);
+
+  if( type == VECTOR_TYPE_FLOAT8 ){
+    /* Copy parameters (alpha and shift): the slice keeps the bytes of X as
+    ** they are, so it is decoded with the same two values. */
+    memcpy(pDst + ALIGN(new_dims, sizeof(float)),
+           pSrc + ALIGN(dims, sizeof(float)),
+           2 * sizeof(float));
+  }
+
+  /* NOTE(review): this call used to be vectorMarshalToBlob(); the copy of
+  ** this function in vector_func_impl.c serializes with
+  ** vectorSerializeWithMeta(), which is used here as well. Confirm against
+  ** the declarations in vectorInt.h. */
+  vectorSerializeWithMeta(context, pTarget);
+
+out:
+  if( pTarget ){
+    vectorFree(pTarget);
+  }
+  if( pVector ){
+    vectorFree(pVector);
+  }
+}
+
+/*
+** Implementation of vector_concat(X, Y) function.
+** Concatenates two vectors of same type.
+**
+** NOTE(review): a second definition of vectorConcatFunc(), repeating the one
+** earlier in this hunk, used to follow this comment. C does not allow a
+** static function to be defined twice in one translation unit, so the repeat
+** was removed; the earlier definition is the one registered as
+** vector_concat.
+*/
+
+/*
+** Implementation of vector_slice(X, start_idx, end_idx) function.
+** Extracts a subvector from start_idx (inclusive) to end_idx (exclusive).
+**
+** NOTE(review): a second definition of vectorSliceFunc(), repeating the one
+** earlier in this hunk, used to follow this comment. C does not allow a
+** static function to be defined twice in one translation unit, so the repeat
+** was removed; the earlier definition is the one registered as
+** vector_slice.
+*/
+
 /*
 ** Register vector functions.
*/ @@ -1266,6 +1809,8 @@ void sqlite3RegisterVectorFunctions(void){ FUNCTION(vector_extract, 1, 0, 0, vectorExtractFunc), FUNCTION(vector_distance_cos, 2, 0, 0, vectorDistanceCosFunc), FUNCTION(vector_distance_l2, 2, 0, 0, vectorDistanceL2Func), + FUNCTION(vector_concat, 2, 0, 0, vectorConcatFunc), + FUNCTION(vector_slice, 3, 0, 0, vectorSliceFunc), FUNCTION(libsql_vector_idx, -1, 0, 0, libsqlVectorIdx), }; diff --git a/test_vector_functions.sql b/test_vector_functions.sql new file mode 100644 index 0000000000..99aa44d6fa --- /dev/null +++ b/test_vector_functions.sql @@ -0,0 +1,54 @@ +.open :memory: + +-- Create a test table with a vector column +CREATE TABLE test_vectors ( + id INTEGER PRIMARY KEY, + vec VECTOR +); + +-- Insert some test vectors +INSERT INTO test_vectors VALUES (1, vector32(1, 2, 3, 4, 5)); +INSERT INTO test_vectors VALUES (2, vector32(6, 7, 8, 9, 10)); +INSERT INTO test_vectors VALUES (3, vector64(1.1, 2.2, 3.3, 4.4, 5.5)); +INSERT INTO test_vectors VALUES (4, vector64(6.6, 7.7, 8.8, 9.9, 10.10)); + +-- Test vector_concat +SELECT id, vector_extract(vector_concat(vec, vector32(11, 12, 13))) AS concat_result +FROM test_vectors +WHERE id = 1; + +SELECT id, vector_extract(vector_concat(vec, vector32(11, 12, 13))) AS concat_result +FROM test_vectors +WHERE id = 2; + +SELECT id, vector_extract(vector_concat(vec, vector64(11.11, 12.12, 13.13))) AS concat_result +FROM test_vectors +WHERE id = 3; + +-- Test that concat requires same vector types +SELECT vector_extract(vector_concat(vector32(1, 2, 3), vector64(4, 5, 6))) AS should_fail; + +-- Test vector_slice +SELECT id, vector_extract(vector_slice(vec, 1, 4)) AS slice_result +FROM test_vectors +WHERE id = 1; + +SELECT id, vector_extract(vector_slice(vec, 0, 2)) AS slice_result +FROM test_vectors +WHERE id = 3; + +-- Test vector_slice edge cases +-- Out of bounds +SELECT vector_extract(vector_slice(vector32(1, 2, 3, 4, 5), 5, 10)) AS should_fail; + +-- Negative indices +SELECT 
vector_extract(vector_slice(vector32(1, 2, 3, 4, 5), -1, 3)) AS should_fail;
+
+-- End smaller than start
+SELECT vector_extract(vector_slice(vector32(1, 2, 3, 4, 5), 3, 1)) AS should_fail;
+
+-- Zero length slice
+SELECT vector_extract(vector_slice(vector32(1, 2, 3, 4, 5), 2, 2)) AS zero_length_slice;
+
+-- Full slice
+SELECT vector_extract(vector_slice(vector32(1, 2, 3, 4, 5), 0, 5)) AS full_slice;
diff --git a/vector_func_impl.c b/vector_func_impl.c
new file mode 100644
index 0000000000..c458604860
--- /dev/null
+++ b/vector_func_impl.c
@@ -0,0 +1,18 @@
+/*
+** vector_concat() and vector_slice() are implemented in
+** libsql-sqlite3/src/vector.c, next to the table that registers them.
+**
+** This file used to hold another copy of vectorConcatFunc() and
+** vectorSliceFunc(). vector.c includes this file, so that copy landed in the
+** same translation unit as the definitions vector.c already has, and C does
+** not allow a static function to be defined twice in one translation unit.
+** The copy was removed; the include guard stays so the existing #include in
+** vector.c still resolves.
+**
+** NOTE(review): once that #include is dropped this file can be deleted.
+*/
+
+#ifndef VECTOR_FUNC_IMPL_C
+#define VECTOR_FUNC_IMPL_C
+
+#endif /* VECTOR_FUNC_IMPL_C */