diff --git a/melos.yaml b/melos.yaml
index 96d44214..ef79055d 100644
--- a/melos.yaml
+++ b/melos.yaml
@@ -30,6 +30,7 @@ command:
       cross_file: ^0.3.4+2
       crypto: ^3.0.3
       csv: ^6.0.0
+      drift: ^2.21.0
       equatable: ^2.0.5
       fetch_client: ^1.1.2
       firebase_app_check: ^0.3.0
@@ -53,16 +54,20 @@ command:
       math_expressions: ^2.6.0
       meta: ^1.11.0
       objectbox: ^4.0.1
+      path: ^1.9.0
       pinecone: ^0.7.2
       rxdart: ">=0.27.7 <0.29.0"
       shared_preferences: ^2.3.0
       shelf: ^1.4.2
       shelf_router: ^1.1.4
+      sqlite3: ^2.4.6
       supabase: ^2.2.7
       uuid: ^4.5.1
       web_socket_channel: ^3.0.1
     dev_dependencies:
+      archive: ^3.6.1
       build_runner: ^2.4.11
+      drift_dev: ^2.21.0
       freezed: ^2.5.7
       json_serializable: ^6.8.0
       objectbox_generator: ^4.0.1
diff --git a/packages/langchain_community/app.v5.db b/packages/langchain_community/app.v5.db
new file mode 100644
index 00000000..660fc419
Binary files /dev/null and b/packages/langchain_community/app.v5.db differ
diff --git a/packages/langchain_community/build.yaml b/packages/langchain_community/build.yaml
new file mode 100644
index 00000000..472f1bf4
--- /dev/null
+++ b/packages/langchain_community/build.yaml
@@ -0,0 +1,15 @@
+targets:
+  $default:
+    builders:
+      objectbox_generator:generator:
+        enabled: true
+        generate_for:
+          - lib/src/vector_stores/objectbox/**.dart
+      source_gen:combining_builder:
+        options:
+          ignore_for_file:
+            - type=lint
+            - subtype=lint
+        generate_for:
+          exclude:
+            - lib/src/vector_stores/objectbox/**.dart
\ No newline at end of file
diff --git a/packages/langchain_community/extensions/mac_vec0.dll b/packages/langchain_community/extensions/mac_vec0.dll
new file mode 100644
index 00000000..ca230b5f
Binary files /dev/null and b/packages/langchain_community/extensions/mac_vec0.dll differ
diff --git a/packages/langchain_community/extensions/vec0.dylib b/packages/langchain_community/extensions/vec0.dylib
new file mode 100644
index 00000000..604e0fc4
Binary files /dev/null and b/packages/langchain_community/extensions/vec0.dylib differ
diff --git a/packages/langchain_community/extensions/win_vec0.dll b/packages/langchain_community/extensions/win_vec0.dll
new file mode 100644
index 00000000..a111f4d5
Binary files /dev/null and b/packages/langchain_community/extensions/win_vec0.dll differ
diff --git a/packages/langchain_community/lib/dart_utils/debugging.dart b/packages/langchain_community/lib/dart_utils/debugging.dart
new file mode 100644
index 00000000..e71dadbc
--- /dev/null
+++ b/packages/langchain_community/lib/dart_utils/debugging.dart
@@ -0,0 +1,24 @@
+/// Prints a debug [message] if the application is not running in profile or
+/// product mode.
+///
+/// The message is printed only if the optional condition [cond] is either
+/// null or evaluates to true for the given message.
+void kDebugPrint(dynamic message, {bool Function(String str)? cond}) {
+  if (!const bool.fromEnvironment('dart.vm.profile') &&
+      !const bool.fromEnvironment('dart.vm.product') &&
+      (cond == null || cond(message))) {
+    // ignore: avoid_print
+    print(message);
+  }
+}
+
+/// Returns true if the application is in debug mode.
+///
+/// The application is considered to be in debug mode if it is not in profile
+/// or product mode.
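+///
+/// Unlike Flutter's `kDebugMode`, this getter can also be used in pure Dart
+/// programs, as it only relies on `bool.fromEnvironment`.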
+bool get kDebugDartMode =>
+    !const bool.fromEnvironment('dart.vm.profile') &&
+    !const bool.fromEnvironment('dart.vm.product');
diff --git a/packages/langchain_community/lib/src/vector_stores/sqlite_vec/sqlite_vec.dart b/packages/langchain_community/lib/src/vector_stores/sqlite_vec/sqlite_vec.dart
new file mode 100644
index 00000000..1a54eba7
--- /dev/null
+++ b/packages/langchain_community/lib/src/vector_stores/sqlite_vec/sqlite_vec.dart
@@ -0,0 +1,133 @@
+import 'dart:convert';
+
+import 'package:langchain_core/documents.dart';
+import 'package:langchain_core/embeddings.dart';
+import 'package:langchain_core/vector_stores.dart';
+
+import 'src/database.dart';
+
+/// SQLite with the `sqlite-vec` extension as a vector database.
+///
+/// To use, the `sqlite-vec` SQLite extension must be available for loading
+/// (prebuilt binaries are bundled in the `extensions/` directory).
+///
+/// Example:
+/// ```dart
+/// import 'package:langchain_community/vector_stores/sqlite_vec.dart';
+/// import 'package:langchain_community/embeddings/openai.dart';
+///
+/// // ... (rest of the example code)
+/// ```
+class SQLiteVEC extends VectorStore {
+  /// Drift database instance.
+  late final Database db;
+
+  /// Table name.
+  final String table;
+
+  /// Database file path or `:memory:` for an in-memory database.
+  final String dbFile;
+
+  SQLiteVEC._internal({
+    required super.embeddings,
+    required this.table,
+    required this.dbFile,
+    required this.db,
+  });
+
+  /// Creates a new [SQLiteVEC] instance.
+  static Future<SQLiteVEC> create({
+    required Embeddings embeddings,
+    required String table,
+    required String dbFile,
+  }) async {
+    // Embed a dummy text to discover the embedding dimension.
+    final dummyEmbedding = await embeddings.embedQuery('This is a dummy text');
+    final embeddingDimension = dummyEmbedding.length;
+
+    final db = Database(embeddings, embeddingDimension, dbFile: dbFile);
+
+    return SQLiteVEC._internal(
+      embeddings: embeddings,
+      table: table,
+      dbFile: dbFile,
+      db: db,
+    );
+  }
+
+  /// Returns a [SQLiteVEC] vector store initialized from [texts] and
+  /// [embeddings].
+  static Future<SQLiteVEC> fromTexts(
+    List<String> texts,
+    Embeddings embeddings, {
+    List<Map<String, dynamic>>? metadatas,
+    String table = 'langchain',
+    String dbFile = 'vec',
+  }) async {
+    final vec = await SQLiteVEC.create(
+      embeddings: embeddings,
+      table: table,
+      dbFile: dbFile,
+    );
+    await vec.addTexts(texts: texts, metadatas: metadatas);
+    return vec;
+  }
+
+  /// Adds texts to the vector store.
+  Future<List<String>> addTexts({
+    required List<String> texts,
+    List<Map<String, dynamic>>? metadatas,
+  }) async {
+    final ids = <String>[];
+    for (int i = 0; i < texts.length; i++) {
+      final metadata = (metadatas != null && i < metadatas.length)
+          ? metadatas[i]
+          : <String, dynamic>{};
+
+      final fileId =
+          await db.insertFile('in-memory', texts[i], jsonEncode(metadata));
+      final chunks = chunkText(texts[i]);
+      for (final chunk in chunks) {
+        final chunkId = await db.addChunk(chunk.$1);
+        await db.insertFileEmbedding(fileId, chunkId, chunk.$2, chunk.$3);
+        ids.add(chunkId.toString());
+      }
+    }
+    return ids;
+  }
+
+  @override
+  Future<List<String>> addDocuments({
+    required List<Document> documents,
+  }) async {
+    final vectors = await embeddings.embedDocuments(documents);
+    return addVectors(
+      vectors: vectors,
+      documents: documents,
+    );
+  }
+
+  @override
+  Future<List<String>> addVectors({
+    required List<List<double>> vectors,
+    required List<Document> documents,
+  }) async {
+    final ids = await db.addVectors(vectors: vectors, documents: documents);
+    return ids.map((id) => id.toString()).toList();
+  }
+
+  @override
+  Future<void> delete({required List<String> ids}) {
+    return db.deleteChunks(ids.map(int.parse).toList());
+  }
+
+  @override
+  Future<List<(Document, double)>> similaritySearchByVectorWithScores({
+    required List<double> embedding,
+    VectorStoreSimilaritySearch config = const VectorStoreSimilaritySearch(),
+  }) async {
+    return db.similaritySearchByVectorWithScores(
+      embedding: embedding,
+      config: config,
+    );
+  }
+}
diff --git a/packages/langchain_community/lib/src/vector_stores/sqlite_vec/src/connection/connection.dart b/packages/langchain_community/lib/src/vector_stores/sqlite_vec/src/connection/connection.dart
new file mode 100644
index 00000000..827bd5cb
--- /dev/null
+++ b/packages/langchain_community/lib/src/vector_stores/sqlite_vec/src/connection/connection.dart
@@ -0,0 +1,5 @@
+// We use a conditional export to expose the right connection factory
+// depending on the platform.
+export 'unsupported.dart'
+    if (dart.library.js) 'web.dart'
+    if (dart.library.ffi) 'native.dart';
diff --git a/packages/langchain_community/lib/src/vector_stores/sqlite_vec/src/connection/native.dart b/packages/langchain_community/lib/src/vector_stores/sqlite_vec/src/connection/native.dart
new file mode 100644
index 00000000..77873194
--- /dev/null
+++ b/packages/langchain_community/lib/src/vector_stores/sqlite_vec/src/connection/native.dart
@@ -0,0 +1,59 @@
+import 'dart:ffi';
+import 'dart:io';
+
+import 'package:drift/drift.dart';
+import 'package:drift/native.dart';
+import 'package:path/path.dart' as p;
+import 'package:sqlite3/sqlite3.dart';
+
+/// Returns the database [File] for the given [name] when running drift in a
+/// pure Dart environment.
+Future<File> databaseFile(String name) async {
+  // In a non-Flutter environment there is no application documents
+  // directory, so the current working directory is used instead.
+  final appDir = Directory.current.path;
+  final dbPath = p.join(appDir, '$name.db');
+  return File(dbPath);
+}
+
+/// Obtains a database connection for running drift in a pure Dart environment.
+QueryExecutor connect(String name) {
+  // On Android, Linux, and other Unix-based systems, point SQLite to the
+  // system temporary directory for its scratch files.
+  if (Platform.isAndroid || Platform.isLinux) {
+    sqlite3.tempDirectory = Directory.systemTemp.path;
+  }
+
+  sqlite3.ensureExtensionLoaded(
+    SqliteExtension.inLibrary(
+      _loadLibrary(Platform.isWindows ? 'win_vec0' : 'mac_vec0'),
+      'sqlite3_vec_init',
+    ),
+  );
+
+  if (name == ':memory:') {
+    return NativeDatabase.memory();
+  }
+  return DatabaseConnection.delayed(
+    Future(() async {
+      return NativeDatabase.createBackgroundConnection(
+        await databaseFile(name),
+      );
+    }),
+  );
+}
+
+/// Dynamically loads the `sqlite-vec` library for the current operating
+/// system.
+DynamicLibrary _loadLibrary(String name) {
+  if (Platform.isIOS || Platform.isMacOS) {
+    return DynamicLibrary.open('$name.dylib');
+  }
+  if (Platform.isAndroid || Platform.isLinux) {
+    return DynamicLibrary.open('$name.so');
+  }
+  if (Platform.isWindows) {
+    return DynamicLibrary.open(
+      p.join(Directory.current.path, 'extensions', '$name.dll'),
+    );
+  }
+  throw UnsupportedError('Unknown platform: ${Platform.operatingSystem}');
+}
diff --git a/packages/langchain_community/lib/src/vector_stores/sqlite_vec/src/connection/unsupported.dart b/packages/langchain_community/lib/src/vector_stores/sqlite_vec/src/connection/unsupported.dart
new file mode 100644
index 00000000..4b6800c8
--- /dev/null
+++ b/packages/langchain_community/lib/src/vector_stores/sqlite_vec/src/connection/unsupported.dart
@@ -0,0 +1,18 @@
+import 'package:drift/drift.dart';
+
+Never _unsupported() {
+  throw UnsupportedError(
+    'No suitable database implementation was found on this platform.',
+  );
+}
+
+/// Depending on the platform the app is compiled to, this stub is replaced
+/// with the implementation in native.dart or web.dart.
+DatabaseConnection connect(String name) {
+  _unsupported();
+}
+
+/// Depending on the platform the app is compiled to, this stub is replaced
+/// with the implementation in native.dart or web.dart.
+Future<void> validateDatabaseSchema(GeneratedDatabase database) async {
+  _unsupported();
+}
diff --git a/packages/langchain_community/lib/src/vector_stores/sqlite_vec/src/connection/web.dart b/packages/langchain_community/lib/src/vector_stores/sqlite_vec/src/connection/web.dart
new file mode 100644
index 00000000..6fe7f26a
--- /dev/null
+++ b/packages/langchain_community/lib/src/vector_stores/sqlite_vec/src/connection/web.dart
@@ -0,0 +1,33 @@
+import 'dart:async';
+
+import 'package:drift/drift.dart';
+import 'package:drift/wasm.dart';
+
+import '../../../../../dart_utils/debugging.dart';
+
+/// Obtains a database connection for running drift on the web.
+DatabaseConnection connect(String name) {
+  return DatabaseConnection.delayed(
+    Future(() async {
+      final db = await WasmDatabase.open(
+        databaseName: name,
+        sqlite3Uri: Uri.parse('sqlite3_vec.wasm'),
+        driftWorkerUri: Uri.parse('drift_worker.js'),
+      );
+
+      if (db.missingFeatures.isNotEmpty) {
+        kDebugPrint(
+          'Using ${db.chosenImplementation} due to unsupported '
+          'browser features: ${db.missingFeatures}',
+        );
+      }
+
+      return db.resolvedExecutor;
+    }),
+  );
+}
+
+/// Unfortunately, validating database schemas only works on native platforms
+/// right now, so this is a no-op on the web.
+Future<void> validateDatabaseSchema(GeneratedDatabase database) async {}
diff --git a/packages/langchain_community/lib/src/vector_stores/sqlite_vec/src/database.dart b/packages/langchain_community/lib/src/vector_stores/sqlite_vec/src/database.dart
new file mode 100644
index 00000000..7cc4c726
--- /dev/null
+++ b/packages/langchain_community/lib/src/vector_stores/sqlite_vec/src/database.dart
@@ -0,0 +1,183 @@
+import 'dart:async';
+import 'dart:convert';
+import 'dart:typed_data';
+
+import 'package:drift/drift.dart';
+import 'package:langchain_core/documents.dart';
+import 'package:langchain_core/embeddings.dart';
+import 'package:langchain_core/vector_stores.dart';
+
+import 'connection/connection.dart' as impl;
+
+part 'database.g.dart';
+
+/// Database class for storing embeddings in SQLite.
+@DriftDatabase(include: {'sql.drift'})
+class Database extends _$Database {
+  /// Creates a new database instance.
+  Database(this.textEmbedder, this.embeddingDimension, {this.dbFile = 'app.v5'})
+      : super(impl.connect(dbFile));
+
+  /// The embeddings model used to embed chunks.
+  final Embeddings textEmbedder;
+
+  /// The dimension of the embeddings.
+  final int embeddingDimension;
+
+  /// The database file path or `:memory:` for an in-memory database.
+  final String dbFile;
+
+  @override
+  int get schemaVersion => 1;
+
+  @override
+  MigrationStrategy get migration => MigrationStrategy(
+        beforeOpen: (details) async {
+          if (details.wasCreated) return;
+
+          // Read the embedding dimension from the `chunks` table schema.
+          final schemaQueryRow = await customSelect(
+            "SELECT sql FROM sqlite_master WHERE name = 'chunks';",
+          ).getSingleOrNull();
+          final createStmt = schemaQueryRow?.read<String>('sql');
+
+          final size = createStmt?.split('float[').last.split(']').first;
+
+          // Drop and re-create the table if the stored embedding dimension
+          // differs from the dimension of the current embeddings model.
+          if (size == null || int.tryParse(size) != embeddingDimension) {
+            await customStatement('DROP TABLE IF EXISTS chunks;');
+            await customStatement(
+              'CREATE VIRTUAL TABLE IF NOT EXISTS chunks USING vec0( '
+              '  id INTEGER PRIMARY KEY AUTOINCREMENT, '
+              '  embedding float[$embeddingDimension] '
+              ');',
+            );
+          }
+        },
+        onCreate: (m) async {
+          await m.createAll();
+          await m.database.customStatement(
+            'CREATE VIRTUAL TABLE IF NOT EXISTS chunks USING vec0( '
+            '  id INTEGER PRIMARY KEY AUTOINCREMENT, '
+            '  embedding float[$embeddingDimension] '
+            ');',
+          );
+        },
+      );
+
+  /// Embeds [text] (optionally prefixed with [title]) and adds it to the
+  /// `chunks` table, returning the id of the inserted row.
+  Future<int> addChunk(
+    String text, {
+    String? title,
+    int? outputDimensionality,
+  }) async {
+    final result = await textEmbedder
+        .embedQuery(title != null ? 'title: $title\n$text' : text);
+    await customStatement(
+      'INSERT INTO chunks (embedding) VALUES (:embedding)',
+      [_serializeFloat32(result)],
+    );
+    return getLastId().getSingle();
+  }
+
+  /// Embeds [query] and searches for the most similar chunks in the database.
+  Future<Selectable<SearchEmbeddingsResult>> searchChunks(
+    String query, {
+    String? title,
+    int? outputDimensionality,
+  }) async {
+    final result = await textEmbedder
+        .embedQuery(title != null ? 'title: $title\n$query' : query);
+    return searchEmbeddings(_serializeFloat32(result));
+  }
+
+  /// Performs a similarity search for [embedding] and returns the matching
+  /// documents together with their distances.
+  Future<List<(Document, double)>> similaritySearchByVectorWithScores({
+    required List<double> embedding,
+    VectorStoreSimilaritySearch config = const VectorStoreSimilaritySearch(),
+  }) async {
+    final embeddingResults =
+        await searchEmbeddings(_serializeFloat32(embedding)).get();
+    return embeddingResults
+        .map(
+          (e) => (
+            Document(
+              pageContent: e.content ?? '',
+              metadata: jsonDecode(e.metadata ?? '{}'),
+            ),
+            e.distance
+          ),
+        )
+        .toList();
+  }
+
+  /// Deletes the chunk with the given [id] from the database.
+  Future<void> deleteChunk(int id) async {
+    await customStatement(
+      'DELETE FROM chunks WHERE id = :id',
+      [id],
+    );
+  }
+
+  /// Deletes the chunks with the given [ids] from the database.
+  Future<void> deleteChunks(List<int> ids) async {
+    if (ids.isEmpty) return;
+    // SQLite cannot bind a list to a single placeholder, so expand to one
+    // placeholder per id.
+    final placeholders = List.filled(ids.length, '?').join(', ');
+    await customStatement(
+      'DELETE FROM chunks WHERE id IN ($placeholders)',
+      ids,
+    );
+  }
+
+  /// Returns a [DatabaseConnection] for the given database file.
+  static DatabaseConnection createConnection(String dbFile) {
+    return DatabaseConnection(impl.connect(dbFile));
+  }
+
+  /// Adds a list of embedding vectors (and their documents) to the database,
+  /// returning the ids of the inserted chunks.
+  Future<List<int>> addVectors({
+    required List<List<double>> vectors,
+    required List<Document> documents,
+  }) async {
+    if (vectors.length != documents.length) {
+      throw ArgumentError(
+        'The number of vectors must match the number of documents.',
+      );
+    }
+
+    final addedIds = <int>[];
+
+    for (int i = 0; i < vectors.length; i++) {
+      final vector = vectors[i];
+      await customStatement(
+        'INSERT INTO chunks (embedding) VALUES (:embedding)',
+        [_serializeFloat32(vector)],
+      );
+      final id = await getLastId().getSingle();
+      addedIds.add(id);
+    }
+    return addedIds;
+  }
+}
+
+// Serializes a float32 list into a vector BLOB that sqlite-vec accepts.
+Uint8List _serializeFloat32(List<double> vector) {
+  final byteData = ByteData(vector.length * 4); // 4 bytes per float32
+
+  for (int i = 0; i < vector.length; i++) {
+    byteData.setFloat32(i * 4, vector[i], Endian.little);
+  }
+
+  return byteData.buffer.asUint8List();
+}
+
+/// Splits long text into (text, start, end) chunks for embedding.
+Iterable<(String, int, int)> chunkText(String text) sync* {
+  final regex = RegExp(r'((?:[^\n][\n]?)+)');
+  final matches = regex.allMatches(text);
+  for (final match in matches) {
+    // Note: really long paragraphs should additionally be limited to the
+    // embedding model's token limit (e.g. ~500 tokens).
+    final str = text.substring(match.start, match.end);
+    yield (str, match.start, match.end);
+  }
+}
diff --git a/packages/langchain_community/lib/src/vector_stores/sqlite_vec/src/database.g.dart b/packages/langchain_community/lib/src/vector_stores/sqlite_vec/src/database.g.dart
new file mode 100644
index 00000000..05ce7a44
--- /dev/null
+++ b/packages/langchain_community/lib/src/vector_stores/sqlite_vec/src/database.g.dart
@@ -0,0 +1,967 @@
+// GENERATED CODE - DO NOT MODIFY BY HAND
+
+// ignore_for_file: type=lint, subtype=lint
+
+part of 'database.dart';
+
+// ignore_for_file: type=lint
+class Files extends Table with TableInfo<Files, File> {
+  @override
+  final GeneratedDatabase attachedDatabase;
+  final String?
_alias; + Files(this.attachedDatabase, [this._alias]); + static const VerificationMeta _idMeta = const VerificationMeta('id'); + late final GeneratedColumn id = GeneratedColumn( + 'id', aliasedName, false, + hasAutoIncrement: true, + type: DriftSqlType.int, + requiredDuringInsert: false, + $customConstraints: 'PRIMARY KEY AUTOINCREMENT'); + static const VerificationMeta _pathMeta = const VerificationMeta('path'); + late final GeneratedColumn path = GeneratedColumn( + 'path', aliasedName, false, + type: DriftSqlType.string, + requiredDuringInsert: true, + $customConstraints: 'NOT NULL'); + static const VerificationMeta _contentMeta = + const VerificationMeta('content'); + late final GeneratedColumn content = GeneratedColumn( + 'content', aliasedName, true, + type: DriftSqlType.string, + requiredDuringInsert: false, + $customConstraints: ''); + static const VerificationMeta _metadataMeta = + const VerificationMeta('metadata'); + late final GeneratedColumn metadata = GeneratedColumn( + 'metadata', aliasedName, true, + type: DriftSqlType.string, + requiredDuringInsert: false, + $customConstraints: ''); + @override + List get $columns => [id, path, content, metadata]; + @override + String get aliasedName => _alias ?? actualTableName; + @override + String get actualTableName => $name; + static const String $name = 'files'; + @override + VerificationContext validateIntegrity(Insertable instance, + {bool isInserting = false}) { + final context = VerificationContext(); + final data = instance.toColumns(true); + if (data.containsKey('id')) { + context.handle(_idMeta, id.isAcceptableOrUnknown(data['id']!, _idMeta)); + } + if (data.containsKey('path')) { + context.handle( + _pathMeta, path.isAcceptableOrUnknown(data['path']!, _pathMeta)); + } else if (isInserting) { + context.missing(_pathMeta); + } + if (data.containsKey('content')) { + context.handle(_contentMeta, + content.isAcceptableOrUnknown(data['content']!, _contentMeta)); + } + if (data.containsKey('metadata')) { + context.handle(_metadataMeta, + metadata.isAcceptableOrUnknown(data['metadata']!, _metadataMeta)); + } + return context; + } + + @override + Set get $primaryKey => {id}; + @override + File map(Map data, {String? tablePrefix}) { + final effectivePrefix = tablePrefix != null ? '$tablePrefix.' : ''; + return File( + id: attachedDatabase.typeMapping + .read(DriftSqlType.int, data['${effectivePrefix}id'])!, + path: attachedDatabase.typeMapping + .read(DriftSqlType.string, data['${effectivePrefix}path'])!, + content: attachedDatabase.typeMapping + .read(DriftSqlType.string, data['${effectivePrefix}content']), + metadata: attachedDatabase.typeMapping + .read(DriftSqlType.string, data['${effectivePrefix}metadata']), + ); + } + + @override + Files createAlias(String alias) { + return Files(attachedDatabase, alias); + } + + @override + bool get dontWriteConstraints => true; +} + +class File extends DataClass implements Insertable { + final int id; + final String path; + final String? content; + final String? 
metadata; + const File( + {required this.id, required this.path, this.content, this.metadata}); + @override + Map toColumns(bool nullToAbsent) { + final map = {}; + map['id'] = Variable(id); + map['path'] = Variable(path); + if (!nullToAbsent || content != null) { + map['content'] = Variable(content); + } + if (!nullToAbsent || metadata != null) { + map['metadata'] = Variable(metadata); + } + return map; + } + + FilesCompanion toCompanion(bool nullToAbsent) { + return FilesCompanion( + id: Value(id), + path: Value(path), + content: content == null && nullToAbsent + ? const Value.absent() + : Value(content), + metadata: metadata == null && nullToAbsent + ? const Value.absent() + : Value(metadata), + ); + } + + factory File.fromJson(Map json, + {ValueSerializer? serializer}) { + serializer ??= driftRuntimeOptions.defaultSerializer; + return File( + id: serializer.fromJson(json['id']), + path: serializer.fromJson(json['path']), + content: serializer.fromJson(json['content']), + metadata: serializer.fromJson(json['metadata']), + ); + } + @override + Map toJson({ValueSerializer? serializer}) { + serializer ??= driftRuntimeOptions.defaultSerializer; + return { + 'id': serializer.toJson(id), + 'path': serializer.toJson(path), + 'content': serializer.toJson(content), + 'metadata': serializer.toJson(metadata), + }; + } + + File copyWith( + {int? id, + String? path, + Value content = const Value.absent(), + Value metadata = const Value.absent()}) => + File( + id: id ?? this.id, + path: path ?? this.path, + content: content.present ? content.value : this.content, + metadata: metadata.present ? metadata.value : this.metadata, + ); + File copyWithCompanion(FilesCompanion data) { + return File( + id: data.id.present ? data.id.value : this.id, + path: data.path.present ? data.path.value : this.path, + content: data.content.present ? data.content.value : this.content, + metadata: data.metadata.present ? data.metadata.value : this.metadata, + ); + } + + @override + String toString() { + return (StringBuffer('File(') + ..write('id: $id, ') + ..write('path: $path, ') + ..write('content: $content, ') + ..write('metadata: $metadata') + ..write(')')) + .toString(); + } + + @override + int get hashCode => Object.hash(id, path, content, metadata); + @override + bool operator ==(Object other) => + identical(this, other) || + (other is File && + other.id == this.id && + other.path == this.path && + other.content == this.content && + other.metadata == this.metadata); +} + +class FilesCompanion extends UpdateCompanion { + final Value id; + final Value path; + final Value content; + final Value metadata; + const FilesCompanion({ + this.id = const Value.absent(), + this.path = const Value.absent(), + this.content = const Value.absent(), + this.metadata = const Value.absent(), + }); + FilesCompanion.insert({ + this.id = const Value.absent(), + required String path, + this.content = const Value.absent(), + this.metadata = const Value.absent(), + }) : path = Value(path); + static Insertable custom({ + Expression? id, + Expression? path, + Expression? content, + Expression? metadata, + }) { + return RawValuesInsertable({ + if (id != null) 'id': id, + if (path != null) 'path': path, + if (content != null) 'content': content, + if (metadata != null) 'metadata': metadata, + }); + } + + FilesCompanion copyWith( + {Value? id, + Value? path, + Value? content, + Value? metadata}) { + return FilesCompanion( + id: id ?? this.id, + path: path ?? this.path, + content: content ?? this.content, + metadata: metadata ?? 
this.metadata, + ); + } + + @override + Map toColumns(bool nullToAbsent) { + final map = {}; + if (id.present) { + map['id'] = Variable(id.value); + } + if (path.present) { + map['path'] = Variable(path.value); + } + if (content.present) { + map['content'] = Variable(content.value); + } + if (metadata.present) { + map['metadata'] = Variable(metadata.value); + } + return map; + } + + @override + String toString() { + return (StringBuffer('FilesCompanion(') + ..write('id: $id, ') + ..write('path: $path, ') + ..write('content: $content, ') + ..write('metadata: $metadata') + ..write(')')) + .toString(); + } +} + +class FileEmbeddings extends Table + with TableInfo { + @override + final GeneratedDatabase attachedDatabase; + final String? _alias; + FileEmbeddings(this.attachedDatabase, [this._alias]); + static const VerificationMeta _idMeta = const VerificationMeta('id'); + late final GeneratedColumn id = GeneratedColumn( + 'id', aliasedName, false, + hasAutoIncrement: true, + type: DriftSqlType.int, + requiredDuringInsert: false, + $customConstraints: 'PRIMARY KEY AUTOINCREMENT'); + static const VerificationMeta _fileIdMeta = const VerificationMeta('fileId'); + late final GeneratedColumn fileId = GeneratedColumn( + 'file_id', aliasedName, false, + type: DriftSqlType.int, + requiredDuringInsert: true, + $customConstraints: 'NOT NULL'); + static const VerificationMeta _chunkIdMeta = + const VerificationMeta('chunkId'); + late final GeneratedColumn chunkId = GeneratedColumn( + 'chunk_id', aliasedName, false, + type: DriftSqlType.int, + requiredDuringInsert: true, + $customConstraints: 'NOT NULL'); + static const VerificationMeta _startMeta = const VerificationMeta('start'); + late final GeneratedColumn start = GeneratedColumn( + 'start', aliasedName, true, + type: DriftSqlType.int, + requiredDuringInsert: false, + $customConstraints: ''); + static const VerificationMeta _endMeta = const VerificationMeta('end'); + late final GeneratedColumn end = GeneratedColumn( + 'end', aliasedName, true, + type: DriftSqlType.int, + requiredDuringInsert: false, + $customConstraints: ''); + @override + List get $columns => [id, fileId, chunkId, start, end]; + @override + String get aliasedName => _alias ?? actualTableName; + @override + String get actualTableName => $name; + static const String $name = 'file_embeddings'; + @override + VerificationContext validateIntegrity(Insertable instance, + {bool isInserting = false}) { + final context = VerificationContext(); + final data = instance.toColumns(true); + if (data.containsKey('id')) { + context.handle(_idMeta, id.isAcceptableOrUnknown(data['id']!, _idMeta)); + } + if (data.containsKey('file_id')) { + context.handle(_fileIdMeta, + fileId.isAcceptableOrUnknown(data['file_id']!, _fileIdMeta)); + } else if (isInserting) { + context.missing(_fileIdMeta); + } + if (data.containsKey('chunk_id')) { + context.handle(_chunkIdMeta, + chunkId.isAcceptableOrUnknown(data['chunk_id']!, _chunkIdMeta)); + } else if (isInserting) { + context.missing(_chunkIdMeta); + } + if (data.containsKey('start')) { + context.handle( + _startMeta, start.isAcceptableOrUnknown(data['start']!, _startMeta)); + } + if (data.containsKey('end')) { + context.handle( + _endMeta, end.isAcceptableOrUnknown(data['end']!, _endMeta)); + } + return context; + } + + @override + Set get $primaryKey => {id}; + @override + FileEmbedding map(Map data, {String? tablePrefix}) { + final effectivePrefix = tablePrefix != null ? '$tablePrefix.' 
: ''; + return FileEmbedding( + id: attachedDatabase.typeMapping + .read(DriftSqlType.int, data['${effectivePrefix}id'])!, + fileId: attachedDatabase.typeMapping + .read(DriftSqlType.int, data['${effectivePrefix}file_id'])!, + chunkId: attachedDatabase.typeMapping + .read(DriftSqlType.int, data['${effectivePrefix}chunk_id'])!, + start: attachedDatabase.typeMapping + .read(DriftSqlType.int, data['${effectivePrefix}start']), + end: attachedDatabase.typeMapping + .read(DriftSqlType.int, data['${effectivePrefix}end']), + ); + } + + @override + FileEmbeddings createAlias(String alias) { + return FileEmbeddings(attachedDatabase, alias); + } + + @override + bool get dontWriteConstraints => true; +} + +class FileEmbedding extends DataClass implements Insertable { + final int id; + final int fileId; + final int chunkId; + final int? start; + final int? end; + const FileEmbedding( + {required this.id, + required this.fileId, + required this.chunkId, + this.start, + this.end}); + @override + Map toColumns(bool nullToAbsent) { + final map = {}; + map['id'] = Variable(id); + map['file_id'] = Variable(fileId); + map['chunk_id'] = Variable(chunkId); + if (!nullToAbsent || start != null) { + map['start'] = Variable(start); + } + if (!nullToAbsent || end != null) { + map['end'] = Variable(end); + } + return map; + } + + FileEmbeddingsCompanion toCompanion(bool nullToAbsent) { + return FileEmbeddingsCompanion( + id: Value(id), + fileId: Value(fileId), + chunkId: Value(chunkId), + start: + start == null && nullToAbsent ? const Value.absent() : Value(start), + end: end == null && nullToAbsent ? const Value.absent() : Value(end), + ); + } + + factory FileEmbedding.fromJson(Map json, + {ValueSerializer? serializer}) { + serializer ??= driftRuntimeOptions.defaultSerializer; + return FileEmbedding( + id: serializer.fromJson(json['id']), + fileId: serializer.fromJson(json['file_id']), + chunkId: serializer.fromJson(json['chunk_id']), + start: serializer.fromJson(json['start']), + end: serializer.fromJson(json['end']), + ); + } + @override + Map toJson({ValueSerializer? serializer}) { + serializer ??= driftRuntimeOptions.defaultSerializer; + return { + 'id': serializer.toJson(id), + 'file_id': serializer.toJson(fileId), + 'chunk_id': serializer.toJson(chunkId), + 'start': serializer.toJson(start), + 'end': serializer.toJson(end), + }; + } + + FileEmbedding copyWith( + {int? id, + int? fileId, + int? chunkId, + Value start = const Value.absent(), + Value end = const Value.absent()}) => + FileEmbedding( + id: id ?? this.id, + fileId: fileId ?? this.fileId, + chunkId: chunkId ?? this.chunkId, + start: start.present ? start.value : this.start, + end: end.present ? end.value : this.end, + ); + FileEmbedding copyWithCompanion(FileEmbeddingsCompanion data) { + return FileEmbedding( + id: data.id.present ? data.id.value : this.id, + fileId: data.fileId.present ? data.fileId.value : this.fileId, + chunkId: data.chunkId.present ? data.chunkId.value : this.chunkId, + start: data.start.present ? data.start.value : this.start, + end: data.end.present ? 
data.end.value : this.end, + ); + } + + @override + String toString() { + return (StringBuffer('FileEmbedding(') + ..write('id: $id, ') + ..write('fileId: $fileId, ') + ..write('chunkId: $chunkId, ') + ..write('start: $start, ') + ..write('end: $end') + ..write(')')) + .toString(); + } + + @override + int get hashCode => Object.hash(id, fileId, chunkId, start, end); + @override + bool operator ==(Object other) => + identical(this, other) || + (other is FileEmbedding && + other.id == this.id && + other.fileId == this.fileId && + other.chunkId == this.chunkId && + other.start == this.start && + other.end == this.end); +} + +class FileEmbeddingsCompanion extends UpdateCompanion { + final Value id; + final Value fileId; + final Value chunkId; + final Value start; + final Value end; + const FileEmbeddingsCompanion({ + this.id = const Value.absent(), + this.fileId = const Value.absent(), + this.chunkId = const Value.absent(), + this.start = const Value.absent(), + this.end = const Value.absent(), + }); + FileEmbeddingsCompanion.insert({ + this.id = const Value.absent(), + required int fileId, + required int chunkId, + this.start = const Value.absent(), + this.end = const Value.absent(), + }) : fileId = Value(fileId), + chunkId = Value(chunkId); + static Insertable custom({ + Expression? id, + Expression? fileId, + Expression? chunkId, + Expression? start, + Expression? end, + }) { + return RawValuesInsertable({ + if (id != null) 'id': id, + if (fileId != null) 'file_id': fileId, + if (chunkId != null) 'chunk_id': chunkId, + if (start != null) 'start': start, + if (end != null) 'end': end, + }); + } + + FileEmbeddingsCompanion copyWith( + {Value? id, + Value? fileId, + Value? chunkId, + Value? start, + Value? end}) { + return FileEmbeddingsCompanion( + id: id ?? this.id, + fileId: fileId ?? this.fileId, + chunkId: chunkId ?? this.chunkId, + start: start ?? this.start, + end: end ?? this.end, + ); + } + + @override + Map toColumns(bool nullToAbsent) { + final map = {}; + if (id.present) { + map['id'] = Variable(id.value); + } + if (fileId.present) { + map['file_id'] = Variable(fileId.value); + } + if (chunkId.present) { + map['chunk_id'] = Variable(chunkId.value); + } + if (start.present) { + map['start'] = Variable(start.value); + } + if (end.present) { + map['end'] = Variable(end.value); + } + return map; + } + + @override + String toString() { + return (StringBuffer('FileEmbeddingsCompanion(') + ..write('id: $id, ') + ..write('fileId: $fileId, ') + ..write('chunkId: $chunkId, ') + ..write('start: $start, ') + ..write('end: $end') + ..write(')')) + .toString(); + } +} + +abstract class _$Database extends GeneratedDatabase { + _$Database(QueryExecutor e) : super(e); + $DatabaseManager get managers => $DatabaseManager(this); + late final Files files = Files(this); + late final FileEmbeddings fileEmbeddings = FileEmbeddings(this); + Selectable getFiles() { + return customSelect('SELECT * FROM files', variables: [], readsFrom: { + files, + }).asyncMap(files.mapFromRow); + } + + Selectable getFileById(int id) { + return customSelect('SELECT * FROM files WHERE id = ?1', variables: [ + Variable(id) + ], readsFrom: { + files, + }).asyncMap(files.mapFromRow); + } + + Selectable getFileByPath(String path) { + return customSelect('SELECT * FROM files WHERE path = ?1', variables: [ + Variable(path) + ], readsFrom: { + files, + }).asyncMap(files.mapFromRow); + } + + Future insertFile(String path, String? content, String? 
metadata) { + return customInsert( + 'INSERT INTO files (path, content, metadata) VALUES (?1, ?2, ?3)', + variables: [ + Variable(path), + Variable(content), + Variable(metadata) + ], + updates: {files}, + ); + } + + Future deleteFileById(int id) { + return customUpdate( + 'DELETE FROM files WHERE id = ?1', + variables: [Variable(id)], + updates: {files}, + updateKind: UpdateKind.delete, + ); + } + + Selectable getFileEmbeddingsByFileId(int fileId) { + return customSelect('SELECT * FROM file_embeddings WHERE file_id = ?1', + variables: [ + Variable(fileId) + ], + readsFrom: { + fileEmbeddings, + }).asyncMap(fileEmbeddings.mapFromRow); + } + + Future deleteFileEmbeddingByFileId(int id) { + return customUpdate( + 'DELETE FROM file_embeddings WHERE file_id = ?1', + variables: [Variable(id)], + updates: {fileEmbeddings}, + updateKind: UpdateKind.delete, + ); + } + + Selectable getLastId() { + return customSelect('SELECT last_insert_rowid() AS _c0', + variables: [], + readsFrom: {}).map((QueryRow row) => row.read('_c0')); + } + + Future insertFileEmbedding( + int fileId, int chunkId, int? start, int? end) { + return customInsert( + 'INSERT INTO file_embeddings (file_id, chunk_id, start, "end") VALUES (?1, ?2, ?3, ?4)', + variables: [ + Variable(fileId), + Variable(chunkId), + Variable(start), + Variable(end) + ], + updates: {fileEmbeddings}, + ); + } + + Selectable searchEmbeddings(Uint8List embedding) { + return customSelect( + 'SELECT files.id AS fileId, files.path, file_embeddings.start, file_embeddings."end", CAST(chunks.distance AS REAL) AS distance, files.content, files.metadata FROM chunks LEFT JOIN file_embeddings ON file_embeddings.chunk_id = chunks.id LEFT JOIN files ON files.id = file_embeddings.file_id WHERE embedding MATCH ?1 AND k = 20', + variables: [ + Variable(embedding) + ], + readsFrom: { + files, + fileEmbeddings, + }).map((QueryRow row) => SearchEmbeddingsResult( + fileId: row.readNullable('fileId'), + path: row.readNullable('path'), + start: row.readNullable('start'), + end: row.readNullable('end'), + distance: row.read('distance'), + content: row.readNullable('content'), + metadata: row.readNullable('metadata'), + )); + } + + @override + Iterable> get allTables => + allSchemaEntities.whereType>(); + @override + List get allSchemaEntities => [files, fileEmbeddings]; +} + +typedef $FilesCreateCompanionBuilder = FilesCompanion Function({ + Value id, + required String path, + Value content, + Value metadata, +}); +typedef $FilesUpdateCompanionBuilder = FilesCompanion Function({ + Value id, + Value path, + Value content, + Value metadata, +}); + +class $FilesFilterComposer extends FilterComposer<_$Database, Files> { + $FilesFilterComposer(super.$state); + ColumnFilters get id => $state.composableBuilder( + column: $state.table.id, + builder: (column, joinBuilders) => + ColumnFilters(column, joinBuilders: joinBuilders)); + + ColumnFilters get path => $state.composableBuilder( + column: $state.table.path, + builder: (column, joinBuilders) => + ColumnFilters(column, joinBuilders: joinBuilders)); + + ColumnFilters get content => $state.composableBuilder( + column: $state.table.content, + builder: (column, joinBuilders) => + ColumnFilters(column, joinBuilders: joinBuilders)); + + ColumnFilters get metadata => $state.composableBuilder( + column: $state.table.metadata, + builder: (column, joinBuilders) => + ColumnFilters(column, joinBuilders: joinBuilders)); +} + +class $FilesOrderingComposer extends OrderingComposer<_$Database, Files> { + $FilesOrderingComposer(super.$state); + 
ColumnOrderings get id => $state.composableBuilder( + column: $state.table.id, + builder: (column, joinBuilders) => + ColumnOrderings(column, joinBuilders: joinBuilders)); + + ColumnOrderings get path => $state.composableBuilder( + column: $state.table.path, + builder: (column, joinBuilders) => + ColumnOrderings(column, joinBuilders: joinBuilders)); + + ColumnOrderings get content => $state.composableBuilder( + column: $state.table.content, + builder: (column, joinBuilders) => + ColumnOrderings(column, joinBuilders: joinBuilders)); + + ColumnOrderings get metadata => $state.composableBuilder( + column: $state.table.metadata, + builder: (column, joinBuilders) => + ColumnOrderings(column, joinBuilders: joinBuilders)); +} + +class $FilesTableManager extends RootTableManager< + _$Database, + Files, + File, + $FilesFilterComposer, + $FilesOrderingComposer, + $FilesCreateCompanionBuilder, + $FilesUpdateCompanionBuilder, + (File, BaseReferences<_$Database, Files, File>), + File, + PrefetchHooks Function()> { + $FilesTableManager(_$Database db, Files table) + : super(TableManagerState( + db: db, + table: table, + filteringComposer: $FilesFilterComposer(ComposerState(db, table)), + orderingComposer: $FilesOrderingComposer(ComposerState(db, table)), + updateCompanionCallback: ({ + Value id = const Value.absent(), + Value path = const Value.absent(), + Value content = const Value.absent(), + Value metadata = const Value.absent(), + }) => + FilesCompanion( + id: id, + path: path, + content: content, + metadata: metadata, + ), + createCompanionCallback: ({ + Value id = const Value.absent(), + required String path, + Value content = const Value.absent(), + Value metadata = const Value.absent(), + }) => + FilesCompanion.insert( + id: id, + path: path, + content: content, + metadata: metadata, + ), + withReferenceMapper: (p0) => p0 + .map((e) => (e.readTable(table), BaseReferences(db, table, e))) + .toList(), + prefetchHooksCallback: null, + )); +} + +typedef $FilesProcessedTableManager = ProcessedTableManager< + _$Database, + Files, + File, + $FilesFilterComposer, + $FilesOrderingComposer, + $FilesCreateCompanionBuilder, + $FilesUpdateCompanionBuilder, + (File, BaseReferences<_$Database, Files, File>), + File, + PrefetchHooks Function()>; +typedef $FileEmbeddingsCreateCompanionBuilder = FileEmbeddingsCompanion + Function({ + Value id, + required int fileId, + required int chunkId, + Value start, + Value end, +}); +typedef $FileEmbeddingsUpdateCompanionBuilder = FileEmbeddingsCompanion + Function({ + Value id, + Value fileId, + Value chunkId, + Value start, + Value end, +}); + +class $FileEmbeddingsFilterComposer + extends FilterComposer<_$Database, FileEmbeddings> { + $FileEmbeddingsFilterComposer(super.$state); + ColumnFilters get id => $state.composableBuilder( + column: $state.table.id, + builder: (column, joinBuilders) => + ColumnFilters(column, joinBuilders: joinBuilders)); + + ColumnFilters get fileId => $state.composableBuilder( + column: $state.table.fileId, + builder: (column, joinBuilders) => + ColumnFilters(column, joinBuilders: joinBuilders)); + + ColumnFilters get chunkId => $state.composableBuilder( + column: $state.table.chunkId, + builder: (column, joinBuilders) => + ColumnFilters(column, joinBuilders: joinBuilders)); + + ColumnFilters get start => $state.composableBuilder( + column: $state.table.start, + builder: (column, joinBuilders) => + ColumnFilters(column, joinBuilders: joinBuilders)); + + ColumnFilters get end => $state.composableBuilder( + column: $state.table.end, + builder: 
(column, joinBuilders) => + ColumnFilters(column, joinBuilders: joinBuilders)); +} + +class $FileEmbeddingsOrderingComposer + extends OrderingComposer<_$Database, FileEmbeddings> { + $FileEmbeddingsOrderingComposer(super.$state); + ColumnOrderings get id => $state.composableBuilder( + column: $state.table.id, + builder: (column, joinBuilders) => + ColumnOrderings(column, joinBuilders: joinBuilders)); + + ColumnOrderings get fileId => $state.composableBuilder( + column: $state.table.fileId, + builder: (column, joinBuilders) => + ColumnOrderings(column, joinBuilders: joinBuilders)); + + ColumnOrderings get chunkId => $state.composableBuilder( + column: $state.table.chunkId, + builder: (column, joinBuilders) => + ColumnOrderings(column, joinBuilders: joinBuilders)); + + ColumnOrderings get start => $state.composableBuilder( + column: $state.table.start, + builder: (column, joinBuilders) => + ColumnOrderings(column, joinBuilders: joinBuilders)); + + ColumnOrderings get end => $state.composableBuilder( + column: $state.table.end, + builder: (column, joinBuilders) => + ColumnOrderings(column, joinBuilders: joinBuilders)); +} + +class $FileEmbeddingsTableManager extends RootTableManager< + _$Database, + FileEmbeddings, + FileEmbedding, + $FileEmbeddingsFilterComposer, + $FileEmbeddingsOrderingComposer, + $FileEmbeddingsCreateCompanionBuilder, + $FileEmbeddingsUpdateCompanionBuilder, + (FileEmbedding, BaseReferences<_$Database, FileEmbeddings, FileEmbedding>), + FileEmbedding, + PrefetchHooks Function()> { + $FileEmbeddingsTableManager(_$Database db, FileEmbeddings table) + : super(TableManagerState( + db: db, + table: table, + filteringComposer: + $FileEmbeddingsFilterComposer(ComposerState(db, table)), + orderingComposer: + $FileEmbeddingsOrderingComposer(ComposerState(db, table)), + updateCompanionCallback: ({ + Value id = const Value.absent(), + Value fileId = const Value.absent(), + Value chunkId = const Value.absent(), + Value start = const Value.absent(), + Value end = const Value.absent(), + }) => + FileEmbeddingsCompanion( + id: id, + fileId: fileId, + chunkId: chunkId, + start: start, + end: end, + ), + createCompanionCallback: ({ + Value id = const Value.absent(), + required int fileId, + required int chunkId, + Value start = const Value.absent(), + Value end = const Value.absent(), + }) => + FileEmbeddingsCompanion.insert( + id: id, + fileId: fileId, + chunkId: chunkId, + start: start, + end: end, + ), + withReferenceMapper: (p0) => p0 + .map((e) => (e.readTable(table), BaseReferences(db, table, e))) + .toList(), + prefetchHooksCallback: null, + )); +} + +typedef $FileEmbeddingsProcessedTableManager = ProcessedTableManager< + _$Database, + FileEmbeddings, + FileEmbedding, + $FileEmbeddingsFilterComposer, + $FileEmbeddingsOrderingComposer, + $FileEmbeddingsCreateCompanionBuilder, + $FileEmbeddingsUpdateCompanionBuilder, + (FileEmbedding, BaseReferences<_$Database, FileEmbeddings, FileEmbedding>), + FileEmbedding, + PrefetchHooks Function()>; + +class $DatabaseManager { + final _$Database _db; + $DatabaseManager(this._db); + $FilesTableManager get files => $FilesTableManager(_db, _db.files); + $FileEmbeddingsTableManager get fileEmbeddings => + $FileEmbeddingsTableManager(_db, _db.fileEmbeddings); +} + +class SearchEmbeddingsResult { + final int? fileId; + final String? path; + final int? start; + final int? end; + final double distance; + final String? content; + final String? 
metadata; + SearchEmbeddingsResult({ + this.fileId, + this.path, + this.start, + this.end, + required this.distance, + this.content, + this.metadata, + }); +} diff --git a/packages/langchain_community/lib/src/vector_stores/sqlite_vec/src/sql.drift b/packages/langchain_community/lib/src/vector_stores/sqlite_vec/src/sql.drift new file mode 100644 index 00000000..fcd0affb --- /dev/null +++ b/packages/langchain_community/lib/src/vector_stores/sqlite_vec/src/sql.drift @@ -0,0 +1,70 @@ +CREATE TABLE files ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + path TEXT NOT NULL, + content TEXT, + metadata TEXT -- Deviated from flutter_sqlite_document_search to adhere more to langchain +); + +CREATE TABLE file_embeddings ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + file_id INTEGER NOT NULL, + chunk_id INTEGER NOT NULL, + [start] INTEGER, + [end] INTEGER +); + +-- CREATE VIRTUAL TABLE chunks using vec0( +-- id INTEGER PRIMARY KEY AUTOINCREMENT, +-- embedding float[768] +-- ); + +getFiles: +SELECT * FROM files; + +getFileById: +SELECT * FROM files +WHERE id = :id; + +getFileByPath: +SELECT * FROM files +WHERE path = :path; + +insertFile: +INSERT INTO files (path, content, metadata) VALUES (:path, :content, :metadata); +RETURNING *; + +deleteFileById: +DELETE FROM files +WHERE id = :id; + +getFileEmbeddingsByFileId: +SELECT * FROM file_embeddings +WHERE file_id = :fileId; + +deleteFileEmbeddingByFileId: +DELETE FROM file_embeddings +WHERE file_id = :id; + +-- insertChunk(:embedding AS BLOB): +-- INSERT INTO chunks (embedding) VALUES (:embedding); + +getLastId: +SELECT last_insert_rowid(); + +insertFileEmbedding: +INSERT INTO file_embeddings (file_id, chunk_id, [start], [end]) +VALUES (:fileId, :chunkId, :start, :end); + +searchEmbeddings(:embedding AS BLOB): +SELECT + files.id as fileId, + files.path, + file_embeddings.start, + file_embeddings.end, + CAST(chunks.distance AS REAL) as distance, + files.content, + files.metadata +FROM chunks +LEFT JOIN file_embeddings ON file_embeddings.chunk_id = chunks.id +LEFT JOIN files ON files.id = file_embeddings.file_id +WHERE embedding MATCH :embedding AND k = 20; diff --git a/packages/langchain_community/pubspec.yaml b/packages/langchain_community/pubspec.yaml index 7271a128..4748bb0e 100644 --- a/packages/langchain_community/pubspec.yaml +++ b/packages/langchain_community/pubspec.yaml @@ -19,6 +19,7 @@ dependencies: beautiful_soup_dart: ^0.3.0 cross_file: ^0.3.4+2 csv: ^6.0.0 + drift: ^2.21.0 flat_buffers: ^23.5.26 http: ^1.2.2 json_path: ^0.7.4 @@ -26,14 +27,16 @@ dependencies: math_expressions: ^2.6.0 meta: ^1.11.0 objectbox: ^4.0.1 + path: ^1.9.0 + sqlite3: ^2.4.6 tavily_dart: ^0.1.0 uuid: ^4.5.1 - dev_dependencies: + archive: ^3.6.1 build_runner: ^2.4.11 + drift_dev: ^2.21.0 langchain_openai: ^0.7.2+3 objectbox_generator: ^4.0.1 test: ^1.25.8 - objectbox: output_dir: src/vector_stores/objectbox diff --git a/packages/langchain_community/test/vector_stores/sqlite_vec/sqlitevec_test.dart b/packages/langchain_community/test/vector_stores/sqlite_vec/sqlitevec_test.dart new file mode 100644 index 00000000..01b565bb --- /dev/null +++ b/packages/langchain_community/test/vector_stores/sqlite_vec/sqlitevec_test.dart @@ -0,0 +1,72 @@ +import 'package:drift/drift.dart'; + +import 'package:langchain_community/src/vector_stores/sqlite_vec/sqlite_vec.dart'; +import 'package:langchain_core/documents.dart'; +import 'package:langchain_core/embeddings.dart'; +import 'package:langchain_core/vector_stores.dart'; +import 'package:test/test.dart'; + +Future sqliteVecFromTexts({ + List>? 
metaData,
+}) async {
+  return SQLiteVEC.fromTexts(
+    ['foo', 'bar', 'baz'],
+    FakeEmbeddings(),
+    metadatas: metaData,
+    table: 'test',
+    dbFile: ':memory:',
+  );
+}
+
+void main() {
+  driftRuntimeOptions.dontWarnAboutMultipleDatabases = true;
+  group('SQLiteVEC Tests', () {
+    test('Test end to end construction and search', () async {
+      final docSearch = await sqliteVecFromTexts();
+      final output = await docSearch.similaritySearch(
+        query: 'foo',
+        config: const VectorStoreSimilaritySearch(k: 1),
+      );
+      expect(output.first, const Document(pageContent: 'foo'));
+    });
+
+    test('Test end to end construction and search with scores and IDs',
+        () async {
+      final texts = ['foo', 'bar', 'baz'];
+      final metaData = [
+        for (var i = 0; i < texts.length; i++) {'page': i},
+      ];
+      final docSearch = await sqliteVecFromTexts(metaData: metaData);
+      final output = await docSearch.similaritySearchWithScores(
+        query: 'foo',
+        config: const VectorStoreSimilaritySearch(k: 3),
+      );
+      final docs = output.map((o) => o.$1).toList();
+      final distances = output.map((o) => o.$2).toList();
+
+      expect(docs, [
+        const Document(pageContent: 'foo', metadata: {'page': 0}),
+        const Document(pageContent: 'bar', metadata: {'page': 1}),
+        const Document(pageContent: 'baz', metadata: {'page': 2}),
+      ]);
+      // Distances should increase as similarity to the query decreases.
+      expect(distances[0] < distances[1], isTrue);
+      expect(distances[1] < distances[2], isTrue);
+    });
+
+    test('Test adding extra texts and search', () async {
+      final texts = ['foo', 'bar', 'baz'];
+      final metaData = [
+        for (var i = 0; i < texts.length; i++) {'page': i},
+      ];
+      final docSearch = await sqliteVecFromTexts(metaData: metaData);
+      await docSearch.addTexts(texts: texts, metadatas: metaData);
+      final output = await docSearch.similaritySearch(
+        query: 'foo',
+        config: const VectorStoreSimilaritySearch(k: 10),
+      );
+      // 3 initial texts + 3 added texts.
+      expect(output.length, 6);
+    });
+  });
+}
diff --git a/packages/langchain_community/tool/download_sqlite3.dart b/packages/langchain_community/tool/download_sqlite3.dart
new file mode 100644
index 00000000..9bc7389f
--- /dev/null
+++ b/packages/langchain_community/tool/download_sqlite3.dart
@@ -0,0 +1,137 @@
+import 'dart:io';
+
+import 'package:archive/archive_io.dart';
+import 'package:http/http.dart';
+import 'package:langchain_community/dart_utils/debugging.dart';
+import 'package:path/path.dart' as p;
+
+typedef SqliteVersion = ({String version, String year});
+
+const SqliteVersion latest = (version: '3460000', year: '2024');
+const SqliteVersion minimum = (version: '3290000', year: '2019');
+
+Future<void> main(List<String> args) async {
+  if (args.contains('version')) {
+    kDebugPrint(latest.version);
+    exit(0);
+  }
+
+  await _downloadAndCompile('latest', latest, force: args.contains('--force'));
+  await _downloadAndCompile(
+    'minimum',
+    minimum,
+    force: args.contains('--force'),
+  );
+}
+
+extension on SqliteVersion {
+  String get autoconfUrl =>
+      'https://www.sqlite.org/$year/sqlite-autoconf-$version.tar.gz';
+
+  String get windowsUrl =>
+      'https://www.sqlite.org/$year/sqlite-dll-win-x64-$version.zip';
+}
+
+Future<void> _downloadAndCompile(
+  String name,
+  SqliteVersion version, {
+  bool force = false,
+}) async {
+  final driftDirectory = p.dirname(p.dirname(Platform.script.toFilePath()));
+  final target = p.join(driftDirectory, '.dart_tool', 'sqlite3', name);
+  final versionFile = File(p.join(target, 'version'));
+
+  final needsDownload = force ||
+      !versionFile.existsSync() ||
+      versionFile.readAsStringSync() != version.version;
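+  // The `version` marker file is only written after a successful
+  // download/build, so an interrupted run is retried on the next invocation.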
+ + if (!needsDownload) { + kDebugPrint( + 'Not downloading sqlite3 $name as it has already been downloaded. Use ' + '--force to re-compile it.', + ); + exit(0); + } + + kDebugPrint('Downloading and compiling sqlite3 $name (${version.version})'); + final targetDirectory = Directory(target); + + if (!targetDirectory.existsSync()) { + targetDirectory.createSync(recursive: true); + } + + final temporaryDir = + await Directory.systemTemp.createTemp('drift-compile-sqlite3'); + final temporaryDirPath = temporaryDir.path; + + // Compiling on Windows is ugly because we need users to have Visual Studio + // installed and all those tools activated in the current shell. + // Much easier to just download precompiled builds. + if (Platform.isWindows) { + final windowsUri = version.windowsUrl; + final sqlite3Zip = p.join(temporaryDirPath, 'sqlite3.zip'); + final client = Client(); + final response = await client.send(Request('GET', Uri.parse(windowsUri))); + if (response.statusCode != 200) { + kDebugPrint( + 'Could not download $windowsUri, status code ${response.statusCode}', + ); + exit(1); + } + await response.stream.pipe(File(sqlite3Zip).openWrite()); + + final inputStream = InputFileStream(sqlite3Zip); + final archive = ZipDecoder().decodeBuffer(inputStream); + + for (final file in archive.files) { + if (file.isFile && file.name == 'sqlite3.dll') { + final outputStream = OutputFileStream(p.join(target, 'sqlite3.dll')); + + file.writeContent(outputStream); + await outputStream.close(); + } + } + + await File(p.join(target, 'version')).writeAsString(version.version); + exit(0); + } + + await _run( + 'curl ${version.autoconfUrl} --output sqlite.tar.gz', + workingDirectory: temporaryDirPath, + ); + await _run('tar zxvf sqlite.tar.gz', workingDirectory: temporaryDirPath); + + final sqlitePath = + p.join(temporaryDirPath, 'sqlite-autoconf-${version.version}'); + await _run('./configure', workingDirectory: sqlitePath); + await _run('make -j', workingDirectory: sqlitePath); + + await File(p.join(sqlitePath, 'sqlite3')).copy(p.join(target, 'sqlite3')); + + if (Platform.isLinux) { + await File(p.join(sqlitePath, '.libs', 'libsqlite3.so')) + .copy(p.join(target, 'libsqlite3.so')); + } else if (Platform.isMacOS) { + await File(p.join(sqlitePath, '.libs', 'libsqlite3.dylib')) + .copy(p.join(target, 'libsqlite3.dylib')); + } + + await File(p.join(target, 'version')).writeAsString(version.version); +} + +Future _run(String command, {String? workingDirectory}) async { + kDebugPrint('Running $command'); + + final proc = await Process.start( + 'sh', + ['-c', command], + mode: ProcessStartMode.inheritStdio, + workingDirectory: workingDirectory, + ); + final exitCode = await proc.exitCode; + + if (exitCode != 0) { + exit(exitCode); + } +}
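For reviewers, a minimal end-to-end sketch of the new API. It sticks to calls that appear in this diff and uses the `FakeEmbeddings` helper from `langchain_core` that the tests already rely on; in real code any other `Embeddings` implementation (e.g. an OpenAI one) can be substituted.

```dart
import 'package:langchain_community/src/vector_stores/sqlite_vec/sqlite_vec.dart';
import 'package:langchain_core/documents.dart';
import 'package:langchain_core/embeddings.dart';
import 'package:langchain_core/vector_stores.dart';

Future<void> main() async {
  // In-memory store; pass a file name instead of ':memory:' to persist.
  final store = await SQLiteVEC.fromTexts(
    ['foo', 'bar', 'baz'],
    FakeEmbeddings(), // swap in a real Embeddings implementation
    metadatas: [
      for (var i = 0; i < 3; i++) {'page': i},
    ],
    table: 'demo',
    dbFile: ':memory:',
  );

  // Documents can also be added after construction.
  await store.addDocuments(
    documents: const [Document(pageContent: 'qux')],
  );

  // Distances are returned alongside the documents (lower is closer).
  final results = await store.similaritySearchWithScores(
    query: 'foo',
    config: const VectorStoreSimilaritySearch(k: 2),
  );
  for (final (doc, distance) in results) {
    print('${doc.pageContent} -> $distance');
  }
}
```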