Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,13 @@ GEM
spreadsheet (1.3.1)
bigdecimal
ruby-ole
sqlite3 (1.7.3)
mini_portile2 (~> 2.8.0)
sqlite3 (1.7.3-aarch64-linux)
sqlite3 (1.7.3-arm-linux)
sqlite3 (1.7.3-arm64-darwin)
sqlite3 (1.7.3-x86_64-darwin)
sqlite3 (1.7.3-x86_64-linux)
standard (1.39.1)
language_server-protocol (~> 3.17.0.2)
lint_roller (~> 1.0)
Expand Down Expand Up @@ -493,6 +500,7 @@ DEPENDENCIES
ruby-openai (~> 7.1.0)
safe_ruby (~> 1.0.4)
sequel (~> 5.87.0)
sqlite3 (~> 1.7.0)
standard (>= 1.35.1)
vcr
weaviate-ruby (~> 0.9.2)
Expand Down
46 changes: 46 additions & 0 deletions examples/sqlite_vec_example.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
require "langchain"

# Use Ollama as the LLM backend for embeddings and chat completions.
ollama = Langchain::LLM::Ollama.new

# Build the SQLite-vec vector store.
# ":memory:" keeps everything in RAM; pass a file path instead to persist.
store = Langchain::Vectorsearch::SqliteVec.new(
  url: ":memory:",
  index_name: "documents",
  namespace: "test",
  llm: ollama
)

# Set up the vec0 virtual table before inserting anything.
store.create_default_schema

# A few sample documents to index.
sample_texts = [
  "Ruby is a dynamic, open source programming language with a focus on simplicity and productivity.",
  "Python is a programming language that lets you work quickly and integrate systems more effectively.",
  "JavaScript is a lightweight, interpreted programming language with first-class functions.",
  "Rust is a multi-paradigm, general-purpose programming language designed for performance and safety."
]

puts "Adding texts..."
inserted_ids = store.add_texts(texts: sample_texts)
puts "Added #{inserted_ids.size} texts with IDs: #{inserted_ids.join(", ")}"

# Vector similarity search: embed the query and find the nearest documents.
query = "What programming language is focused on memory safety?"
puts "\nSearching for: #{query}"
matches = store.similarity_search(query: query)

puts "\nResults:"
# Each row is [rowid, content, distance]; index 1 is the stored text.
matches.each { |row| puts "- #{row[1]}" }

# RAG-style question answering over the indexed documents.
question = "Which programming language emphasizes simplicity?"
puts "\nAsking: #{question}"
answer = store.ask(question: question)
puts "Answer: #{answer.chat_completion}"

# Drop the table when done.
store.destroy_default_schema
7 changes: 6 additions & 1 deletion lib/langchain/dependency_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@ class VersionError < ScriptError; end
# @raise [VersionError] If the gem is installed, but the version does not meet the requirements
#
def depends_on(gem_name, req: true)
gem(gem_name) # require the gem
if gem_name == "sqlite_vec"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@CarlQLange Why is this if/else statement needed? Isn't the gem version what gets required?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Woof, I have no memory of this. I think it was something to do with using the gem locally from something else. Sorry, I pretty much have no idea what I was doing back then!

require "sqlite_vec"
return true
else
gem(gem_name) # require the gem
end

return(true) unless defined?(Bundler) # If we're in a non-bundler environment, we're no longer able to determine if we'll meet requirements

Expand Down
2 changes: 1 addition & 1 deletion lib/langchain/llm/ollama.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class Ollama < Base
llama2: 4_096,
llama3: 4_096,
"llama3.1": 4_096,
"llama3.2": 4_096,
"llama3.2": 3_072,
llava: 4_096,
mistral: 4_096,
"mistral-openorca": 4_096,
Expand Down
154 changes: 154 additions & 0 deletions lib/langchain/vectorsearch/sqlite_vec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# frozen_string_literal: true

require "sqlite_vec"
module Langchain::Vectorsearch
  class SqliteVec < Base
    #
    # The SQLite vector search adapter using sqlite-vec
    #
    # Gem requirements:
    #     gem "sqlite3", "~> 1.7"
    #     gem "sqlite_vec", "~> 0.16.0"
    #
    # Usage:
    #     sqlite_vec = Langchain::Vectorsearch::SqliteVec.new(url:, index_name:, llm:, namespace: nil)
    #

    attr_reader :db, :table_name, :namespace_column, :namespace

    # @param url [String] The path to the SQLite database file (or ":memory:" for in-memory)
    # @param index_name [String] The name of the table to use for the index
    # @param llm [Object] The LLM client to use
    # @param namespace [String] The namespace to use for the index when inserting/querying
    def initialize(url:, index_name:, llm:, namespace: nil)
      depends_on "sqlite3"
      depends_on "sqlite_vec"

      @db = SQLite3::Database.new(url)
      # Extension loading must be enabled only long enough to load sqlite-vec,
      # then disabled again so arbitrary SQL cannot load further extensions.
      @db.enable_load_extension(true)
      ::SqliteVec.load(@db)
      @db.enable_load_extension(false)

      @table_name = index_name
      @namespace_column = "namespace"
      @namespace = namespace

      super(llm: llm)
    end

    # Create the default vec0 virtual table sized to the LLM's embedding dimensions
    def create_default_schema
      @db.execute("CREATE VIRTUAL TABLE IF NOT EXISTS #{table_name} USING vec0(
        embedding float[#{llm.default_dimensions}],
        content TEXT,
        #{namespace_column} TEXT
      )")
    end

    # Destroy the default schema (drops the table and all stored vectors)
    def destroy_default_schema
      @db.execute("DROP TABLE IF EXISTS #{table_name}")
    end

    # Add a list of texts to the index
    # @param texts [Array<String>] The texts to add to the index
    # @param ids [Array<Integer>] The ids to add to the index, in the same order as the texts
    # @return [Array<Integer>] The ids of the added texts
    def add_texts(texts:, ids: nil)
      ids_provided = !(ids.nil? || ids.empty?)

      @db.transaction do
        # Allocate rowids inside the transaction so two concurrent writers
        # cannot read the same MAX(rowid) and collide on insert.
        unless ids_provided
          max_rowid = @db.execute("SELECT MAX(rowid) FROM #{table_name}").first.first || 0
          ids = texts.each_index.map { |i| max_rowid + i + 1 }
        end

        texts.zip(ids).each do |text, id|
          embedding = llm.embed(text: text).embedding
          @db.execute(
            "INSERT INTO #{table_name}(rowid, content, embedding, #{namespace_column}) VALUES (?, ?, ?, ?)",
            # sqlite-vec expects the embedding as a packed float32 blob
            [id, text, embedding.pack("f*"), namespace]
          )
        end
      end

      ids
    end

    # Update a list of ids and corresponding texts in the index
    # @param texts [Array<String>] The texts to update in the index
    # @param ids [Array<Integer>] The ids to update in the index, in the same order as the texts
    # @return [Array<Integer>] The ids of the updated texts
    def update_texts(texts:, ids:)
      @db.transaction do
        texts.zip(ids).each do |text, id|
          embedding = llm.embed(text: text).embedding
          @db.execute(
            "UPDATE #{table_name} SET content = ?, embedding = ? WHERE rowid = ?",
            [text, embedding.pack("f*"), id]
          )
        end
      end
      ids
    end

    # Remove a list of texts from the index
    # @param ids [Array<Integer>] The ids of the texts to remove from the index
    # @return [Integer] The number of ids passed in (not rows actually deleted)
    def remove_texts(ids:)
      # "IN ()" is a SQLite syntax error, so bail out early on empty input.
      return 0 if ids.empty?

      # Bind each id through a placeholder instead of interpolating the values
      # into the SQL string, so non-numeric input cannot inject SQL.
      placeholders = Array.new(ids.length, "?").join(",")
      @db.execute("DELETE FROM #{table_name} WHERE rowid IN (#{placeholders})", ids)
      ids.length
    end

    # Search for similar texts in the index
    # @param query [String] The text to search for
    # @param k [Integer] The number of top results to return
    # @return [Array<Array>] Rows of [rowid, content, distance], nearest first
    def similarity_search(query:, k: 4)
      embedding = llm.embed(text: query).embedding
      similarity_search_by_vector(embedding: embedding, k: k)
    end

    # Search for similar texts in the index by vector
    # @param embedding [Array<Float>] The vector to search for
    # @param k [Integer] The number of top results to return
    # @return [Array<Array>] Rows of [rowid, content, distance], nearest first
    def similarity_search_by_vector(embedding:, k: 4)
      namespace_condition = namespace ? "AND #{namespace_column} = ?" : ""
      query_params = [embedding.pack("f*")]
      query_params << namespace if namespace

      # LIMIT cannot be bound in a vec0 KNN query here, so coerce k to an
      # Integer before interpolating to keep the SQL injection-safe.
      @db.execute(<<-SQL, query_params)
        SELECT
          rowid,
          content,
          distance
        FROM #{table_name}
        WHERE embedding MATCH ?
        #{namespace_condition}
        ORDER BY distance
        LIMIT #{k.to_i}
      SQL
    end

    # Ask a question and return the answer
    # @param question [String] The question to ask
    # @param k [Integer] The number of results to have in context
    # @yield [String] Stream responses back one String at a time
    # @return [Langchain::LLM::Response] The LLM response, with #context set to the retrieved passages
    def ask(question:, k: 4, &)
      search_results = similarity_search(query: question, k: k)

      # Column 1 of each result row is the stored document text.
      context = search_results.map { |result| result[1].to_s }
      context = context.join("\n---\n")

      prompt = generate_rag_prompt(question: question, context: context)

      messages = [{role: "user", content: prompt}]
      response = llm.chat(messages: messages, &)

      response.context = context
      response
    end
  end
end
Loading
Loading