Skip to content

Commit ba8ef7a

Browse files
committed
Bump version to 0.1.13
- Add score_threshold parameter to similarity_search, similarity_search_by_vector, and ask methods - Improve test coverage with comprehensive tests for score_threshold functionality and ask method
1 parent 8bda66d commit ba8ef7a

File tree

7 files changed

+126
-30
lines changed

7 files changed

+126
-30
lines changed

.tool-versions

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
ruby 3.2.2
1+
ruby 3.4.7

CHANGELOG.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,43 +9,60 @@
99
- [DOCS]: Documentation changes. No changes to the library's behavior.
1010
- [SECURITY]: A change which fixes a security vulnerability.
1111

12+
## [0.1.13] - 2025-12-10
13+
14+
- [FEATURE]: Add `score_threshold` parameter to `similarity_search`, `similarity_search_by_vector`, and `ask` methods to filter out irrelevant results based on similarity score
15+
- [FEATURE]: Improve test coverage with comprehensive tests for the new `score_threshold` functionality and `ask` method
16+
1217
## [0.1.12] - 2024-09-20
18+
1319
- Adding `rails g langchainrb_rails:assistant --llm=...` generator
1420
- Adding `rails g langchainrb_rails:prompt` generator
1521

1622
## [0.1.11] - 2024-06-16
23+
1724
- Add destroy_from_vectorsearch hook
1825

1926
## [0.1.10] - 2024-05-20
2027

2128
## [0.1.9] - 2024-04-19
29+
2230
- Bump langchainrb gem to include v0.11.x
2331
- Remove pg_vector Overriding Operator Constants
2432

2533
## [0.1.8] - 2024-03-16
34+
2635
- Bump langchainrb gem
2736

2837
## [0.1.7] - 2024-01-29
38+
2939
- Fix Pgvector#ask method
3040

3141
## [0.1.6] - 2024-01-25
42+
3243
- Fix bug when multiple ActiveRecord models use vectorsearch
3344
- Bump langchainrb version
3445
- Avoid extra query when Pgvector is used
3546

3647
## [0.1.5] - 2023-11-30
48+
3749
- Qdrant vectorsearch generator
3850

3951
## [0.1.4] - 2023-11-20
52+
4053
- Bugfix AR integration when using vectorsearch other than Pgvector
4154

4255
## [0.1.3] - 2023-11-01
56+
4357
- Pgvector vectorsearch generator
4458

4559
## [0.1.2] - 2023-10-27
60+
4661
- Pinecone vectorsearch generator
4762

4863
## [0.1.1] - 2023-10-23
4964

5065
## [0.1.0] - 2023-10-22
66+
5167
- Initial release
68+

Gemfile.lock

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,6 @@ GEM
141141
matrix (0.4.2)
142142
method_source (1.0.0)
143143
mini_mime (1.1.5)
144-
mini_portile2 (2.8.8)
145144
minitest (5.25.4)
146145
mutex_m (0.3.0)
147146
net-imap (0.4.18)
@@ -154,12 +153,9 @@ GEM
154153
net-smtp (0.5.0)
155154
net-protocol
156155
nio4r (2.7.4)
157-
nokogiri (1.17.2)
158-
mini_portile2 (~> 2.8.2)
156+
nokogiri (1.18.10-x86_64-darwin)
159157
racc (~> 1.4)
160-
nokogiri (1.17.2-x86_64-darwin)
161-
racc (~> 1.4)
162-
nokogiri (1.17.2-x86_64-linux)
158+
nokogiri (1.18.10-x86_64-linux-gnu)
163159
racc (~> 1.4)
164160
parallel (1.26.3)
165161
parser (3.3.6.0)
@@ -279,7 +275,6 @@ GEM
279275
zeitwerk (2.6.18)
280276

281277
PLATFORMS
282-
ruby
283278
x86_64-darwin-19
284279
x86_64-darwin-22
285280
x86_64-linux

lib/langchainrb_overrides/vectorsearch/pgvector.rb

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -79,38 +79,50 @@ def destroy_default_schema
7979
# Search for similar texts in the index
8080
# @param query [String] The text to search for
8181
# @param k [Integer] The number of top results to return
82+
# @param score_threshold [Float] The minimum similarity score (lower distance) to include in results
8283
# @return [Array<Hash>] The results of the search
8384
# TODO - drop the named "query:" param so it is the same interface as #ask?
84-
def similarity_search(query:, k: 4)
85+
def similarity_search(query:, k: 4, score_threshold: nil)
8586
embedding = llm.embed(text: query).embedding
8687

8788
similarity_search_by_vector(
8889
embedding: embedding,
89-
k: k
90+
k: k,
91+
score_threshold: score_threshold
9092
)
9193
end
9294

9395
# Search for similar texts in the index by the passed in vector.
9496
# You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
9597
# @param embedding [Array<Float>] The vector to search for
9698
# @param k [Integer] The number of top results to return
99+
# @param score_threshold [Float] The minimum similarity score (lower distance) to include in results
97100
# @return [Array<Hash>] The results of the search
98101
# TODO - drop the named "embedding:" param so it is the same interface as #ask?
99-
def similarity_search_by_vector(embedding:, k: 4)
100-
model
101-
.nearest_neighbors(:embedding, embedding, distance: operator)
102-
.limit(k)
102+
def similarity_search_by_vector(embedding:, k: 4, score_threshold: nil)
103+
query = model.nearest_neighbors(:embedding, embedding, distance: operator)
104+
105+
if score_threshold
106+
# Fetch more results than needed and filter in Ruby to avoid depending on virtual columns
107+
candidates = query.limit(k + 5)
108+
filtered = candidates.select { |r| r.neighbor_distance <= score_threshold }.first(k)
109+
ids = filtered.map(&:id)
110+
model.where(id: ids).order(Arel.sql("array_position(ARRAY#{ids.inspect}, id)"))
111+
else
112+
query.limit(k)
113+
end
103114
end
104115

105116
# Ask a question and return the answer
106117
# @param question [String] The question to ask
107118
# @param k [Integer] The number of results to have in context
119+
# @param score_threshold [Float] The minimum similarity score to include in results
108120
# @yield [String] Stream responses back one String at a time
109121
# @return [String] The answer to the question
110-
def ask(question:, k: 4, &block)
122+
def ask(question:, k: 4, score_threshold: nil, &block)
111123
# Noisy as the embedding column has a lot of data
112124
ActiveRecord::Base.logger.silence do
113-
search_results = similarity_search(query: question, k: k)
125+
search_results = similarity_search(query: question, k: k, score_threshold: score_threshold)
114126

115127
context = search_results.map do |result|
116128
result.as_vector

lib/langchainrb_rails/active_record/hooks.rb

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,11 +105,13 @@ def embed!
105105
#
106106
# @param query [String] The query to search for
107107
# @param k [Integer] The number of results to return
108+
# @param score_threshold [Float] The minimum similarity score to include in results
108109
# @return [ActiveRecord::Relation] The ActiveRecord relation
109-
def similarity_search(query, k: 1)
110+
def similarity_search(query, k: 1, score_threshold: nil)
110111
records = class_variable_get(:@@provider).similarity_search(
111112
query: query,
112-
k: k
113+
k: k,
114+
score_threshold: score_threshold
113115
)
114116

115117
return records if LangchainrbRails.config.vectorsearch.is_a?(Langchain::Vectorsearch::Pgvector)
@@ -123,13 +125,15 @@ def similarity_search(query, k: 1)
123125
#
124126
# @param question [String] The question to ask
125127
# @param k [Integer] The number of results to have in context
128+
# @param score_threshold [Float] The minimum similarity score to include in results
126129
# @yield [String] Stream responses back one String at a time
127130
# @return [String] The answer to the question
128131
# standard:disable Style/ArgumentsForwarding
129-
def ask(question, k: 4, &block)
132+
def ask(question, k: 4, score_threshold: nil, &block)
130133
class_variable_get(:@@provider).ask(
131134
question: question,
132135
k: k,
136+
score_threshold: score_threshold,
133137
&block
134138
).chat_completion
135139
end

lib/langchainrb_rails/version.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# frozen_string_literal: true
22

33
module LangchainrbRails
4-
VERSION = "0.1.12"
4+
VERSION = "0.1.13"
55
end
Lines changed: 78 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,95 @@
11
# frozen_string_literal: true
22

33
RSpec.describe Langchain::Vectorsearch::Pgvector do
4-
let(:llm) { Langchain::LLM::OpenAI.new(api_key: "123") }
4+
let(:llm) { double("LLM") }
5+
let(:model) { double("Model") }
56
subject { described_class.new(llm: llm) }
67

7-
describe "#add_texts" do
8+
before do
9+
subject.model = model
810
end
911

10-
describe "#update_text" do
11-
end
12-
13-
describe "#create_default_schema" do
14-
end
12+
describe "#similarity_search" do
13+
it "passes score_threshold to similarity_search_by_vector" do
14+
allow(llm).to receive(:embed).and_return(double(embedding: [0.1, 0.2]))
15+
allow(model).to receive(:nearest_neighbors).and_return(double(limit: []))
16+
allow(model).to receive(:where).and_return([])
1517

16-
describe "#destroy_default_schema" do
17-
end
18+
expect(subject).to receive(:similarity_search_by_vector).with(embedding: [0.1, 0.2], k: 4, score_threshold: 0.5)
1819

19-
describe "#similarity_search" do
20+
subject.similarity_search(query: "test", k: 4, score_threshold: 0.5)
21+
end
2022
end
2123

2224
describe "#similarity_search_by_vector" do
25+
let(:query) { double("Query") }
26+
let(:candidates) { double("Candidates") }
27+
let(:filtered) { [double(id: 1, neighbor_distance: 0.3), double(id: 2, neighbor_distance: 0.4)] }
28+
29+
before do
30+
allow(model).to receive(:nearest_neighbors).and_return(query)
31+
end
32+
33+
context "without score_threshold" do
34+
it "returns query.limit(k)" do
35+
allow(query).to receive(:limit).with(4).and_return(:result)
36+
37+
result = subject.similarity_search_by_vector(embedding: [0.1, 0.2], k: 4)
38+
39+
expect(result).to eq(:result)
40+
end
41+
end
42+
43+
context "with score_threshold" do
44+
it "filters candidates and returns ordered results" do
45+
allow(query).to receive(:limit).with(9).and_return(candidates) # k + 5 = 9
46+
allow(candidates).to receive(:select).and_return(filtered)
47+
allow(filtered).to receive(:first).with(4).and_return(filtered)
48+
allow(model).to receive(:where).with(id: [1, 2]).and_return(double(order: :ordered_result))
49+
50+
result = subject.similarity_search_by_vector(embedding: [0.1, 0.2], k: 4, score_threshold: 0.5)
51+
52+
expect(result).to eq(:ordered_result)
53+
end
54+
end
2355
end
2456

2557
describe "#ask" do
58+
it "passes score_threshold to similarity_search and processes results" do
59+
# Mock embedding
60+
allow(llm).to receive(:embed).and_return(double(embedding: [0.1, 0.2]))
61+
62+
# Mock nearest_neighbors and query chain
63+
query = double("Query")
64+
allow(model).to receive(:nearest_neighbors).and_return(query)
65+
allow(query).to receive(:limit).and_return([])
66+
67+
# Mock search results
68+
record1 = double("Record1", as_vector: "Vector 1")
69+
record2 = double("Record2", as_vector: "Vector 2")
70+
search_results = [record1, record2]
71+
72+
# Mock similarity_search to return the results
73+
allow(subject).to receive(:similarity_search).and_return(search_results)
74+
75+
# Mock logger silence
76+
logger = double("Logger")
77+
allow(ActiveRecord::Base).to receive(:logger).and_return(logger)
78+
allow(logger).to receive(:silence).and_yield
79+
80+
# Mock generate_rag_prompt
81+
allow(subject).to receive(:generate_rag_prompt).and_return("Mocked prompt")
82+
83+
# Mock llm.chat
84+
chat_response = double("ChatResponse", chat_completion: "Mocked answer")
85+
allow(llm).to receive(:chat).and_return(chat_response)
86+
87+
result = subject.ask(question: "question", k: 4, score_threshold: 0.5)
88+
89+
expect(subject).to have_received(:similarity_search).with(query: "question", k: 4, score_threshold: 0.5)
90+
expect(subject).to have_received(:generate_rag_prompt).with(question: "question", context: "Vector 1\n---\nVector 2")
91+
expect(llm).to have_received(:chat).with(messages: [{role: "user", content: "Mocked prompt"}])
92+
expect(result.chat_completion).to eq("Mocked answer")
93+
end
2694
end
2795
end

0 commit comments

Comments
 (0)