
Commit 89bcf9b

FIX: Process successfully generated embeddings even if some failed (#1500)
1 parent 6b5ea38 commit 89bcf9b

7 files changed (+48, -32 lines)

lib/embeddings/vector.rb

Lines changed: 2 additions & 1 deletion
@@ -42,13 +42,14 @@ def gen_bulk_reprensentations(relation)
           .then_on(pool) do |w_prepared_text|
             w_prepared_text.merge(embedding: embedding_gen.perform!(w_prepared_text[:text]))
           end
+          .rescue { nil } # We log the error during #perform. Skip failed embeddings.
         end
         .compact

     Concurrent::Promises
       .zip(*promised_embeddings)
       .value!
-      .each { |e| schema.store(e[:target], e[:embedding], e[:digest]) }
+      .each { |e| schema.store(e[:target], e[:embedding], e[:digest]) if e.present? }
   ensure
     pool.shutdown
     pool.wait_for_termination
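
For context, here is a minimal standalone sketch of the concurrent-ruby pattern this change relies on (the pool size, input values, and the simulated failure are illustrative, not taken from the plugin): a rejected promise is rescued to nil so the zipped batch still resolves, and the caller skips the nil entries when storing.

require "concurrent"

pool = Concurrent::FixedThreadPool.new(4)

# Each unit of work either resolves to a hash or, if it raises, is rescued to
# nil so one failure does not reject the whole zipped batch.
promises =
  [1, 2, 3].map do |n|
    Concurrent::Promises
      .fulfilled_future(n, pool)
      .then_on(pool) do |i|
        raise "simulated embedding failure" if i == 2
        { id: i }
      end
      .rescue { nil } # mirrors the .rescue { nil } added in this commit
  end

results = Concurrent::Promises.zip(*promises).value!
# => [{ id: 1 }, nil, { id: 3 }]
results.each { |r| puts r[:id] unless r.nil? } # the plugin guards with e.present? (ActiveSupport)

pool.shutdown
pool.wait_for_termination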

lib/inference/cloudflare_workers_ai.rb

Lines changed: 0 additions & 2 deletions
@@ -26,8 +26,6 @@ def perform!(content)
     case response.status
     when 200
       JSON.parse(response.body, symbolize_names: true).dig(:result, :data).first
-    when 429
-      # TODO add a AdminDashboard Problem?
     else
       Rails.logger.warn(
         "Cloudflare Workers AI Embeddings failed with status: #{response.status} body: #{response.body}",

lib/inference/gemini_embeddings.rb

Lines changed: 0 additions & 2 deletions
@@ -22,8 +22,6 @@ def perform!(content)
     case response.status
     when 200
       JSON.parse(response.body, symbolize_names: true).dig(:embedding, :values)
-    when 429
-      # TODO add a AdminDashboard Problem?
     else
       Rails.logger.warn(
         "Google Gemini Embeddings failed with status: #{response.status} body: #{response.body}",

lib/inference/open_ai_embeddings.rb

Lines changed: 0 additions & 2 deletions
@@ -30,8 +30,6 @@ def perform!(content)
     case response.status
     when 200
       JSON.parse(response.body, symbolize_names: true).dig(:data, 0, :embedding)
-    when 429
-      # TODO add a AdminDashboard Problem?
     else
       Rails.logger.warn(
         "OpenAI Embeddings failed with status: #{response.status} body: #{response.body}",

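Across all three inference clients the change is the same: the empty 429 branch is dropped, so a rate-limited response now takes the same path as any other failure. A rough sketch of the resulting shape (the helper names are illustrative, and the raise is inferred from the Net::HTTPBadResponse expectation removed from the spec below; the exact code may differ):

def perform!(content)
  response = do_request(content) # hypothetical helper standing in for the real HTTP call

  case response.status
  when 200
    extract_embedding(response.body) # stands in for the provider-specific JSON digging above
  else
    # 429 is no longer special-cased: a rate-limited response is logged and
    # raised like any other failure, and the caller's .rescue { nil } turns it
    # into a skipped embedding rather than a crashed batch.
    Rails.logger.warn(
      "Embeddings request failed with status: #{response.status} body: #{response.body}",
    )
    raise Net::HTTPBadResponse
  end
end
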
spec/lib/inference/cloudflare_workers_ai_spec.rb

Lines changed: 1 addition & 10 deletions
@@ -40,16 +40,7 @@
     end
   end

-  context "when the response status is 429" do
-    let(:response_status) { 429 }
-    let(:response_body) { "" }
-
-    it "doesn't raises a Net::HTTPBadResponse error" do
-      expect { subject.perform!(content) }.not_to raise_error
-    end
-  end
-
-  context "when the response status is not 200 or 429" do
+  context "when the response status is not 200" do
     let(:response_status) { 500 }
     let(:response_body) { "Internal Server Error" }

spec/lib/modules/embeddings/vector_spec.rb

Lines changed: 37 additions & 7 deletions
@@ -83,17 +83,34 @@

       expect(topics_schema.find_by_target(topic).updated_at).to eq_time(original_vector_gen)
     end
+
+    context "when one of the concurrently generated embeddings fails" do
+      it "still processes the successful ones" do
+        text = vdef.prepare_target_text(topic)
+
+        text2 = vdef.prepare_target_text(topic_2)
+
+        stub_vector_mapping(text, expected_embedding_1)
+        stub_vector_mapping(text2, expected_embedding_2, result_status: 429)
+
+        vector.gen_bulk_reprensentations(Topic.where(id: [topic.id, topic_2.id]))
+
+        expect(topics_schema.find_by_embedding(expected_embedding_1).topic_id).to eq(topic.id)
+        expect(topics_schema.find_by_target(topic_2)).to be_nil
+      end
+    end
   end
 end

 context "with open_ai as the provider" do
   fab!(:vdef) { Fabricate(:open_ai_embedding_def) }

-  def stub_vector_mapping(text, expected_embedding)
+  def stub_vector_mapping(text, expected_embedding, result_status: 200)
     EmbeddingsGenerationStubs.openai_service(
       vdef.lookup_custom_param("model_name"),
       text,
       expected_embedding,
+      result_status: result_status,
     )
   end

@@ -123,8 +140,12 @@ def stub_vector_mapping(text, expected_embedding)
 context "with hugging_face as the provider" do
   fab!(:vdef) { Fabricate(:embedding_definition) }

-  def stub_vector_mapping(text, expected_embedding)
-    EmbeddingsGenerationStubs.hugging_face_service(text, expected_embedding)
+  def stub_vector_mapping(text, expected_embedding, result_status: 200)
+    EmbeddingsGenerationStubs.hugging_face_service(
+      text,
+      expected_embedding,
+      result_status: result_status,
+    )
   end

   it_behaves_like "generates and store embeddings using a vector definition"
@@ -133,8 +154,13 @@ def stub_vector_mapping(text, expected_embedding)
 context "with google as the provider" do
   fab!(:vdef) { Fabricate(:gemini_embedding_def) }

-  def stub_vector_mapping(text, expected_embedding)
-    EmbeddingsGenerationStubs.gemini_service(vdef.api_key, text, expected_embedding)
+  def stub_vector_mapping(text, expected_embedding, result_status: 200)
+    EmbeddingsGenerationStubs.gemini_service(
+      vdef.api_key,
+      text,
+      expected_embedding,
+      result_status: result_status,
+    )
   end

   it_behaves_like "generates and store embeddings using a vector definition"
@@ -143,8 +169,12 @@ def stub_vector_mapping(text, expected_embedding)
 context "with cloudflare as the provider" do
   fab!(:vdef) { Fabricate(:cloudflare_embedding_def) }

-  def stub_vector_mapping(text, expected_embedding)
-    EmbeddingsGenerationStubs.cloudflare_service(text, expected_embedding)
+  def stub_vector_mapping(text, expected_embedding, result_status: 200)
+    EmbeddingsGenerationStubs.cloudflare_service(
+      text,
+      expected_embedding,
+      result_status: result_status,
+    )
   end

   it_behaves_like "generates and store embeddings using a vector definition"

spec/support/embeddings_generation_stubs.rb

Lines changed: 8 additions & 8 deletions
@@ -2,35 +2,35 @@

 class EmbeddingsGenerationStubs
   class << self
-    def hugging_face_service(string, embedding)
+    def hugging_face_service(string, embedding, result_status: 200)
       WebMock
         .stub_request(:post, "https://test.com/embeddings")
         .with(body: JSON.dump({ inputs: string, truncate: true }))
-        .to_return(status: 200, body: JSON.dump([embedding]))
+        .to_return(status: result_status, body: JSON.dump([embedding]))
     end

-    def openai_service(model, string, embedding, extra_args: {})
+    def openai_service(model, string, embedding, result_status: 200, extra_args: {})
       WebMock
         .stub_request(:post, "https://api.openai.com/v1/embeddings")
         .with(body: JSON.dump({ model: model, input: string }.merge(extra_args)))
-        .to_return(status: 200, body: JSON.dump({ data: [{ embedding: embedding }] }))
+        .to_return(status: result_status, body: JSON.dump({ data: [{ embedding: embedding }] }))
     end

-    def gemini_service(api_key, string, embedding)
+    def gemini_service(api_key, string, embedding, result_status: 200)
       WebMock
         .stub_request(
           :post,
           "https://generativelanguage.googleapis.com/v1beta/models/embedding-001:embedContent\?key\=#{api_key}",
         )
         .with(body: JSON.dump({ content: { parts: [{ text: string }] } }))
-        .to_return(status: 200, body: JSON.dump({ embedding: { values: embedding } }))
+        .to_return(status: result_status, body: JSON.dump({ embedding: { values: embedding } }))
     end

-    def cloudflare_service(string, embedding)
+    def cloudflare_service(string, embedding, result_status: 200)
       WebMock
         .stub_request(:post, "https://test.com/embeddings")
         .with(body: JSON.dump({ text: [string] }))
-        .to_return(status: 200, body: JSON.dump({ result: { data: [embedding] } }))
+        .to_return(status: result_status, body: JSON.dump({ result: { data: [embedding] } }))
     end
   end
 end
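
With the new result_status keyword, a spec can make any of these stubs answer with a rate-limit status and exercise the partial-failure path, which is what the vector_spec above does through stub_vector_mapping. A minimal illustration (the text and embedding values are placeholders):

EmbeddingsGenerationStubs.cloudflare_service("prepared text", [0.1, 0.2], result_status: 429)
# The stubbed endpoint now returns HTTP 429, the inference client logs and raises,
# and gen_bulk_reprensentations skips that record while still storing embeddings
# whose stubs keep the default result_status: 200.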
