# frozen_string_literal: true

RSpec.describe DiscourseAi::Embeddings::Strategies::Truncation do
  subject(:truncation) { described_class.new }

  fab!(:open_ai_embedding_def)
  let(:prefix) { "I come first:" }

  describe "#prepare_target_text" do
    before { SiteSetting.max_post_length = 100_000 }

    fab!(:topic)

    # NOTE: these fixtures previously all used the name :post, so the later
    # fab! calls overrode the earlier ones and only a single post was actually
    # created. Distinct names ensure the topic really contains all three long
    # posts, which is what the truncation assertion below depends on.
    fab!(:post_1) do
      Fabricate(:post, topic: topic, raw: "Baby, bird, bird, bird\nBird is the word\n" * 500)
    end
    fab!(:post_2) do
      Fabricate(
        :post,
        topic: topic,
        raw: "Don't you know about the bird?\nEverybody knows that the bird is a word\n" * 400,
      )
    end
    fab!(:post_3) { Fabricate(:post, topic: topic, raw: "Surfin' bird\n" * 800) }

    it "truncates a topic" do
      prepared_text = truncation.prepare_target_text(topic, open_ai_embedding_def)

      # The prepared text must fit within the embedding model's context window.
      expect(open_ai_embedding_def.tokenizer.size(prepared_text)).to be <=
        open_ai_embedding_def.max_sequence_length
    end

    it "includes embed prefix" do
      open_ai_embedding_def.update!(embed_prompt: prefix)

      prepared_text = truncation.prepare_target_text(topic, open_ai_embedding_def)

      expect(prepared_text.starts_with?(prefix)).to eq(true)
    end
  end

  describe "#prepare_query_text" do
    context "when search is asymetric" do
      it "includes search prefix" do
        open_ai_embedding_def.update!(search_prompt: prefix)

        prepared_query_text =
          truncation.prepare_query_text("searching", open_ai_embedding_def, asymetric: true)

        expect(prepared_query_text.starts_with?(prefix)).to eq(true)
      end
    end
  end
end