Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 7d222ec

Browse files
committed
test file support
1 parent fbf89ce commit 7d222ec

File tree

2 files changed

+26
-3
lines changed

2 files changed

+26
-3
lines changed

lib/utils/pdf_to_text.rb

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
class DiscourseAi::Utils::PdfToText
44
class Reader
55
def initialize(upload:, user: nil, llm_model: nil)
6-
@extractor = self.new(upload: upload, user: user, llm_model: llm_model)
6+
@extractor =
7+
DiscourseAi::Utils::PdfToText.new(upload: upload, user: user, llm_model: llm_model)
78
@enumerator = create_enumerator
89
@buffer = +""
910
end
@@ -12,9 +13,9 @@ def read(length)
1213
return @buffer.slice!(0, length) if !@buffer.empty?
1314

1415
begin
15-
buffer << @enumerator.next
16+
@buffer << @enumerator.next
1617
rescue StopIteration
17-
nil
18+
return nil
1819
end
1920

2021
@buffer.slice!(0, length)

spec/lib/utils/pdf_to_text_spec.rb

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,28 @@
1818
end
1919

2020
context "when improving PDF extraction with LLM" do
21+
it "can properly simulate a file" do
22+
if ENV["CI"]
23+
skip "This test requires imagemagick is installed with ghostscript support - which is not available in CI"
24+
end
25+
26+
responses = [
27+
"<chunk>Page 1: LLM chunk 1</chunk><chunk>Page 1: LLM chunk 2</chunk>",
28+
"<chunk>Page 2: LLM chunk 3</chunk>",
29+
]
30+
31+
pages = []
32+
DiscourseAi::Completions::Llm.with_prepared_responses(responses) do |_, _, _prompts|
33+
file = described_class.as_fake_file(upload: upload, user: user, llm_model: llm_model)
34+
35+
while content = file.read(100_000)
36+
pages << content
37+
end
38+
end
39+
40+
expect(pages).to eq(["Page 1: LLM chunk 1", "Page 1: LLM chunk 2", "Page 2: LLM chunk 3"])
41+
end
42+
2143
it "works as expected" do
2244
if ENV["CI"]
2345
skip "This test requires imagemagick is installed with ghostscript support - which is not available in CI"

0 commit comments

Comments
 (0)