This repository was archived by the owner on Jul 22, 2025. It is now read-only.
File tree: 2 files changed, +26 -3 lines changed
Original file line number Diff line number Diff line change
33 class DiscourseAi ::Utils ::PdfToText
44 class Reader
55 def initialize ( upload :, user : nil , llm_model : nil )
6- @extractor = self . new ( upload : upload , user : user , llm_model : llm_model )
6+ @extractor =
7+ DiscourseAi ::Utils ::PdfToText . new ( upload : upload , user : user , llm_model : llm_model )
78 @enumerator = create_enumerator
89 @buffer = +""
910 end
@@ -12,9 +13,9 @@ def read(length)
1213 return @buffer . slice! ( 0 , length ) if !@buffer . empty?
1314
1415 begin
15- buffer << @enumerator . next
16+ @buffer << @enumerator . next
1617 rescue StopIteration
17- nil
18+ return nil
1819 end
1920
2021 @buffer . slice! ( 0 , length )
Original file line number Diff line number Diff line change
1818 end
1919
2020 context "when improving PDF extraction with LLM" do
21+ it "can properly simulate a file" do
22+ if ENV [ "CI" ]
23+ skip "This test requires imagemagick is installed with ghostscript support - which is not available in CI"
24+ end
25+
26+ responses = [
27+ "<chunk>Page 1: LLM chunk 1</chunk><chunk>Page 1: LLM chunk 2</chunk>" ,
28+ "<chunk>Page 2: LLM chunk 3</chunk>" ,
29+ ]
30+
31+ pages = [ ]
32+ DiscourseAi ::Completions ::Llm . with_prepared_responses ( responses ) do |_ , _ , _prompts |
33+ file = described_class . as_fake_file ( upload : upload , user : user , llm_model : llm_model )
34+
35+ while content = file . read ( 100_000 )
36+ pages << content
37+ end
38+ end
39+
40+ expect ( pages ) . to eq ( [ "Page 1: LLM chunk 1" , "Page 1: LLM chunk 2" , "Page 2: LLM chunk 3" ] )
41+ end
42+
2143 it "works as expected" do
2244 if ENV [ "CI" ]
2345 skip "This test requires imagemagick is installed with ghostscript support - which is not available in CI"
You can’t perform that action at this time.
0 commit comments