# frozen_string_literal: true

require "spec_helper"

RSpec.describe RubyLLM::RedCandle::Streaming do
  let(:config) { RubyLLM::Configuration.new }
  let(:provider) { RubyLLM::RedCandle::Provider.new(config) }
  let(:mock_model) { instance_double(Candle::LLM) }

  before do
    allow(provider).to receive(:ensure_model_loaded!).and_return(mock_model)
    allow(mock_model).to receive(:respond_to?).with(:apply_chat_template).and_return(true)
    allow(mock_model).to receive(:apply_chat_template).and_return("formatted prompt")
  end

  describe "#stream" do
    let(:messages) { [{ role: "user", content: "Test message" }] }

    context "when stream: true" do
      it "calls perform_streaming_completion!" do
        tokens = %w[Hello world]

        allow(mock_model).to receive(:generate_stream) do |_prompt, config:, &block|
          tokens.each { |token| block.call(token) }
        end

        payload = {
          messages: messages,
          model: "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
          temperature: 0.7,
          stream: true
        }

        chunks = []
        provider.stream(payload) { |chunk| chunks << chunk }

        # Should receive token chunks plus final empty chunk
        expect(chunks.size).to eq(3)
        expect(chunks[0].content).to eq("Hello")
        expect(chunks[1].content).to eq("world")
        expect(chunks[2].content).to eq("")
      end
    end

    context "when stream: false" do
      it "yields a single chunk with complete result" do
        allow(mock_model).to receive(:generate).and_return("Complete response")

        payload = {
          messages: messages,
          model: "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
          temperature: 0.7,
          stream: false
        }

        chunks = []
        provider.stream(payload) { |chunk| chunks << chunk }

        expect(chunks.size).to eq(1)
        expect(chunks[0][:content]).to eq("Complete response")
        expect(chunks[0][:role]).to eq("assistant")
      end
    end

    context "when stream is nil (defaults to non-streaming)" do
      it "yields a single chunk with complete result" do
        allow(mock_model).to receive(:generate).and_return("Complete response")

        payload = {
          messages: messages,
          model: "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
          temperature: 0.7
        }

        chunks = []
        provider.stream(payload) { |chunk| chunks << chunk }

        expect(chunks.size).to eq(1)
        expect(chunks[0][:content]).to eq("Complete response")
      end
    end
  end

  describe "#stream_processor" do
    it "returns nil for compatibility" do
      expect(provider.send(:stream_processor)).to be_nil
    end
  end

  describe "#process_stream_response" do
    it "returns the response unchanged" do
      response = { content: "test" }
      expect(provider.send(:process_stream_response, response)).to eq(response)
    end
  end
end
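Read back from these expectations, the module under test dispatches on payload[:stream]: the streaming path yields one chunk per generated token (each responding to #content) plus a trailing empty chunk, while the non-streaming path, which is also the default when :stream is absent, yields a single hash with :content and :role. The outline below is only a minimal sketch inferred from this spec, not the gem's actual code; Chunk, build_prompt, and generation_config are hypothetical names, and the only model calls used are the ones the spec itself stubs (apply_chat_template, generate_stream, generate).

# A minimal sketch inferred from the spec above; not the gem's implementation.
module RubyLLM
  module RedCandle
    module Streaming
      # Streamed chunks only need to respond to #content per the spec.
      Chunk = Struct.new(:content)

      def stream(payload, &block)
        if payload[:stream]
          perform_streaming_completion!(payload, &block)
        else
          # Non-streaming (and default) path: yield a single hash chunk.
          model = ensure_model_loaded!
          text  = model.generate(build_prompt(model, payload[:messages]))
          block.call({ content: text, role: "assistant" })
        end
      end

      private

      def perform_streaming_completion!(payload, &block)
        model  = ensure_model_loaded!
        prompt = build_prompt(model, payload[:messages])
        # One chunk per token, then a final empty chunk to signal completion.
        model.generate_stream(prompt, config: generation_config(payload)) do |token|
          block.call(Chunk.new(token))
        end
        block.call(Chunk.new(""))
      end

      # Kept for interface compatibility; the spec expects nil.
      def stream_processor
        nil
      end

      # Pass-through, per the spec.
      def process_stream_response(response)
        response
      end

      # Hypothetical helpers, named here only for the sketch.

      def build_prompt(model, messages)
        if model.respond_to?(:apply_chat_template)
          model.apply_chat_template(messages)
        else
          messages.map { |m| m[:content] }.join("\n")
        end
      end

      def generation_config(payload)
        { temperature: payload[:temperature] } # placeholder shape
      end
    end
  end
end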