 // use ::std::cell::LazyCell;
 use ::std::collections::HashMap;
+use std::sync::{Arc, Mutex};
 // use ::std::env::current_dir;
 use ::std::error::Error;
 use ::std::fs::File;
@@ -8,10 +9,12 @@ use ::std::fs::File;
 use async_recursion::async_recursion;
 use minijinja::{syntax::SyntaxConfig, Environment};
 use owo_colors::OwoColorize;
+use tokio::io::{stdout, AsyncWriteExt};
+use tokio_stream::StreamExt;

 use ollama_rs::{
     generation::{
-        chat::{request::ChatMessageRequest, ChatMessage, MessageRole},
+        chat::{request::ChatMessageRequest, ChatMessage, ChatMessageResponse, MessageRole},
         tools::ToolInfo,
     },
     models::ModelOptions,
@@ -288,7 +291,7 @@ impl<'a> Interpreter<'a> {
             pdl_model
                 if pdl_model.starts_with("ollama/") || pdl_model.starts_with("ollama_chat/") =>
             {
-                let mut ollama = Ollama::default();
+                let ollama = Ollama::default();
                 let model = if pdl_model.starts_with("ollama/") {
                     &pdl_model[7..]
                 } else {
@@ -313,7 +316,7 @@ impl<'a> Interpreter<'a> {
                     Some(x) => x,
                     None => (&ChatMessage::user("".into()), &[]),
                 };
-                let mut history = Vec::from(history_slice);
+                let history = Vec::from(history_slice);
                 if self.debug {
                     eprintln!(
                         "Ollama {:?} model={:?} prompt={:?} history={:?}",
@@ -327,6 +330,7 @@ impl<'a> Interpreter<'a> {
                 let req = ChatMessageRequest::new(model.into(), vec![prompt.clone()])
                     .options(options)
                     .tools(tools);
+                /* if we ever want non-streaming:
                 let res = ollama
                     .send_chat_messages_with_history(
                         &mut history,
@@ -349,6 +353,48 @@ impl<'a> Interpreter<'a> {
                 }
                 // dbg!(history);
                 Ok((vec![res.message], PdlBlock::Model(trace)))
+                */
+                let mut stream = ollama
+                    .send_chat_messages_with_history_stream(
+                        Arc::new(Mutex::new(history)),
+                        req,
+                        //ollama.generate(GenerationRequest::new(model.into(), prompt),
+                    )
+                    .await?;
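+                // Each item in the stream is a ChatMessageResponse chunk of the assistant's reply;
+                // the chat history now lives behind the Arc<Mutex<..>> handed to ollama-rs.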
+                // dbg!("Model result {:?}", &res);
+
+                let mut last_res: Option<ChatMessageResponse> = None;
+                let mut response_string = String::new();
+                let mut stdout = stdout();
+                stdout.write_all(b"\x1b[1mAssistant: \x1b[0m").await?;
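+                // Print each chunk in green as it arrives and accumulate the full response text.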
+                while let Some(Ok(res)) = stream.next().await {
+                    stdout.write_all(b"\x1b[32m").await?; // green
+                    stdout.write_all(res.message.content.as_bytes()).await?;
+                    stdout.flush().await?;
+                    stdout.write_all(b"\x1b[0m").await?; // reset color
+                    response_string += res.message.content.as_str();
+                    last_res = Some(res);
+                }
+
+                let mut trace = block.clone();
+                trace.pdl_result = Some(response_string.clone());
+
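+                // Record token usage from the final chunk and return the accumulated assistant message.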
+                if let Some(res) = last_res {
+                    if let Some(usage) = res.final_data {
+                        trace.pdl_usage = Some(PdlUsage {
+                            prompt_tokens: usage.prompt_eval_count,
+                            prompt_nanos: usage.prompt_eval_duration,
+                            completion_tokens: usage.eval_count,
+                            completion_nanos: usage.eval_duration,
+                        });
+                    }
+                    let mut message = res.message.clone();
+                    message.content = response_string;
+                    Ok((vec![message], PdlBlock::Model(trace)))
+                } else {
+                    Ok((vec![], PdlBlock::Model(trace)))
+                }
+                // dbg!(history);
             }
             _ => Err(Box::from(format!("Unsupported model {}", block.model))),
         }