
Commit 83a8c1c

change warmup prompt [skip ci]
1 parent e212cc5 commit 83a8c1c


2 files changed: +22 -13 lines changed


workflows-v2/src/providers/ollama.rs

Lines changed: 11 additions & 7 deletions

```diff
@@ -15,9 +15,6 @@ const PERFORMANCE_TIMEOUT: Duration = Duration::from_secs(80);
 /// Minimum tokens per second (TPS) for checking model performance during a generation.
 const PERFORMANCE_MIN_TPS: f64 = 15.0;
 
-/// Prompt to be used to see Ollama performance.
-const TEST_PROMPT: &str = "Please write a poem about Kapadokya.";
-
 /// Ollama-specific configurations.
 #[derive(Clone)]
 pub struct OllamaClient {
@@ -160,15 +157,19 @@ impl OllamaClient {
     /// This is to see if a given system can execute Ollama workflows for their chosen models,
     /// e.g. if they have enough RAM/CPU and such.
     pub async fn test_performance(&self, model: &Model) -> bool {
-        log::info!("Testing model {}", model);
+        const TEST_PROMPT: &str = "Please write a poem about Kapadokya.";
+        const WARMUP_PROMPT: &str = "Write a short poem about hedgehogs and squirrels.";
 
-        let generation_request = GenerationRequest::new(model.to_string(), TEST_PROMPT.to_string());
+        log::info!("Testing model {}", model);
 
         // run a dummy generation for warm-up
         log::debug!("Warming up Ollama for model {}", model);
         if let Err(e) = self
             .ollama_rs_client
-            .generate(generation_request.clone())
+            .generate(GenerationRequest::new(
+                model.to_string(),
+                WARMUP_PROMPT.to_string(),
+            ))
             .await
         {
             log::warn!("Ignoring model {}: Workflow failed with error {}", model, e);
@@ -180,7 +181,10 @@ impl OllamaClient {
             _ = tokio::time::sleep(PERFORMANCE_TIMEOUT) => {
                 log::warn!("Ignoring model {}: Workflow timed out", model);
             },
-            result = self.ollama_rs_client.generate(generation_request) => {
+            result = self.ollama_rs_client.generate(GenerationRequest::new(
+                model.to_string(),
+                TEST_PROMPT.to_string(),
+            )) => {
                 match result {
                     Ok(response) => {
                         let tps = (response.eval_count.unwrap_or_default() as f64)
```
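
The last context line of this hunk is cut off mid-statement. As a rough sketch only, assuming Ollama's usual generation stats where `eval_count` is the number of generated tokens and `eval_duration` is reported in nanoseconds, the tokens-per-second value compared against `PERFORMANCE_MIN_TPS` would be computed roughly like this (the helper name is illustrative, not from the commit):

```rust
/// Illustrative only: tokens-per-second from Ollama-style generation stats,
/// where `eval_count` is the generated token count and `eval_duration_ns` is
/// the generation time in nanoseconds.
fn tokens_per_second(eval_count: u64, eval_duration_ns: u64) -> f64 {
    const NANOS_PER_SEC: f64 = 1_000_000_000.0;
    // `max(1)` guards against a zero duration so we never divide by zero
    (eval_count as f64) / (eval_duration_ns.max(1) as f64) * NANOS_PER_SEC
}

// Example: 300 tokens over 12.5 seconds gives 24.0 TPS, which clears PERFORMANCE_MIN_TPS (15.0).
```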

workflows/src/providers/ollama.rs

Lines changed: 11 additions & 6 deletions

```diff
@@ -17,8 +17,6 @@ const DEFAULT_MIN_TPS: f64 = 15.0;
 
 /// Some models such as small embedding models, are hardcoded into the node.
 const HARDCODED_MODELS: [&str; 1] = ["hellord/mxbai-embed-large-v1:f16"];
-/// Prompt to be used to see Ollama performance.
-const TEST_PROMPT: &str = "Please write a poem about Kapadokya.";
 
 /// Ollama-specific configurations.
 #[derive(Debug, Clone)]
@@ -179,13 +177,20 @@ impl OllamaConfig {
     /// This is to see if a given system can execute Ollama workflows for their chosen models,
     /// e.g. if they have enough RAM/CPU and such.
     pub async fn test_performance(&self, ollama: &Ollama, model: &Model) -> bool {
-        log::info!("Testing model {}", model);
+        const TEST_PROMPT: &str = "Please write a poem about Kapadokya.";
+        const WARMUP_PROMPT: &str = "Write a short poem about hedgehogs and squirrels.";
 
-        let generation_request = GenerationRequest::new(model.to_string(), TEST_PROMPT.to_string());
+        log::info!("Testing model {}", model);
 
         // run a dummy generation for warm-up
         log::debug!("Warming up Ollama for model {}", model);
-        if let Err(e) = ollama.generate(generation_request.clone()).await {
+        if let Err(e) = ollama
+            .generate(GenerationRequest::new(
+                model.to_string(),
+                WARMUP_PROMPT.to_string(),
+            ))
+            .await
+        {
             log::warn!("Ignoring model {}: Workflow failed with error {}", model, e);
             return false;
         }
@@ -195,7 +200,7 @@ impl OllamaConfig {
             _ = tokio::time::sleep(self.timeout) => {
                 log::warn!("Ignoring model {}: Workflow timed out", model);
             },
-            result = ollama.generate(generation_request) => {
+            result = ollama.generate(GenerationRequest::new(model.to_string(), TEST_PROMPT.to_string())) => {
                 match result {
                     Ok(response) => {
                         let tps = (response.eval_count.unwrap_or_default() as f64)
```
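
Both versions keep the same timeout shape around the real test generation: a `tokio::select!` that races the generation future against a sleep (`PERFORMANCE_TIMEOUT` of 80 seconds in `workflows-v2`, the configurable `self.timeout` here), so a stalled model is ignored instead of blocking the node. A minimal, self-contained sketch of that pattern, with illustrative names that are not from the repo:

```rust
use std::time::Duration;

/// Race `work` against a timeout; `None` means the sleep finished first (timed out).
/// This mirrors the `tokio::select!` shape used in `test_performance` above.
async fn run_with_timeout<T>(
    work: impl std::future::Future<Output = T>,
    timeout: Duration,
) -> Option<T> {
    tokio::select! {
        _ = tokio::time::sleep(timeout) => None,
        result = work => Some(result),
    }
}
```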
