@@ -15,9 +15,6 @@ const PERFORMANCE_TIMEOUT: Duration = Duration::from_secs(80);
15
15
/// Minimum tokens per second (TPS) for checking model performance during a generation.
16
16
const PERFORMANCE_MIN_TPS : f64 = 15.0 ;
17
17
18
- /// Prompt to be used to see Ollama performance.
19
- const TEST_PROMPT : & str = "Please write a poem about Kapadokya." ;
20
-
21
18
/// Ollama-specific configurations.
22
19
#[ derive( Clone ) ]
23
20
pub struct OllamaClient {
@@ -160,15 +157,19 @@ impl OllamaClient {
160
157
/// This is to see if a given system can execute Ollama workflows for their chosen models,
161
158
/// e.g. if they have enough RAM/CPU and such.
162
159
pub async fn test_performance ( & self , model : & Model ) -> bool {
163
- log:: info!( "Testing model {}" , model) ;
160
+ const TEST_PROMPT : & str = "Please write a poem about Kapadokya." ;
161
+ const WARMUP_PROMPT : & str = "Write a short poem about hedgehogs and squirrels." ;
164
162
165
- let generation_request = GenerationRequest :: new ( model. to_string ( ) , TEST_PROMPT . to_string ( ) ) ;
163
+ log :: info! ( "Testing model {}" , model ) ;
166
164
167
165
// run a dummy generation for warm-up
168
166
log:: debug!( "Warming up Ollama for model {}" , model) ;
169
167
if let Err ( e) = self
170
168
. ollama_rs_client
171
- . generate ( generation_request. clone ( ) )
169
+ . generate ( GenerationRequest :: new (
170
+ model. to_string ( ) ,
171
+ WARMUP_PROMPT . to_string ( ) ,
172
+ ) )
172
173
. await
173
174
{
174
175
log:: warn!( "Ignoring model {}: Workflow failed with error {}" , model, e) ;
@@ -180,7 +181,10 @@ impl OllamaClient {
180
181
_ = tokio:: time:: sleep( PERFORMANCE_TIMEOUT ) => {
181
182
log:: warn!( "Ignoring model {}: Workflow timed out" , model) ;
182
183
} ,
183
- result = self . ollama_rs_client. generate( generation_request) => {
184
+ result = self . ollama_rs_client. generate( GenerationRequest :: new(
185
+ model. to_string( ) ,
186
+ TEST_PROMPT . to_string( ) ,
187
+ ) ) => {
184
188
match result {
185
189
Ok ( response) => {
186
190
let tps = ( response. eval_count. unwrap_or_default( ) as f64 )
0 commit comments