@@ -32,18 +32,21 @@ pub struct WorkflowsWorkerOutput {
32
32
///
33
33
/// It is expected to be spawned in another thread, with `run_batch` for batch processing and `run_series` for single processing.
34
34
pub struct WorkflowsWorker {
35
+ /// Receiving half of the channel that delivers `WorkflowsWorkerInput` tasks to this worker; the sender is most likely the compute node itself.
35
36
workflow_rx : mpsc:: Receiver < WorkflowsWorkerInput > ,
37
+ /// Sending half of the channel on which `WorkflowsWorkerOutput` results are published; the receiver is most likely the compute node itself.
36
38
publish_tx : mpsc:: Sender < WorkflowsWorkerOutput > ,
37
39
}
38
40
39
41
/// Buffer size for workflow tasks (per worker).
40
42
const WORKFLOW_CHANNEL_BUFSIZE : usize = 1024 ;
41
43
42
44
impl WorkflowsWorker {
43
- /// Batch size that defines how many tasks can be executed in parallel at once.
44
- /// IMPORTANT NOTE: `run` function is designed to handle the batch size here specifically,
45
+ /// Maximum batch size, i.e. the upper bound on how many tasks can be executed concurrently at once.
46
+ ///
47
+ /// The `run_batch` function is designed to handle batches up to this size specifically,
45
48
/// if it receives more tasks than this in a single batch, the function will panic.
46
- const BATCH_SIZE : usize = 8 ;
49
+ pub const MAX_BATCH_SIZE : usize = 8 ;
47
50
48
51
/// Creates a worker and returns the sender and receiver for the worker.
49
52
pub fn new (
@@ -65,24 +68,20 @@ impl WorkflowsWorker {
65
68
self . workflow_rx . close ( ) ;
66
69
}
67
70
68
- /// Launches the thread that can process tasks one by one.
71
+ /// Runs the worker loop, processing tasks one by one (in series); each task's result is published by `execute`.
69
72
/// This function will block until the channel is closed, and then shuts the worker down.
70
73
///
71
74
/// It is suitable for task streams that consume local resources, unlike API calls.
72
- pub async fn run ( & mut self ) {
75
+ pub async fn run_series ( & mut self ) {
73
76
loop {
74
77
let task = self . workflow_rx . recv ( ) . await ;
75
78
76
- let result = if let Some ( task) = task {
79
+ if let Some ( task) = task {
77
80
log:: info!( "Processing single workflow for task {}" , task. task_id) ;
78
- WorkflowsWorker :: execute ( task) . await
81
+ WorkflowsWorker :: execute ( ( task, self . publish_tx . clone ( ) ) ) . await
79
82
} else {
80
83
return self . shutdown ( ) ;
81
84
} ;
82
-
83
- if let Err ( e) = self . publish_tx . send ( result) . await {
84
- log:: error!( "Error sending workflow result: {}" , e) ;
85
- }
86
85
}
87
86
}
88
87
@@ -91,13 +90,16 @@ impl WorkflowsWorker {
91
90
///
92
91
/// It is suitable for task streams that make use of API calls, unlike Ollama-like
93
92
/// tasks that consumes local resources and would not make sense to run in parallel.
94
- pub async fn run_batch ( & mut self ) {
93
+ ///
94
+ /// Batch size must NOT be larger than `MAX_BATCH_SIZE`, otherwise will panic.
95
+ pub async fn run_batch ( & mut self , batch_size : usize ) {
96
+ // TODO: need some better batch_size error handling here
95
97
loop {
96
98
// get tasks in batch from the channel
97
99
let mut task_buffer = Vec :: new ( ) ;
98
100
let num_tasks = self
99
101
. workflow_rx
100
- . recv_many ( & mut task_buffer, Self :: BATCH_SIZE )
102
+ . recv_many ( & mut task_buffer, batch_size )
101
103
. await ;
102
104
103
105
if num_tasks == 0 {
@@ -106,8 +108,10 @@ impl WorkflowsWorker {
106
108
107
109
// process the batch
108
110
log:: info!( "Processing {} workflows in batch" , num_tasks) ;
109
- let mut batch = task_buffer. into_iter ( ) ;
110
- let results = match num_tasks {
111
+ let mut batch = task_buffer
112
+ . into_iter ( )
113
+ . map ( |b| ( b, self . publish_tx . clone ( ) ) ) ;
114
+ match num_tasks {
111
115
1 => {
112
116
let r0 = WorkflowsWorker :: execute ( batch. next ( ) . unwrap ( ) ) . await ;
113
117
vec ! [ r0]
@@ -186,23 +190,17 @@ impl WorkflowsWorker {
186
190
unreachable ! (
187
191
"number of tasks cant be larger than batch size ({} > {})" ,
188
192
num_tasks,
189
- Self :: BATCH_SIZE
193
+ Self :: MAX_BATCH_SIZE
190
194
) ;
191
195
}
192
196
} ;
193
-
194
- // publish all results
195
- log:: info!( "Publishing {} workflow results" , results. len( ) ) ;
196
- for result in results {
197
- if let Err ( e) = self . publish_tx . send ( result) . await {
198
- log:: error!( "Error sending workflow result: {}" , e) ;
199
- }
200
- }
201
197
}
202
198
}
203
199
204
- /// A single task execution.
205
- pub async fn execute ( input : WorkflowsWorkerInput ) -> WorkflowsWorkerOutput {
200
+ /// Executes a single task, and publishes the output.
201
+ pub async fn execute (
202
+ ( input, publish_tx) : ( WorkflowsWorkerInput , mpsc:: Sender < WorkflowsWorkerOutput > ) ,
203
+ ) {
206
204
let mut memory = ProgramMemory :: new ( ) ;
207
205
208
206
let started_at = std:: time:: Instant :: now ( ) ;
@@ -211,13 +209,17 @@ impl WorkflowsWorker {
211
209
. execute ( input. entry . as_ref ( ) , & input. workflow , & mut memory)
212
210
. await ;
213
211
214
- WorkflowsWorkerOutput {
212
+ let output = WorkflowsWorkerOutput {
215
213
result,
216
214
public_key : input. public_key ,
217
215
task_id : input. task_id ,
218
216
model_name : input. model_name ,
219
217
batchable : input. batchable ,
220
218
stats : input. stats . record_execution_time ( started_at) ,
219
+ } ;
220
+
221
+ if let Err ( e) = publish_tx. send ( output) . await {
222
+ log:: error!( "Error sending workflow result: {}" , e) ;
221
223
}
222
224
}
223
225
}
0 commit comments