@@ -2,6 +2,7 @@ import { Mutex } from 'async-mutex';
22import { getPendingTasks , TaskExecutor , TaskPriority } from './task-executor' ;
33import { db , Task , TaskStatus } from './models' ;
44import { Server } from './server' ;
5+ import { TaskResults } from './task-results' ;
// 8 * 60 * 60 = 28,800. Presumably eight hours expressed in seconds —
// TODO confirm the unit where this constant is consumed.
export const DEFAULT_TASK_TIMEOUT = 8 * 60 * 60;
67/**
78 * Payload for task completion from worker
@@ -34,6 +35,31 @@ export interface TaskFailurePayload {
3435 } | null ;
3536}
3637
38+ /**
39+ * Payload for pushData chunk from worker
40+ */
export interface PushDataChunkPayload {
  /** Task the chunk belongs to; `handlePushDataChunk` passes it to `TaskResults.appendAllTask`. */
  taskId: number;
  /**
   * Items carried by this message. `handlePushDataChunk` skips the append
   * when this is missing or empty. Element shape is not constrained here.
   */
  chunk: any[];
}
45+
46+ /**
47+ * Payload for pushData completion from worker
48+ */
export interface PushDataCompletePayload {
  /** Task being finalized; `handlePushDataComplete` also derives the result file path from it. */
  taskId: number;
  /** Forwarded to `reportTaskSuccessWithPushData`; presumably the total number of pushed items — TODO confirm with the worker. */
  itemCount: number;
  /** Forwarded to `reportTaskSuccessWithPushData`; presumably opts the task out of caching — TODO confirm. */
  isDontCache: boolean;
  /** Forwarded unchanged to `reportTaskSuccessWithPushData`. */
  scraperName: string;
  /** Opaque task input, forwarded unchanged to `reportTaskSuccessWithPushData`. */
  taskData: any;
  /** Optional parent task reference; may be omitted or explicitly null. */
  parentTaskId?: number | null;
  /**
   * Optional capacity descriptor handed to `acquireNextTasks` so the reply can
   * carry follow-up work. Only `maxTasks` is required when it is present.
   */
  capacity?: {
    scraperType?: string;
    scraperName?: string;
    maxTasks: number;
  } | null;
}
62+
3763
3864/**
3965 * Stale task recovery interval: 60 seconds
@@ -233,4 +259,41 @@ export class MasterExecutor extends TaskExecutor {
233259 console . log ( `[Master] Released ${ releasedCount } /${ inProgressTaskIds . length } tasks from shutting down worker` ) ;
234260 return { releasedCount } ;
235261 }
262+
263+ /**
264+ * Handle pushData chunk from worker.
265+ * Appends chunk to the task's result file.
266+ */
async handlePushDataChunk(payload: PushDataChunkPayload) {
  // Resolves to an empty object on success; any failure from the append is
  // logged and then rethrown to the caller.
  const { taskId, chunk: items } = payload;

  try {
    // A missing or empty chunk is a no-op: nothing is appended.
    if (items && items.length > 0) {
      await TaskResults.appendAllTask(taskId, items);
    }
  } catch (err) {
    console.error('[Master] Error handling pushData chunk:', err);
    throw err;
  }

  return {};
}
280+
281+ /**
282+ * Handle pushData completion from worker.
283+ * Finalizes the task (caching, status update, parent update).
284+ * Piggyback pattern: also return new tasks for worker's available capacity.
285+ */
async handlePushDataComplete(payload: PushDataCompletePayload): Promise<{ nextTasks: any[] }> {
  // All fields are read up front, before any await.
  const {
    taskId,
    itemCount,
    isDontCache,
    scraperName,
    taskData,
    parentTaskId,
    capacity: workerCapacity,
  } = payload;
  const resultFilePath = TaskResults.generateTaskFilePath(taskId);

  try {
    await this.reportTaskSuccessWithPushData(
      taskId,
      resultFilePath,
      itemCount,
      isDontCache,
      scraperName,
      taskData,
      parentTaskId as any,
      null as any,
    );
  } catch (err) {
    // Finalization failures are logged but not rethrown, so the reply below
    // is still produced for the worker.
    console.error('[Master] Error handling pushData complete:', err);
  }

  // Piggyback: the reply is whatever acquireNextTasks yields for the
  // capacity the worker reported (which may be undefined or null).
  return this.acquireNextTasks(workerCapacity);
}
236299}
0 commit comments