Skip to content

Commit df06483

Browse files
authored
Merge pull request #834 from getmaxun/capflow-revamp
feat(maxun-core): capture flow revamp
2 parents 05428ef + c2264b6 commit df06483

File tree

21 files changed

+3186
-1577
lines changed

21 files changed

+3186
-1577
lines changed

maxun-core/src/interpret.ts

Lines changed: 179 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,15 @@ export default class Interpreter extends EventEmitter {
7373

7474
private cumulativeResults: Record<string, any>[] = [];
7575

76+
private namedResults: Record<string, Record<string, any>> = {};
77+
78+
private screenshotCounter: number = 0;
79+
80+
private serializableDataByType: Record<string, Record<string, any>> = {
81+
scrapeList: {},
82+
scrapeSchema: {}
83+
};
84+
7685
constructor(workflow: WorkflowFile, options?: Partial<InterpreterOptions>) {
7786
super();
7887
this.workflow = workflow.workflow;
@@ -402,15 +411,37 @@ export default class Interpreter extends EventEmitter {
402411
* Beware of false linter errors - here, we know better!
403412
*/
404413
const wawActions: Record<CustomFunctions, (...args: any[]) => void> = {
405-
screenshot: async (params: PageScreenshotOptions) => {
414+
screenshot: async (
415+
params: PageScreenshotOptions,
416+
nameOverride?: string
417+
) => {
406418
if (this.options.debugChannel?.setActionType) {
407-
this.options.debugChannel.setActionType('screenshot');
419+
this.options.debugChannel.setActionType("screenshot");
408420
}
409421

410422
const screenshotBuffer = await page.screenshot({
411-
...params, path: undefined,
423+
...params,
424+
path: undefined,
412425
});
413-
await this.options.binaryCallback(screenshotBuffer, 'image/png');
426+
427+
const explicitName = (typeof nameOverride === 'string' && nameOverride.trim().length > 0) ? nameOverride.trim() : null;
428+
let screenshotName: string;
429+
430+
if (explicitName) {
431+
screenshotName = explicitName;
432+
} else {
433+
this.screenshotCounter += 1;
434+
screenshotName = `Screenshot ${this.screenshotCounter}`;
435+
}
436+
437+
await this.options.binaryCallback(
438+
{
439+
name: screenshotName,
440+
data: screenshotBuffer,
441+
mimeType: "image/png",
442+
},
443+
"image/png"
444+
);
414445
},
415446
enqueueLinks: async (selector: string) => {
416447
if (this.options.debugChannel?.setActionType) {
@@ -476,21 +507,55 @@ export default class Interpreter extends EventEmitter {
476507
this.cumulativeResults = [];
477508
}
478509

479-
if (this.cumulativeResults.length === 0) {
480-
this.cumulativeResults.push({});
481-
}
482-
483-
const mergedResult = this.cumulativeResults[0];
484510
const resultToProcess = Array.isArray(scrapeResult) ? scrapeResult[0] : scrapeResult;
485511

486-
Object.entries(resultToProcess).forEach(([key, value]) => {
487-
if (value !== undefined) {
488-
mergedResult[key] = value;
512+
if (this.cumulativeResults.length === 0) {
513+
const newRow = {};
514+
Object.entries(resultToProcess).forEach(([key, value]) => {
515+
if (value !== undefined) {
516+
newRow[key] = value;
517+
}
518+
});
519+
this.cumulativeResults.push(newRow);
520+
} else {
521+
const lastRow = this.cumulativeResults[this.cumulativeResults.length - 1];
522+
const newResultKeys = Object.keys(resultToProcess).filter(key => resultToProcess[key] !== undefined);
523+
const hasRepeatedKeys = newResultKeys.some(key => lastRow.hasOwnProperty(key));
524+
525+
if (hasRepeatedKeys) {
526+
const newRow = {};
527+
Object.entries(resultToProcess).forEach(([key, value]) => {
528+
if (value !== undefined) {
529+
newRow[key] = value;
530+
}
531+
});
532+
this.cumulativeResults.push(newRow);
533+
} else {
534+
Object.entries(resultToProcess).forEach(([key, value]) => {
535+
if (value !== undefined) {
536+
lastRow[key] = value;
537+
}
538+
});
489539
}
490-
});
540+
}
491541

492-
console.log("Updated merged result:", mergedResult);
493-
await this.options.serializableCallback([mergedResult]);
542+
const actionType = "scrapeSchema";
543+
const actionName = (schema as any).__name || "Texts";
544+
545+
if (!this.namedResults[actionType]) this.namedResults[actionType] = {};
546+
this.namedResults[actionType][actionName] = this.cumulativeResults;
547+
548+
if (!this.serializableDataByType[actionType]) this.serializableDataByType[actionType] = {};
549+
if (!this.serializableDataByType[actionType][actionName]) {
550+
this.serializableDataByType[actionType][actionName] = [];
551+
}
552+
553+
this.serializableDataByType[actionType][actionName] = [...this.cumulativeResults];
554+
555+
await this.options.serializableCallback({
556+
scrapeList: this.serializableDataByType.scrapeList,
557+
scrapeSchema: this.serializableDataByType.scrapeSchema
558+
});
494559
},
495560

496561
scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => {
@@ -508,18 +573,62 @@ export default class Interpreter extends EventEmitter {
508573
return;
509574
}
510575

511-
await this.ensureScriptsLoaded(page);
576+
try {
577+
await this.ensureScriptsLoaded(page);
512578

513-
if (this.options.debugChannel?.incrementScrapeListIndex) {
514-
this.options.debugChannel.incrementScrapeListIndex();
515-
}
579+
if (this.options.debugChannel?.incrementScrapeListIndex) {
580+
this.options.debugChannel.incrementScrapeListIndex();
581+
}
516582

517-
if (!config.pagination) {
518-
const scrapeResults: Record<string, any>[] = await page.evaluate((cfg) => window.scrapeList(cfg), config);
519-
await this.options.serializableCallback(scrapeResults);
520-
} else {
521-
const scrapeResults: Record<string, any>[] = await this.handlePagination(page, config);
522-
await this.options.serializableCallback(scrapeResults);
583+
let scrapeResults = [];
584+
585+
if (!config.pagination) {
586+
scrapeResults = await page.evaluate((cfg) => {
587+
try {
588+
return window.scrapeList(cfg);
589+
} catch (error) {
590+
console.warn('ScrapeList evaluation failed:', error.message);
591+
return [];
592+
}
593+
}, config);
594+
} else {
595+
scrapeResults = await this.handlePagination(page, config);
596+
}
597+
598+
if (!Array.isArray(scrapeResults)) {
599+
scrapeResults = [];
600+
}
601+
602+
const actionType = "scrapeList";
603+
const actionName = (config as any).__name || "List";
604+
605+
if (!this.serializableDataByType[actionType]) this.serializableDataByType[actionType] = {};
606+
if (!this.serializableDataByType[actionType][actionName]) {
607+
this.serializableDataByType[actionType][actionName] = [];
608+
}
609+
610+
this.serializableDataByType[actionType][actionName].push(...scrapeResults);
611+
612+
await this.options.serializableCallback({
613+
scrapeList: this.serializableDataByType.scrapeList,
614+
scrapeSchema: this.serializableDataByType.scrapeSchema
615+
});
616+
} catch (error) {
617+
console.error('ScrapeList action failed completely:', error.message);
618+
619+
const actionType = "scrapeList";
620+
const actionName = (config as any).__name || "List";
621+
622+
if (!this.namedResults[actionType]) this.namedResults[actionType] = {};
623+
this.namedResults[actionType][actionName] = [];
624+
625+
if (!this.serializableDataByType[actionType]) this.serializableDataByType[actionType] = {};
626+
this.serializableDataByType[actionType][actionName] = [];
627+
628+
await this.options.serializableCallback({
629+
scrapeList: this.serializableDataByType.scrapeList,
630+
scrapeSchema: this.serializableDataByType.scrapeSchema
631+
});
523632
}
524633
},
525634

@@ -595,12 +704,56 @@ export default class Interpreter extends EventEmitter {
595704

596705

597706
for (const step of steps) {
707+
if (this.isAborted) {
708+
this.log('Workflow aborted during step execution', Level.WARN);
709+
return;
710+
}
711+
598712
this.log(`Launching ${String(step.action)}`, Level.LOG);
599713

714+
let stepName: string | null = null;
715+
try {
716+
const debug = this.options.debugChannel;
717+
if (debug?.setActionType) {
718+
debug.setActionType(String(step.action));
719+
}
720+
721+
if ((step as any)?.name) {
722+
stepName = (step as any).name;
723+
} else if (
724+
Array.isArray((step as any)?.args) &&
725+
(step as any).args.length > 0 &&
726+
typeof (step as any).args[0] === "object" &&
727+
"__name" in (step as any).args[0]
728+
) {
729+
stepName = (step as any).args[0].__name;
730+
} else if (
731+
typeof (step as any)?.args === "object" &&
732+
step?.args !== null &&
733+
"__name" in (step as any).args
734+
) {
735+
stepName = (step as any).args.__name;
736+
}
737+
738+
if (!stepName) {
739+
stepName = String(step.action);
740+
}
741+
742+
if (debug && typeof (debug as any).setActionName === "function") {
743+
(debug as any).setActionName(stepName);
744+
}
745+
} catch (err) {
746+
this.log(`Failed to set action name/type: ${(err as Error).message}`, Level.WARN);
747+
}
748+
600749
if (step.action in wawActions) {
601750
// "Arrayifying" here should not be needed (TS + syntax checker - only arrays; but why not)
602751
const params = !step.args || Array.isArray(step.args) ? step.args : [step.args];
603-
await wawActions[step.action as CustomFunctions](...(params ?? []));
752+
if (step.action === 'screenshot') {
753+
await (wawActions.screenshot as any)(...(params ?? []), stepName ?? undefined);
754+
} else {
755+
await wawActions[step.action as CustomFunctions](...(params ?? []));
756+
}
604757
} else {
605758
if (this.options.debugChannel?.setActionType) {
606759
this.options.debugChannel.setActionType(String(step.action));

maxun-core/src/preprocessor.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,8 @@ export default class Preprocessor {
3636
what: Joi.array().items({
3737
action: Joi.string().required(),
3838
args: Joi.array().items(Joi.any()),
39+
name: Joi.string(),
40+
actionId: Joi.string()
3941
}).required(),
4042
}),
4143
).required(),

maxun-core/src/types/workflow.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ export type CustomFunctions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot
3232

3333
export type What = {
3434
action: MethodNames<Page> | CustomFunctions,
35-
args?: any[]
35+
args?: any[],
36+
name?: string,
37+
actionId?: string
3638
};
3739

3840
export type PageState = Partial<BaseConditions>;

0 commit comments

Comments
 (0)