Skip to content

Commit 09aa13c

Browse files
committed
fixes
1 parent 1748475 commit 09aa13c

File tree

13 files changed

+570
-124
lines changed

13 files changed

+570
-124
lines changed

botasaurus-controls/src/index.ts

Lines changed: 33 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,36 @@ const parseListOfTexts = (value:any)=>{
430430
}
431431
return value;
432432
}
433-
433+
/**
 * Builds a user-facing explanation for a failed `JSON.parse` call.
 *
 * The input text is checked for two common mistakes first (single-quoted
 * strings, trailing commas); otherwise the message falls back to what the
 * engine reported, adding a short excerpt of the input around the reported
 * position when one is available.
 *
 * @param e - Whatever was thrown by the parser. Only its `message` is read;
 *   a missing, empty or non-string message is treated as `'Invalid JSON'`.
 * @param trimmedValue - The text that failed to parse.
 * @returns A message that always starts with `'Invalid JSON: '`.
 */
function createHelpfulJsonError(e: any, trimmedValue: string): string {
  // `e` is untyped: guard against null/undefined and non-string messages so
  // that building the error message can never throw on its own.
  const rawMessage = e?.message;
  const errorMessage =
    typeof rawMessage === 'string' && rawMessage ? rawMessage : 'Invalid JSON';

  // Extract position info if available
  const positionMatch = errorMessage.match(/position\s+(\d+)/i);
  const position = positionMatch ? parseInt(positionMatch[1], 10) : null;

  // Blank out string literals (including an unterminated one at the end) so
  // that punctuation inside string contents, e.g. "a, ]", is not mistaken
  // for a structural trailing comma.
  const outsideStrings = trimmedValue.replace(/"(?:[^"\\]|\\[\s\S])*"?/g, '""');

  let helpfulMessage = 'Invalid JSON: ';

  // Check for common mistakes
  if (trimmedValue.includes("'") && !trimmedValue.includes('"')) {
    helpfulMessage += "Use double quotes (\") instead of single quotes (') for strings.";
  } else if (/,\s*[}\]]/.test(outsideStrings)) {
    helpfulMessage += "Trailing comma found. Remove the comma before the closing bracket.";
  } else if (errorMessage.includes('Unexpected token')) {
    if (position !== null) {
      // Show up to 10 characters on either side of the reported position.
      const contextStart = Math.max(0, position - 10);
      const contextEnd = Math.min(trimmedValue.length, position + 10);
      const context = trimmedValue.substring(contextStart, contextEnd);
      helpfulMessage += `Unexpected character near position ${position}: "...${context}..."`;
    } else {
      helpfulMessage += errorMessage;
    }
  } else if (errorMessage.includes('Unexpected end')) {
    helpfulMessage += "JSON is incomplete. Check for missing closing brackets or quotes.";
  } else {
    helpfulMessage += errorMessage;
  }
  return helpfulMessage;
}
434463
function parseJSON(value: any): { parsed: any; error: string | null } {
435464
if (typeof value !== 'string') {
436465
return { parsed: value, error: null };
@@ -446,33 +475,7 @@ function parseJSON(value: any): { parsed: any; error: string | null } {
446475
return { parsed, error: null };
447476
} catch (e: any) {
448477
// Provide helpful error messages based on common JSON mistakes
449-
const errorMessage = e.message || 'Invalid JSON';
450-
451-
// Extract position info if available
452-
const positionMatch = errorMessage.match(/position\s+(\d+)/i);
453-
const position = positionMatch ? parseInt(positionMatch[1], 10) : null;
454-
455-
let helpfulMessage = 'Invalid JSON: ';
456-
457-
// Check for common mistakes
458-
if (trimmedValue.includes("'") && !trimmedValue.includes('"')) {
459-
helpfulMessage += "Use double quotes (\") instead of single quotes (') for strings.";
460-
} else if (/,\s*[}\]]/.test(trimmedValue)) {
461-
helpfulMessage += "Trailing comma found. Remove the comma before the closing bracket.";
462-
} else if (errorMessage.includes('Unexpected token')) {
463-
if (position !== null) {
464-
const contextStart = Math.max(0, position - 10);
465-
const contextEnd = Math.min(trimmedValue.length, position + 10);
466-
const context = trimmedValue.substring(contextStart, contextEnd);
467-
helpfulMessage += `Unexpected character near position ${position}: "...${context}..."`;
468-
} else {
469-
helpfulMessage += errorMessage;
470-
}
471-
} else if (errorMessage.includes('Unexpected end')) {
472-
helpfulMessage += "JSON is incomplete. Check for missing closing brackets or quotes.";
473-
} else {
474-
helpfulMessage += errorMessage;
475-
}
478+
let helpfulMessage = createHelpfulJsonError(e, trimmedValue)
476479

477480
return { parsed: null, error: helpfulMessage };
478481
}
@@ -1204,7 +1207,6 @@ private parse(data: any) {
12041207
}
12051208
}
12061209

1207-
12081210
function isInvalidFileType(file: any, acceptedFileTypes: any) {
12091211
const fileExtension = file.name.split(".").pop()?.toLowerCase()
12101212
return !acceptedFileTypes.includes(fileExtension)
@@ -1250,4 +1252,5 @@ function createControls(input_js: string | Function) {
12501252
}
12511253

12521254

1253-
export { Controls, createControls, FileTypes }
1255+
export { Controls, createControls, FileTypes }
1256+

js/botasaurus-server-js/src/api-config.ts

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,6 @@ function isValidUrl(urlString: string): boolean {
5656
}
5757
}
5858

59-
/**
60-
* Parse command line arguments for --master or --worker flags
61-
*/
62-
6359
/**
6460
* Check master endpoint health, retrying up to 3 times.
6561
*/
@@ -452,9 +448,8 @@ export function buildApp(
452448
enable_cache: boolean
453449
): FastifyInstance {
454450
const app = fastify({
455-
logger: true,
456-
// TODO: change appropriately as needed
457-
bodyLimit: 500 * 1024 * 1024 // 500MB
451+
logger: !isMaster,
452+
bodyLimit: 250 * 1024 * 1024 // 250MB (2.5× headroom for 100MB chunks)
458453
});
459454

460455
// Add CORS handling

js/botasaurus-server-js/src/download.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import {writeExcelStreamed, writeJsonStreamed, writeJson, writeCsvStreamed, writeCsv, writeExcel } from './writer'; // Fixed import statement
1+
import {writeExcelStreamed, writeJsonStreamed, writeJson, writeCsvStreamed, writeCsv, writeExcel, writeNdjson } from './writer'; // Fixed import statement
22
import writterHttp from './writer-http'; // Fixed import statement
33
import { getPathToDownloadsDirectory } from './paths'
44
import { TaskResults } from './task-results'
@@ -27,6 +27,9 @@ export function downloadResults(results: Array<Record<string, any>>, fmt: string
2727
} else {
2828
return writeExcel(results, filePath);
2929
}
30+
} else if (fmt === "ndjson") {
31+
// Files are already stored as NDJSON, so just copy the file
32+
return writeNdjson(TaskResults.generateTaskFilePath(taskId), filePath);
3033
}
3134

3235
throw new Error("Unsupported format");
@@ -63,6 +66,12 @@ export async function downloadResultsHttp(reply: any, results: Array<Record<stri
6366
} else {
6467
return writterHttp.writeExcel(results, reply.raw);
6568
}
69+
} else if (fmt === "ndjson") {
70+
headers["Content-Type"] = "application/x-ndjson";
71+
headers["Content-Disposition"] = `attachment; filename="${filename}.ndjson"`;
72+
reply.raw.writeHead(200, headers);
73+
// Files are already stored as NDJSON, so stream directly
74+
return writterHttp.writeNdjson(TaskResults.generateTaskFilePath(taskId), reply.raw);
6675
} else {
6776
throw new Error("Unsupported format");
6877
}

js/botasaurus-server-js/src/models.ts

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,40 @@ export function serializeTaskForRunTask(obj: Task){
181181
result_count: obj.result_count,
182182
};
183183
}
184+
185+
/**
 * Returns a new object whose properties are inserted in the order given by
 * `keys`, with values taken from `dictionary`.
 *
 * Every own enumerable key of `dictionary` must appear in `keys`; otherwise
 * an error naming the unexpected keys is thrown. Keys listed in `keys` that
 * `dictionary` lacks are still added, with the value `undefined`.
 *
 * @param dictionary - Source object; it is not modified.
 * @param keys - Desired key order for the result.
 * @returns A fresh object containing exactly the entries named by `keys`.
 * @throws Error if `dictionary` has own keys that are not listed in `keys`.
 */
function sortDictByKeys(dictionary: Record<string, any>, keys: string[]): Record<string, any> {
  // Set gives O(1) membership checks while scanning the dictionary.
  const allowedKeys = new Set(keys);

  // Only the dictionary's own keys count as entries; inherited enumerable
  // properties (which `for...in` would also visit) are ignored.
  const extraKeys = Object.keys(dictionary).filter((key) => !allowedKeys.has(key));
  if (extraKeys.length > 0) {
    throw new Error(`Found keys in dictionary that weren't in keys list: ${extraKeys.join(', ')}`);
  }

  // Insert in the requested order; object property order follows insertion
  // order for non-integer-like string keys.
  const newDict: Record<string, any> = {};
  for (const key of keys) {
    newDict[key] = dictionary[key];
  }
  return newDict;
}
217+
184218
async function serializeTask(obj: Task, withResult: boolean): Promise<any> {
185219
const taskId = obj.id;
186220
const status = obj.status;
@@ -210,10 +244,12 @@ async function serializeTask(obj: Task, withResult: boolean): Promise<any> {
210244
updated_at: isoformat(obj.updated_at),
211245
};
212246
}
247+
export type StatusKind = typeof TaskStatus[keyof typeof TaskStatus]
248+
213249
// Task model
214250
class Task {
215251
id!: number;
216-
status!: string;
252+
status!: StatusKind;
217253
sort_id!: number;
218254
task_name!: string;
219255
scraper_name!: string;
@@ -248,7 +284,7 @@ class Task {
248284

249285
return new Task(x)
250286
}
251-
export { getAutoincrementId, createTask, db, Task, TaskStatus, removeDuplicatesByKey, calculateDuration, isoformat, serializeUiOutputTask, serializeUiDisplayTask, serializeTask , initAutoIncrementDb};
287+
export { getAutoincrementId, createTask, db, Task, TaskStatus, removeDuplicatesByKey, calculateDuration, isoformat, serializeUiOutputTask, serializeUiDisplayTask, serializeTask, sortDictByKeys, initAutoIncrementDb };
252288

253289

254290

0 commit comments

Comments
 (0)