Skip to content

Commit a194d64

Browse files
committed
optimize dataset items truncation logic
1 parent 7a790a2 commit a194d64

File tree

3 files changed

+60
-10
lines changed

3 files changed

+60
-10
lines changed

src/const.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,11 @@ export const defaults = {
4343

4444
// Actor output const
4545
// Character budget per dataset item. The MCP server multiplies this value by the number of
// returned items to get the character limit for the whole dataset output.
export const ACTOR_OUTPUT_MAX_CHARS_PER_ITEM = 5_000;
46-
export const ACTOR_OUTPUT_TRUNCATED_MESSAGE = `Output was truncated because it will not fit into context.`
47-
+ `There is no reason to call this tool again! You can use ${HelperTools.DATASET_GET_ITEMS} tool to get more items from the dataset.`;
46+
// Notice attached to the dataset output when items had to be dropped to fit into the context.
// It tells the consumer not to call the tool again and to fetch the remaining items with the
// HelperTools.DATASET_GET_ITEMS tool, skipping the items that were already returned.
export const ACTOR_OUTPUT_TRUNCATED_MESSAGE = `Output was truncated because it will not fit into context.
47+
There is no reason to call this tool again!
48+
You can use ${HelperTools.DATASET_GET_ITEMS} tool to get more items from the dataset.
49+
The items were truncated from the back, so if using the ${HelperTools.DATASET_GET_ITEMS} tool,
50+
you can skip the first N items that you already have.`;
4851

4952
export const ACTOR_ADDITIONAL_INSTRUCTIONS = `Never call/execute tool/Actor unless confirmed by the user.
5053
Workflow: When an Actor runs, it processes data and stores results in Apify storage,

src/mcp/server.ts

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ import log from '@apify/log';
1919

2020
import {
2121
ACTOR_OUTPUT_MAX_CHARS_PER_ITEM,
22-
ACTOR_OUTPUT_TRUNCATED_MESSAGE,
2322
defaults,
2423
SERVER_NAME,
2524
SERVER_VERSION,
@@ -29,7 +28,7 @@ import { actorNameToToolName } from '../tools/utils.js';
2928
import type { ActorMcpTool, ActorTool, HelperTool, ToolEntry } from '../types.js';
3029
import { createMCPClient } from './client.js';
3130
import { EXTERNAL_TOOL_CALL_TIMEOUT_MSEC } from './const.js';
32-
import { processParamsGetTools } from './utils.js';
31+
import { processParamsGetTools, truncateDatasetItems } from './utils.js';
3332

3433
type ActorsMcpServerOptions = {
3534
enableAddingActors?: boolean;
@@ -462,12 +461,17 @@ export class ActorsMcpServer {
462461
{ type: 'text', text: `Dataset information: ${JSON.stringify(datasetInfo)}` },
463462
];
464463

465-
const itemContents = items.items.map((item: Record<string, unknown>) => {
466-
const text = JSON.stringify(item).slice(0, ACTOR_OUTPUT_MAX_CHARS_PER_ITEM);
467-
return text.length === ACTOR_OUTPUT_MAX_CHARS_PER_ITEM
468-
? { type: 'text', text: `${text} ... ${ACTOR_OUTPUT_TRUNCATED_MESSAGE}` }
469-
: { type: 'text', text };
470-
});
464+
// Get max char length for whole dataset based on the number of items
465+
const maxDatasetLength = ACTOR_OUTPUT_MAX_CHARS_PER_ITEM * items.items.length;
466+
const itemContents = truncateDatasetItems(items, maxDatasetLength, datasetInfo?.itemCount || 0)
467+
.items.map(
468+
(item: Record<string, unknown>) => {
469+
return {
470+
type: 'text',
471+
text: JSON.stringify(item),
472+
};
473+
},
474+
);
471475
content.push(...itemContents);
472476
return { content };
473477
}

src/mcp/utils.ts

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import { createHash } from 'node:crypto';
22
import { parse } from 'node:querystring';
33

4+
import type { PaginatedList } from 'apify-client';
5+
6+
import { ACTOR_OUTPUT_TRUNCATED_MESSAGE } from '../const.js';
47
import { processInput } from '../input.js';
58
import { addRemoveTools, getActorsAsTools } from '../tools/index.js';
69
import type { Input, ToolEntry } from '../types.js';
@@ -58,3 +61,43 @@ export function parseInputParamsFromUrl(url: string): Input {
5861
const params = parse(query) as unknown as Input;
5962
return processInput(params);
6063
}
64+
65+
/**
66+
* Truncates dataset items to fit within a specified character limit.
67+
*
68+
* This function will remove items from the end of the dataset until the total
69+
* character count of the dataset items is within the specified limit.
70+
* If there is only one item (left) in the dataset, it will not be truncated.
71+
*/
72+
export function truncateDatasetItems(
73+
items: PaginatedList<Record<string, unknown>>,
74+
maxChars: number,
75+
originalItemCount: number,
76+
): PaginatedList<Record<string, unknown>> {
77+
// If within the limit, return as is.
78+
if (JSON.stringify(items).length <= maxChars) {
79+
return items;
80+
}
81+
82+
// Do not truncate single item datasets.
83+
if (items.items.length < 2) {
84+
return items;
85+
}
86+
87+
// Truncate from back and check if the total length is within the limit.
88+
while (items.items.length > 1) {
89+
if (JSON.stringify(items).length <= maxChars) {
90+
break; // If the dataset is within the limit, stop truncating.
91+
}
92+
items.items.pop(); // Remove the last item if the dataset exceeds the limit.
93+
}
94+
95+
// Add truncation message
96+
items.items.push({
97+
truncationInfo: ACTOR_OUTPUT_TRUNCATED_MESSAGE,
98+
originalItemCount,
99+
itemCountAfterTruncation: items.items.length,
100+
});
101+
102+
return items;
103+
}

0 commit comments

Comments
 (0)