Skip to content

Commit c4bca34

Browse files
committed
fix: stripEnvelopeMetadata - MR2 anchored regex + hidden JSON key-order bug
- MR2: INLINE_BOILERPLATE_RE is now anchored with ^ so that boilerplate quoted later in the payload is no longer stripped.
- Hidden bug: the JSON stripper regex only matched when message_id came before sender_id; it now uses order-independent lookaheads.
- Added 4 new test cases covering the fixes.
1 parent 67ca0dd commit c4bca34

File tree

2 files changed

+52
-5
lines changed

2 files changed

+52
-5
lines changed

src/smart-extractor.ts

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,16 @@ export function stripEnvelopeMetadata(text: string): string {
8080
// Also matches when the wrapper prefix is on its own line ("]\n" = no content after ]).
8181
const WRAPPER_LINE_RE = /^\[(?:Subagent Context|Subagent Task)\](?:\s|$|\n)?/i;
8282
const BOILERPLATE_RE = /^(?:Results auto-announce to your requester\.?|do not busy-poll for status\.?|Reply with a brief acknowledgment only\.?|Do not use any memory tools\.?)$/im;
83-
// Non-anchored version for inline content: matches boilerplate phrases anywhere in a string.
83+
// Anchored inline variant: only strip boilerplate when it starts the wrapper
84+
// remainder. This avoids erasing legitimate inline payload that merely quotes
85+
// a boilerplate phrase later in the sentence.
86+
// Repeat the anchored segment so composite wrappers like "You are running...
87+
// Results auto-announce..." are fully removed before preserving any payload.
8488
// The subagent running phrase uses (?<=\.)\s+|$ alternation (same as old
8589
// RUNTIME_WRAPPER_BOILERPLATE_RE) so that parenthetical depth like "(depth 1/1)."
8690
// is included before the ending whitespace, correctly stripping the full phrase.
8791
const INLINE_BOILERPLATE_RE =
88-
/(?:You are running as a subagent\b.*?(?:(?<=\.)\s+|$)|Results auto-announce to your requester\.?|do not busy-poll for status\.?|Reply with a brief acknowledgment only\.?|Do not use any memory tools\.?)/gi;
92+
/^(?:(?:You are running as a subagent\b.*?(?:(?<=\.)\s+|$)|Results auto-announce to your requester\.?\s*|do not busy-poll for status\.?\s*|Reply with a brief acknowledgment only\.?\s*|Do not use any memory tools\.?\s*))+/i;
8993
// Anchor to start of line — prevents quoted/cited false-positives
9094
const SUBAGENT_RUNNING_RE = /^You are running as a subagent\b/i;
9195

@@ -133,8 +137,8 @@ export function stripEnvelopeMetadata(text: string): string {
133137
let remainder = afterPrefix;
134138
// 2. Remove leading boilerplate phrases from remainder (handles inline
135139
// wrapper+boilerplate like "[Subagent Context] ... Results auto-announce...").
136-
// Use INLINE_BOILERPLATE_RE (non-anchored, includes subagent phrase) so
137-
// boilerplate embedded anywhere in the inline content is also removed.
140+
// Use INLINE_BOILERPLATE_RE (anchored, includes subagent phrase) so only
141+
// leading wrapper boilerplate is removed while quoted user payload remains.
138142
remainder = remainder.replace(INLINE_BOILERPLATE_RE, "").replace(/\s{2,}/g, " ").trim();
139143
// 3. Keep remainder if non-empty (non-boilerplate inline content preserved);
140144
// strip the whole line if only boilerplate was present
@@ -202,7 +206,7 @@ export function stripEnvelopeMetadata(text: string): string {
202206
// 3. Strip any remaining JSON blocks that look like envelope metadata
203207
// (contain message_id and sender_id fields)
204208
cleaned = cleaned.replace(
205-
/```json\s*\{[^}]*"message_id"\s*:[^}]*"sender_id"\s*:[^}]*\}\s*```/g,
209+
/```json\s*(?=\{[\s\S]*?"message_id"\s*:)(?=\{[\s\S]*?"sender_id"\s*:)\{[\s\S]*?\}\s*```/g,
206210
"",
207211
);
208212

test/strip-envelope-metadata.test.mjs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,15 @@ describe("stripEnvelopeMetadata", () => {
213213
assert.doesNotMatch(result, /Subagent Context/);
214214
});
215215

216+
it("preserves inline wrapper payload that only mentions boilerplate later in the sentence", () => {
217+
const input = [
218+
"[Subagent Context] User quoted the phrase Reply with a brief acknowledgment only. for documentation.",
219+
].join("\n");
220+
221+
const result = stripEnvelopeMetadata(input);
222+
assert.equal(result, "User quoted the phrase Reply with a brief acknowledgment only. for documentation.");
223+
});
224+
216225
// FIX 2 regression: wrapper inline boilerplate should still be stripped
217226
it("strips boilerplate-only inline content after wrapper prefix", () => {
218227
const input = [
@@ -223,6 +232,24 @@ describe("stripEnvelopeMetadata", () => {
223232
assert.equal(result, "");
224233
});
225234

235+
it("strips leading inline boilerplate but preserves payload that follows it", () => {
236+
const input = [
237+
"[Subagent Task] Reply with a brief acknowledgment only. Then summarize the failing test.",
238+
].join("\n");
239+
240+
const result = stripEnvelopeMetadata(input);
241+
assert.equal(result, "Then summarize the failing test.");
242+
});
243+
244+
it("strips multiple leading boilerplate phrases before preserving inline payload", () => {
245+
const input = [
246+
"[Subagent Task] Reply with a brief acknowledgment only. Do not use any memory tools. Actual user content starts here.",
247+
].join("\n");
248+
249+
const result = stripEnvelopeMetadata(input);
250+
assert.equal(result, "Actual user content starts here.");
251+
});
252+
226253
it("handles Telegram-style envelope headers", () => {
227254
const input = [
228255
"System: [2026-03-18 14:21:36 GMT+8] Telegram[bot123] DM | user_456 [msg:12345]",
@@ -249,6 +276,22 @@ describe("stripEnvelopeMetadata", () => {
249276
assert.doesNotMatch(result, /message_id/);
250277
});
251278

279+
it("strips standalone JSON blocks when sender_id appears before message_id", () => {
280+
const input = [
281+
"Some text before",
282+
"```json",
283+
'{"sender_id": "ou_yyy", "message_id": "om_xxx", "timestamp": "2026-03-18"}',
284+
"```",
285+
"Some text after",
286+
].join("\n");
287+
288+
const result = stripEnvelopeMetadata(input);
289+
assert.match(result, /Some text before/);
290+
assert.match(result, /Some text after/);
291+
assert.doesNotMatch(result, /message_id/);
292+
assert.doesNotMatch(result, /sender_id/);
293+
});
294+
252295
it("collapses excessive blank lines after stripping", () => {
253296
const input = [
254297
"System: [2026-03-18 14:21:36 GMT+8] Feishu[default] DM | ou_xxx [msg:om_xxx]",

0 commit comments

Comments
 (0)