Skip to content

Commit 4f3b725

Browse files
committed
fix: half-clone counts progress messages as user messages
The grep chains matched "type":"user" anywhere in the raw JSON line, including inside nested subagent progress data. This inflated the clean user message count (e.g. 53 vs actual 28), causing the cut point to land on a progress message instead of a real user turn. Replace all grep chains with a single filter_clean_user_msgs helper that extracts the top-level type (first "type":"..." in the line). Also filters isMeta, interrupted, and tool_result in one pass. Fix 4 pre-existing test failures (off-by-one from uncounted synthetic marker message, double-tag test checking wrong line). Add test for progress messages with nested type:user. Bump plugin to 0.14.9.
1 parent 0a06172 commit 4f3b725

File tree

4 files changed

+128
-38
lines changed

4 files changed

+128
-38
lines changed

.claude-plugin/marketplace.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"name": "dx",
1010
"source": "./",
1111
"description": "Developer experience essentials: GitHub Actions debugging, conversation cloning/half-cloning, context handoffs, and Reddit research via Gemini CLI",
12-
"version": "0.14.8"
12+
"version": "0.14.9"
1313
}
1414
]
1515
}

.claude-plugin/plugin.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "dx",
33
"description": "Developer experience essentials: GitHub Actions debugging, conversation cloning/half-cloning, context handoffs, and Reddit research via Gemini CLI",
4-
"version": "0.14.8",
4+
"version": "0.14.9",
55
"author": {
66
"name": "YK",
77
"email": "yoyoyosss@wearehackerone.com"

scripts/half-clone-conversation.sh

Lines changed: 52 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,32 @@ get_project_from_conv_file() {
9999
echo "$project_dirname" | sed 's|^-|/|' | sed 's|-|/|g'
100100
}
101101

102+
# Filter JSONL to clean user messages only.
103+
# Checks the FIRST "type":"..." in each line (the top-level type), so nested
104+
# "type":"user" inside progress/subagent data is ignored.
# NOTE(review): this relies on the top-level "type" key being serialized before
# any nested object that carries its own "type" key - confirm against the
# JSONL writer.
105+
# Also excludes tool_results, isMeta skill expansions, and interrupted messages.
# These three exclusions are plain substring tests against the whole line, so a
# line is dropped if the marker text appears anywhere in it.
106+
# Usage: filter_clean_user_msgs < file.jsonl (output matching lines)
107+
# filter_clean_user_msgs -n < file.jsonl (with line numbers: NR:line)
108+
# filter_clean_user_msgs -c < file.jsonl (count only)
# Input is read from stdin. Only $1 is inspected; any value other than -n or -c
# leaves the default "lines" mode in effect.
109+
filter_clean_user_msgs() {
110+
local mode="lines"
111+
if [ "${1:-}" = "-n" ]; then mode="numbered"; fi
112+
if [ "${1:-}" = "-c" ]; then mode="count"; fi
113+
awk -v mode="$mode" '
# Only lines containing at least one "type":"..." pair are considered at all.
114+
match($0, /"type":"[^"]*"/) {
# RSTART+8 skips the 8-character prefix "type":" and RLENGTH-9 additionally
# drops the closing quote, leaving just the value of the first type pair.
115+
t = substr($0, RSTART+8, RLENGTH-9)
116+
if ((t == "user" || t == "queue-operation") &&
117+
index($0, "\"type\":\"tool_result\"") == 0 &&
118+
index($0, "\"isMeta\":true") == 0 &&
119+
index($0, "Request interrupted by user") == 0) {
# count is maintained in every mode but only printed in count mode.
120+
count++
# NR is the line number within the stream read from stdin.
121+
if (mode == "numbered") print NR":"$0
122+
else if (mode == "lines") print
123+
}
124+
}
# count+0 forces a numeric 0 when no line matched (count never assigned).
125+
END { if (mode == "count") print count+0 }'
126+
}
127+
102128
# Pre-generate UUIDs for the awk script
103129
pre_generate_uuids() {
104130
local count="$1"
@@ -136,11 +162,11 @@ preview_conversation() {
136162
local total_lines
137163
total_lines=$(wc -l < "$source_file" | tr -d ' ')
138164
local first_user_text
139-
first_user_text=$(grep -E '"type":"(user|queue-operation)"' "$source_file" | grep -v '"type":"tool_result"' | head -1 | \
165+
first_user_text=$(filter_clean_user_msgs < "$source_file" | head -1 | \
140166
grep -oE '"(content|text)":"[^"]*"' | head -1 | \
141167
LC_ALL=C sed 's/"content":"//;s/"text":"//;s/"$//' | cut -c1-120 || true)
142168
local last_user_text
143-
last_user_text=$(grep -E '"type":"(user|queue-operation)"' "$source_file" | grep -v '"type":"tool_result"' | tail -1 | \
169+
last_user_text=$(filter_clean_user_msgs < "$source_file" | tail -1 | \
144170
grep -oE '"(content|text)":"[^"]*"' | head -1 | \
145171
LC_ALL=C sed 's/"content":"//;s/"text":"//;s/"$//' | cut -c1-120 || true)
146172

@@ -185,7 +211,7 @@ half_clone_conversation() {
185211
# Count "clean" user messages (not tool_results - those require a preceding tool_use)
186212
# A clean user message is one where we can start a conversation
187213
local total_clean_user_messages
188-
total_clean_user_messages=$(grep -E '"type":"(user|queue-operation)"' "$source_file" | grep -cv '"type":"tool_result"' || echo "0")
214+
total_clean_user_messages=$(filter_clean_user_msgs -c < "$source_file")
189215
log_info "Total clean user messages in conversation: $total_clean_user_messages"
190216

191217
if [ "$total_clean_user_messages" -lt 2 ]; then
@@ -200,9 +226,8 @@ half_clone_conversation() {
200226
keep_clean_count=$((total_clean_user_messages - skip_clean_count))
201227

202228
# OPTIMIZED: Find the line number where the target clean user message starts
203-
# Use grep -n to get all clean user message line numbers in one pass
204229
local clean_user_line_numbers
205-
clean_user_line_numbers=$(grep -nE '"type":"(user|queue-operation)"' "$source_file" | grep -v '"type":"tool_result"' | cut -d: -f1)
230+
clean_user_line_numbers=$(filter_clean_user_msgs -n < "$source_file" | cut -d: -f1)
206231

207232
# Get the line number of the (skip_clean_count + 1)th clean user message
208233
local skip_count
@@ -232,10 +257,9 @@ half_clone_conversation() {
232257
local last_clone_cmd_line=0
233258
local last_clean_user_line=0
234259

235-
# Get all user message lines with line numbers (much faster than per-line grep)
236-
# Filter to clean user messages (not tool_result, not isMeta)
260+
# Get all clean user message lines with line numbers
237261
local clean_user_lines
238-
clean_user_lines=$(grep -nE '"type":"(user|queue-operation)"' "$source_file" | grep -v '"type":"tool_result"' | grep -v '"isMeta":true' || true)
262+
clean_user_lines=$(filter_clean_user_msgs -n < "$source_file" || true)
239263

240264
if [ -n "$clean_user_lines" ]; then
241265
# Get the last clean user message line
@@ -356,6 +380,13 @@ half_clone_conversation() {
356380
return line
357381
}
358382
383+
# Return the value of the first "type":"..." pair found in line, or "" when
# the line contains no such pair.
# NOTE(review): treating the first pair as the top-level type assumes the
# top-level key is serialized before any nested "type" key - confirm.
function get_top_type(line) {
384+
if (match(line, /"type":"[^"]*"/)) {
385+
# RSTART+8 skips the 8-character prefix "type":" and RLENGTH-9 also drops
# the closing quote, so only the bare value is returned.
return substr(line, RSTART+8, RLENGTH-9)
386+
}
387+
return ""
388+
}
389+
359390
function halve_number(line, field, pattern, num, halved) {
360391
pattern = "\"" field "\":[0-9]+"
361392
if (match(line, pattern)) {
@@ -416,11 +447,16 @@ half_clone_conversation() {
416447
gsub("\"messageId\":\"" old_msgid "\"", "\"messageId\":\"" new_msgid "\"", line)
417448
}
418449
419-
# Tag first user message (including queue-operation messages)
420-
if (first_user && (index(line, "\"type\":\"user\"") > 0 || index(line, "\"type\":\"queue-operation\"") > 0)) {
421-
gsub("\"content\":\"", "\"content\":\"" clone_tag " ", line)
422-
gsub("\"text\":\"", "\"text\":\"" clone_tag " ", line)
423-
first_user = 0
450+
# Tag first genuine user message (check top-level type, skip isMeta/interrupted)
451+
if (first_user) {
452+
top_type = get_top_type(line)
453+
if ((top_type == "user" || top_type == "queue-operation") &&
454+
index(line, "\"isMeta\":true") == 0 &&
455+
index(line, "Request interrupted by user") == 0) {
456+
gsub("\"content\":\"", "\"content\":\"" clone_tag " ", line)
457+
gsub("\"text\":\"", "\"text\":\"" clone_tag " ", line)
458+
first_user = 0
459+
}
424460
}
425461
426462
# Halve token counts
@@ -460,16 +496,16 @@ half_clone_conversation() {
460496
# Update history.jsonl
461497
log_info "Updating history file..."
462498

463-
# Get display text from first user message in the KEPT portion
499+
# Get display text from first clean user message in the KEPT portion
464500
local display_text
465-
display_text=$(tail -n +"$((skip_count + 1))" "$source_file" | grep -E '"type":"(user|queue-operation)"' | head -1 | \
501+
display_text=$(tail -n +"$((skip_count + 1))" "$source_file" | filter_clean_user_msgs | head -1 | \
466502
grep -oE '"content":"[^"]*"' | head -1 | \
467503
LC_ALL=C sed 's/"content":"//;s/"$//' | \
468504
head -c 200 || echo "[Half-cloned conversation]")
469505

470506
if [ -z "$display_text" ]; then
471507
# Try array format
472-
display_text=$(tail -n +"$((skip_count + 1))" "$source_file" | grep -E '"type":"(user|queue-operation)"' | head -1 | \
508+
display_text=$(tail -n +"$((skip_count + 1))" "$source_file" | filter_clean_user_msgs | head -1 | \
473509
grep -oE '"text":"[^"]*"' | head -1 | \
474510
LC_ALL=C sed 's/"text":"//;s/"$//' | \
475511
head -c 200 || echo "[Half-cloned conversation]")

scripts/test-half-clone.sh

Lines changed: 74 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -108,9 +108,9 @@ get_new_session_from_output() {
108108
}
109109

110110
# Test 1: 6 messages (3 user, 3 assistant) -> 3 clean user msgs, skip 1, keep 2
111-
# Starts at user message 2 (line 3), keeps lines 3-6 = 4 messages + 1 reference = 5
111+
# Output: marker(1) + kept lines 3-6(4) + reference(1) = 6
112112
test_even_messages() {
113-
log_test "6 messages (3 clean user): should keep 4 lines + 1 reference = 5"
113+
log_test "6 messages (3 clean user): should produce 1 marker + 4 kept + 1 reference = 6"
114114

115115
local session_id
116116
session_id=$(create_test_conversation 6)
@@ -128,17 +128,17 @@ test_even_messages() {
128128

129129
local count
130130
count=$(count_messages "$new_file")
131-
if [ "$count" -eq 5 ]; then
132-
log_pass "Kept 5 messages (4 + reference)"
131+
if [ "$count" -eq 6 ]; then
132+
log_pass "Kept 6 messages (marker + 4 kept + reference)"
133133
else
134-
log_fail "Expected 5 messages, got $count"
134+
log_fail "Expected 6 messages, got $count"
135135
fi
136136
}
137137

138138
# Test 2: 7 messages (4 user, 3 assistant) -> 4 clean user msgs, skip 2, keep 2
139-
# Starts at user message 3 (line 5), keeps lines 5-7 = 3 messages + 1 reference = 4
139+
# Output: marker(1) + kept lines 5-7(3) + reference(1) = 5
140140
test_odd_messages() {
141-
log_test "7 messages (4 clean user): should keep 3 lines + 1 reference = 4"
141+
log_test "7 messages (4 clean user): should produce 1 marker + 3 kept + 1 reference = 5"
142142

143143
local session_id
144144
session_id=$(create_test_conversation 7)
@@ -151,17 +151,17 @@ test_odd_messages() {
151151

152152
local count
153153
count=$(count_messages "$new_file")
154-
if [ "$count" -eq 4 ]; then
155-
log_pass "Kept 4 messages (3 + reference)"
154+
if [ "$count" -eq 5 ]; then
155+
log_pass "Kept 5 messages (marker + 3 kept + reference)"
156156
else
157-
log_fail "Expected 4 messages, got $count"
157+
log_fail "Expected 5 messages, got $count"
158158
fi
159159
}
160160

161161
# Test 3: 4 messages (2 user, 2 assistant) -> 2 clean user msgs, skip 1, keep 1
162-
# Starts at user message 2 (line 3), keeps lines 3-4 = 2 messages + 1 reference = 3
162+
# Output: marker(1) + kept lines 3-4(2) + reference(1) = 4
163163
test_minimum_messages() {
164-
log_test "4 messages (2 clean user): should keep 2 lines + 1 reference = 3"
164+
log_test "4 messages (2 clean user): should produce 1 marker + 2 kept + 1 reference = 4"
165165

166166
local session_id
167167
session_id=$(create_test_conversation 4)
@@ -174,10 +174,10 @@ test_minimum_messages() {
174174

175175
local count
176176
count=$(count_messages "$new_file")
177-
if [ "$count" -eq 3 ]; then
178-
log_pass "Kept 3 messages (2 + reference)"
177+
if [ "$count" -eq 4 ]; then
178+
log_pass "Kept 4 messages (marker + 2 kept + reference)"
179179
else
180-
log_fail "Expected 3 messages, got $count"
180+
log_fail "Expected 4 messages, got $count"
181181
fi
182182
}
183183

@@ -335,19 +335,21 @@ test_double_tagging() {
335335
new_session=$(get_new_session_from_output "$output")
336336
local new_file="${TEST_PROJECTS_DIR}/${TEST_PROJECT_DIRNAME}/${new_session}.jsonl"
337337

338-
# Should have two [HALF-CLONE ...] tags - the new one prepended to the existing one
339-
local first_user_line
340-
first_user_line=$(grep '"type":"user"' "$new_file" | head -1)
338+
# The first user line is the synthetic marker ("Continued from session ...").
339+
# The second user line is the first KEPT message from the original, which
340+
# already had a [HALF-CLONE] tag and should now have a second one prepended.
341+
local kept_user_line
342+
kept_user_line=$(grep '"type":"user"' "$new_file" | grep -v "Continued from session" | head -1)
341343

342344
# Count occurrences of [HALF-CLONE pattern
343345
local tag_count
344-
tag_count=$(echo "$first_user_line" | grep -oE '\[HALF-CLONE [A-Z][a-z]+ [0-9]+ [0-9]+:[0-9]+\]' | wc -l | tr -d ' ')
346+
tag_count=$(echo "$kept_user_line" | grep -oE '\[HALF-CLONE [A-Z][a-z]+ [0-9]+ [0-9]+:[0-9]+\]' | wc -l | tr -d ' ')
345347

346348
if [ "$tag_count" -eq 2 ]; then
347349
log_pass "Double tagging works - found 2 [HALF-CLONE] tags"
348350
else
349351
log_fail "Expected 2 [HALF-CLONE] tags, found $tag_count"
350-
echo "First user line: $first_user_line"
352+
echo "Kept user line: $kept_user_line"
351353
fi
352354
}
353355

@@ -397,6 +399,57 @@ test_thinking_blocks_stripped() {
397399
fi
398400
}
399401

402+
# Test 11: Progress messages with nested "type":"user" should NOT count as clean user messages
# Builds an 8-line conversation containing two top-level "progress" lines whose
# payload embeds "type":"user", runs the half-clone script on it, and checks the
# clean-user-message count that the script logs.
403+
test_progress_with_nested_user_type() {
404+
log_test "Progress messages with nested type:user should not count as clean user messages"
405+
406+
local session_id
407+
session_id=$(uuidgen | tr '[:upper:]' '[:lower:]')
408+
local conv_file="${TEST_PROJECTS_DIR}/${TEST_PROJECT_DIRNAME}/${session_id}.jsonl"
409+
410+
local uuid1 uuid2 uuid3 uuid4 uuid5 uuid6 uuid7 uuid8
411+
uuid1=$(uuidgen | tr '[:upper:]' '[:lower:]')
412+
uuid2=$(uuidgen | tr '[:upper:]' '[:lower:]')
413+
uuid3=$(uuidgen | tr '[:upper:]' '[:lower:]')
414+
uuid4=$(uuidgen | tr '[:upper:]' '[:lower:]')
415+
uuid5=$(uuidgen | tr '[:upper:]' '[:lower:]')
416+
uuid6=$(uuidgen | tr '[:upper:]' '[:lower:]')
417+
uuid7=$(uuidgen | tr '[:upper:]' '[:lower:]')
418+
uuid8=$(uuidgen | tr '[:upper:]' '[:lower:]')
419+
420+
# Conversation: U1, A1, progress(nested user), progress(nested user), U2, A2, U3 CORRECT, A3
421+
# Real clean user messages: U1, U2, U3 = 3. Skip 1, keep 2. Start at U2.
422+
# If progress counted: 5 "clean" msgs (U1, P1, P2, U2, U3), skip 2 -> the cut would start at the 3rd counted line, i.e. the second progress line, not a real user turn
423+
# With the fix: 3 clean msgs, skip 1, start at U2 -> first real content is U2
424+
{
425+
generate_message "$uuid1" "null" "$session_id" "user" "Question 1"
426+
generate_message "$uuid2" "$uuid1" "$session_id" "assistant" "Answer 1"
427+
# Progress messages with nested "type":"user" (simulates subagent Task progress)
428+
echo "{\"type\":\"progress\",\"sessionId\":\"${session_id}\",\"data\":{\"message\":{\"type\":\"user\",\"message\":{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"subagent input\"}]}}},\"uuid\":\"${uuid3}\",\"timestamp\":\"2025-01-01T00:00:00.000Z\"}"
429+
echo "{\"type\":\"progress\",\"sessionId\":\"${session_id}\",\"data\":{\"message\":{\"type\":\"user\",\"message\":{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"another subagent input\"}]}}},\"uuid\":\"${uuid4}\",\"timestamp\":\"2025-01-01T00:00:00.000Z\"}"
430+
generate_message "$uuid5" "$uuid2" "$session_id" "user" "Question 2"
431+
generate_message "$uuid6" "$uuid5" "$session_id" "assistant" "Answer 2"
432+
generate_message "$uuid7" "$uuid6" "$session_id" "user" "Question 3 CORRECT START"
433+
generate_message "$uuid8" "$uuid7" "$session_id" "assistant" "Answer 3"
434+
} > "$conv_file"
435+
436+
local output
437+
output=$(run_half_clone "$session_id")
438+
439+
# NOTE(review): new_session and new_file are computed here but no assertion in
# this function reads them; the cut point in the cloned file is not verified.
local new_session
440+
new_session=$(get_new_session_from_output "$output")
441+
local new_file="${TEST_PROJECTS_DIR}/${TEST_PROJECT_DIRNAME}/${new_session}.jsonl"
442+
443+
# The output from the script should say "3" clean user messages
444+
if echo "$output" | grep -q "Total clean user messages in conversation: 3"; then
445+
log_pass "Progress messages with nested type:user correctly excluded from count (3 clean, not 5)"
446+
else
447+
# On failure, pull the digits out of the logged count line for the message.
local actual_count
448+
actual_count=$(echo "$output" | grep "Total clean user messages" | grep -oE '[0-9]+')
449+
log_fail "Expected 3 clean user messages, script counted $actual_count"
450+
fi
451+
}
452+
400453
# Main
401454
main() {
402455
echo "================================"
@@ -422,6 +475,7 @@ main() {
422475
test_history_entry
423476
test_double_tagging
424477
test_thinking_blocks_stripped
478+
test_progress_with_nested_user_type
425479

426480
echo ""
427481
echo "================================"

0 commit comments

Comments
 (0)