@@ -76,13 +76,43 @@ Guidelines for determining accuracy:
7676- NOT_ACCURATE: Missing critical information that would mislead developers
7777- NOT_ACCURATE: Incorrect code examples or API usage
7878
79- You must respond in the following JSON format ONLY:
80- {
81- "status": "ACCURATE" or "NOT_ACCURATE",
82- "reasoning": "Brief explanation of your decision (max 200 characters)"
83- }
79+ You must respond in exactly this format:
80+
81+ STATUS: [ACCURATE or NOT_ACCURATE]
82+ REASONING: [Brief explanation of your decision in one sentence]
83+
84+ Do not include any other text, formatting, or markdown in your response.` ;
85+
/**
 * Parse a model verification reply into a status and a short reasoning.
 *
 * Expected reply shape (what the prompt asks for):
 *
 *   STATUS: ACCURATE | NOT_ACCURATE
 *   REASONING: one sentence
 *
 * Parsing is deliberately lenient about decoration: labels are matched
 * case-insensitively, and markdown emphasis (`**STATUS:**`) or square brackets
 * around the values (`[NOT_ACCURATE]`) are tolerated and stripped.
 *
 * If the structured form cannot be fully recovered, a fallback scans the whole
 * text. The fallback fails closed: any negative spelling ("NOT_ACCURATE",
 * "NOT ACCURATE", "not-accurate", "inaccurate") wins over a bare "accurate",
 * so a prose reply such as "this is not accurate" is never reported as
 * ACCURATE.
 *
 * @param text Raw text returned by the model.
 * @returns The parsed status and reasoning, or `null` when no verdict can be
 *   found anywhere in the text.
 */
function parse_verification_response(text: string): {
	status: 'ACCURATE' | 'NOT_ACCURATE';
	reasoning: string;
} | null {
	// Collapse every accepted spelling of the verdict to the canonical literal.
	const to_status = (raw: string): 'ACCURATE' | 'NOT_ACCURATE' =>
		/^NOT/i.test(raw) ? 'NOT_ACCURATE' : 'ACCURATE';

	// `\W*` around the colon skips whitespace, `*` and `[` so markdown/bracketed
	// replies still match; the trailing `\b` rejects words like "ACCURATELY".
	const status_match = text.match(/STATUS\W*?:\W*(NOT[\s_-]?ACCURATE|ACCURATE)\b/i);
	const reasoning_match = text.match(/REASONING\W*?:\s*(.+?)(?:\n|$)/i);

	const explicit_status = status_match?.[1];
	const explicit_reasoning = reasoning_match?.[1]
		?.trim()
		.replace(/^\*+\s*/, '') // leftover markdown emphasis after "REASONING:"
		.replace(/^\[(.*)\]$/, '$1') // brackets echoed from the prompt template
		.trim();

	if (explicit_status && explicit_reasoning) {
		return {
			status: to_status(explicit_status),
			reasoning: explicit_reasoning,
		};
	}

	// Fallback: prefer an explicit STATUS line if one was found; otherwise look
	// for a verdict anywhere, checking negative spellings first (fail closed).
	let status: 'ACCURATE' | 'NOT_ACCURATE';
	if (explicit_status) {
		status = to_status(explicit_status);
	} else if (/\b(?:NOT[\s_-]?ACCURATE|INACCURATE)\b/i.test(text)) {
		status = 'NOT_ACCURATE';
	} else if (/\bACCURATE\b/i.test(text)) {
		status = 'ACCURATE';
	} else {
		return null;
	}

	// Use the first few non-blank lines (capped at 200 chars) as the reasoning.
	const context = text
		.split('\n')
		.filter((line) => line.trim())
		.slice(0, 3)
		.join(' ')
		.slice(0, 200);

	return {
		status,
		reasoning: context || 'Could not extract detailed reasoning',
	};
}
86116
87117async function main ( ) {
88118 program . parse ( ) ;
@@ -115,10 +145,7 @@ async function main() {
115145 sections = sections_to_verify . slice ( 0 , 2 ) ;
116146 }
117147
118- console . log ( `\n📋 Will verify ${ sections . length } sections:` ) ;
119- for ( const slug of sections ) {
120- console . log ( ` - ${ slug } ` ) ;
121- }
148+ console . log ( `\n📋 Will verify ${ sections . length } sections` ) ;
122149
123150 // Dry run mode: exit before API calls
124151 if ( options . dryRun ) {
@@ -150,7 +177,7 @@ async function main() {
150177 custom_id : `verify-${ index } ` ,
151178 params : {
152179 model : anthropic . get_model_identifier ( ) ,
153- max_tokens : 1024 ,
180+ max_tokens : 4096 , // Increased to allow full responses
154181 messages : [
155182 {
156183 role : 'user' ,
@@ -227,29 +254,28 @@ async function main() {
227254 continue ;
228255 }
229256
230- try {
231- // Parse the JSON response
232- const parsed = JSON . parse ( output_content . trim ( ) ) ;
233- const status = parsed . status as 'ACCURATE' | 'NOT_ACCURATE' ;
234- const reasoning = parsed . reasoning as string ;
235-
236- verification_results . push ( {
237- slug,
238- status,
239- reasoning,
240- } ) ;
257+ // Parse using regex instead of strict JSON parsing
258+ const parsed = parse_verification_response ( output_content ) ;
241259
242- const emoji = status === 'ACCURATE' ? '✅' : '❌' ;
243- console . log ( ` ${ emoji } ${ slug } : ${ status } ` ) ;
244- } catch ( error ) {
245- console . error ( ` ❌ Failed to parse response for ${ slug } :` , error ) ;
246- console . error ( ` Raw response: ${ output_content } ` ) ;
260+ if ( ! parsed ) {
261+ console . error ( ` ❌ Failed to parse response for ${ slug } ` ) ;
262+ console . error ( ` Raw response: ${ output_content . slice ( 0 , 200 ) } ...` ) ;
247263 verification_results . push ( {
248264 slug,
249265 status : 'NOT_ACCURATE' ,
250266 reasoning : `Failed to parse verification response: ${ output_content . slice ( 0 , 100 ) } ` ,
251267 } ) ;
268+ continue ;
252269 }
270+
271+ verification_results . push ( {
272+ slug,
273+ status : parsed . status ,
274+ reasoning : parsed . reasoning ,
275+ } ) ;
276+
277+ const emoji = parsed . status === 'ACCURATE' ? '✅' : '❌' ;
278+ console . log ( ` ${ emoji } ${ slug } : ${ parsed . status } ` ) ;
253279 }
254280
255281 // Calculate statistics
@@ -274,9 +300,7 @@ async function main() {
274300 // Validate output before writing
275301 const validated = v . safeParse ( verification_output_schema , output_data ) ;
276302 if ( ! validated . success ) {
277- throw new Error (
278- `Output validation failed: ${ JSON . stringify ( validated . issues , null , 2 ) } ` ,
279- ) ;
303+ throw new Error ( `Output validation failed: ${ JSON . stringify ( validated . issues , null , 2 ) } ` ) ;
280304 }
281305
282306 await writeFile ( output_path , JSON . stringify ( output_data , null , 2 ) , 'utf-8' ) ;
@@ -285,7 +309,9 @@ async function main() {
285309 console . log ( '\n📊 Verification Summary:' ) ;
286310 console . log ( ` Total sections: ${ sections_to_verify . length } ` ) ;
287311 console . log ( ` Verified sections: ${ sections . length } ` ) ;
288- console . log ( ` ✅ Accurate: ${ accurate_count } (${ ( ( accurate_count / sections . length ) * 100 ) . toFixed ( 1 ) } %)` ) ;
312+ console . log (
313+ ` ✅ Accurate: ${ accurate_count } (${ ( ( accurate_count / sections . length ) * 100 ) . toFixed ( 1 ) } %)` ,
314+ ) ;
289315 console . log (
290316 ` ❌ Not Accurate: ${ not_accurate_count } (${ ( ( not_accurate_count / sections . length ) * 100 ) . toFixed ( 1 ) } %)` ,
291317 ) ;
@@ -294,9 +320,13 @@ async function main() {
294320 console . log ( '\n⚠️ Sections with issues:' ) ;
295321 verification_results
296322 . filter ( ( r ) => r . status === 'NOT_ACCURATE' )
323+ . slice ( 0 , 10 ) // Show first 10
297324 . forEach ( ( r ) => {
298325 console . log ( ` - ${ r . slug } : ${ r . reasoning } ` ) ;
299326 } ) ;
327+ if ( not_accurate_count > 10 ) {
328+ console . log ( ` ... and ${ not_accurate_count - 10 } more` ) ;
329+ }
300330 }
301331
302332 console . log ( `\n✅ Results written to: ${ output_path } ` ) ;
0 commit comments