@@ -456,10 +456,10 @@ ${chunk}`,
456456 * Optimized to handle large content by chunking and processing in parallel
457457 */
458458async function parseQuestionsAndAnswers ( content : string ) : Promise < QuestionAnswer [ ] > {
459- // GPT-5-mini can handle ~128k tokens. Chunk by question count + char limit for efficiency .
459+ // GPT-5-mini can handle ~128k tokens. Chunk by individual questions (1 question = 1 chunk) for parallel processing .
460460 const MAX_CHUNK_SIZE_CHARS = 80_000 ;
461461 const MIN_CHUNK_SIZE_CHARS = 5_000 ;
462- const MAX_QUESTIONS_PER_CHUNK = 35 ;
462+ const MAX_QUESTIONS_PER_CHUNK = 1 ; // Each chunk contains exactly one question
463463
464464 const chunkInfos = buildQuestionAwareChunks ( content , {
465465 maxChunkChars : MAX_CHUNK_SIZE_CHARS ,
@@ -482,10 +482,10 @@ async function parseQuestionsAndAnswers(content: string): Promise<QuestionAnswer
482482
483483 const totalEstimatedQuestions = chunkInfos . reduce ( ( sum , chunk ) => sum + chunk . questionCount , 0 ) ;
484484
485- logger . info ( 'Chunking content by question count for parallel processing' , {
485+ logger . info ( 'Chunking content by individual questions (1 question per chunk) for parallel processing' , {
486486 contentLength : content . length ,
487487 totalChunks : chunkInfos . length ,
488- avgQuestionsPerChunk : Number ( ( totalEstimatedQuestions / chunkInfos . length || 0 ) . toFixed ( 2 ) ) ,
488+ questionsPerChunk : 1 , // Each chunk contains exactly one question
489489 } ) ;
490490
491491 // Process all chunks in parallel for maximum speed
@@ -547,60 +547,51 @@ function buildQuestionAwareChunks(
547547 return [ ] ;
548548 }
549549
550- if ( trimmedContent . length <= options . minChunkChars ) {
551- return [
552- {
553- content : trimmedContent ,
554- questionCount : estimateQuestionCount ( trimmedContent ) ,
555- } ,
556- ] ;
557- }
558-
559550 const chunks : ChunkInfo [ ] = [ ] ;
560551 const lines = trimmedContent . split ( / \r ? \n / ) ;
561- let buffer : string [ ] = [ ] ;
562- let bufferCharCount = 0 ;
563- let bufferQuestionCount = 0 ;
552+ let currentChunk : string [ ] = [ ] ;
553+ let currentQuestionFound = false ;
564554
565555 const pushChunk = ( ) => {
566- const chunkText = buffer . join ( '\n' ) . trim ( ) ;
556+ const chunkText = currentChunk . join ( '\n' ) . trim ( ) ;
567557 if ( ! chunkText ) {
568558 return ;
569559 }
570560 chunks . push ( {
571561 content : chunkText ,
572- questionCount : bufferQuestionCount || estimateQuestionCount ( chunkText ) ,
562+ questionCount : 1 , // Each chunk contains exactly one question
573563 } ) ;
574- buffer = [ ] ;
575- bufferCharCount = 0 ;
576- bufferQuestionCount = 0 ;
564+ currentChunk = [ ] ;
565+ currentQuestionFound = false ;
577566 } ;
578567
579568 for ( const line of lines ) {
580- const originalLine = line ;
581569 const trimmedLine = line . trim ( ) ;
582570 const isEmpty = trimmedLine . length === 0 ;
583571 const looksLikeQuestion = ! isEmpty && looksLikeQuestionLine ( trimmedLine ) ;
584572
585- const exceedsCharBudget = bufferCharCount + originalLine . length > options . maxChunkChars ;
586- const exceedsQuestionBudget = bufferQuestionCount >= options . maxQuestionsPerChunk ;
587-
588- if ( ( exceedsCharBudget || ( exceedsQuestionBudget && looksLikeQuestion ) ) && buffer . length ) {
573+ // If we find a new question and we already have a question in the current chunk, start a new chunk
574+ if ( looksLikeQuestion && currentQuestionFound && currentChunk . length > 0 ) {
589575 pushChunk ( ) ;
590576 }
591577
592- if ( ! isEmpty || buffer . length ) {
593- buffer . push ( originalLine ) ;
594- bufferCharCount += originalLine . length + 1 ;
578+ // Add line to current chunk; blank lines are kept only once the chunk has content, so leading blank lines are dropped
579+ if ( ! isEmpty || currentChunk . length > 0 ) {
580+ currentChunk . push ( line ) ;
595581 }
596582
583+ // Mark that we've found a question in this chunk
597584 if ( looksLikeQuestion ) {
598- bufferQuestionCount += 1 ;
585+ currentQuestionFound = true ;
599586 }
600587 }
601588
602- pushChunk ( ) ;
589+ // Push the last chunk if it has content
590+ if ( currentChunk . length > 0 ) {
591+ pushChunk ( ) ;
592+ }
603593
594+ // Fallback for the case where no chunk was produced; content with no detected question lines is already emitted above as one chunk
604595 return chunks . length > 0
605596 ? chunks
606597 : [
0 commit comments