@@ -7,14 +7,6 @@ import { ICodeParser, CodeBlock } from "../interfaces"
77import { scannerExtensions } from "../shared/supported-extensions"
88import { MAX_BLOCK_CHARS , MIN_BLOCK_CHARS , MIN_CHUNK_REMAINDER_CHARS , MAX_CHARS_TOLERANCE_FACTOR } from "../constants"
99
10- /**
11- * Language-specific minimum block character thresholds
12- */
13- const LANGUAGE_THRESHOLDS : Record < string , number > = {
14- go : 50 , // Go has concise syntax
15- default : MIN_BLOCK_CHARS , // Default for other languages (100)
16- }
17-
1810/**
1911 * Implementation of the code parser interface
2012 */
@@ -75,15 +67,6 @@ export class CodeParser implements ICodeParser {
7567 return scannerExtensions . includes ( extension )
7668 }
7769
78- /**
79- * Gets the minimum block character threshold for a language
80- * @param language Language identifier
81- * @returns Minimum character threshold
82- */
83- private getMinBlockChars ( language : string ) : number {
84- return LANGUAGE_THRESHOLDS [ language ] || LANGUAGE_THRESHOLDS . default
85- }
86-
8770 /**
8871 * Creates a hash for a file
8972 * @param content File content
@@ -103,7 +86,6 @@ export class CodeParser implements ICodeParser {
10386 private async parseContent ( filePath : string , content : string , fileHash : string ) : Promise < CodeBlock [ ] > {
10487 const ext = path . extname ( filePath ) . slice ( 1 ) . toLowerCase ( )
10588 const seenSegmentHashes = new Set < string > ( )
106- const minBlockChars = this . getMinBlockChars ( ext )
10789
10890 // Check if we already have the parser loaded
10991 if ( ! this . loadedParsers [ ext ] ) {
@@ -146,15 +128,9 @@ export class CodeParser implements ICodeParser {
146128
147129 // Check if captures are empty
148130 if ( captures . length === 0 ) {
149- if ( content . length >= minBlockChars ) {
131+ if ( content . length >= MIN_BLOCK_CHARS ) {
150132 // Perform fallback chunking if content is large enough
151- const blocks = this . _performFallbackChunking (
152- filePath ,
153- content ,
154- fileHash ,
155- seenSegmentHashes ,
156- minBlockChars ,
157- )
133+ const blocks = this . _performFallbackChunking ( filePath , content , fileHash , seenSegmentHashes )
158134 return blocks
159135 } else {
160136 // Return empty if content is too small for fallback
@@ -172,7 +148,7 @@ export class CodeParser implements ICodeParser {
172148 // const lineSpan = currentNode.endPosition.row - currentNode.startPosition.row + 1 // Removed as per lint error
173149
174150 // Check if the node meets the minimum character requirement
175- if ( currentNode . text . length >= minBlockChars ) {
151+ if ( currentNode . text . length >= MIN_BLOCK_CHARS ) {
176152 // If it also exceeds the maximum character limit, try to break it down
177153 if ( currentNode . text . length > MAX_BLOCK_CHARS * MAX_CHARS_TOLERANCE_FACTOR ) {
178154 if ( currentNode . children . filter ( ( child ) => child !== null ) . length > 0 ) {
@@ -185,7 +161,6 @@ export class CodeParser implements ICodeParser {
185161 filePath ,
186162 fileHash ,
187163 seenSegmentHashes ,
188- minBlockChars ,
189164 )
190165 results . push ( ...chunkedBlocks )
191166 }
@@ -233,7 +208,6 @@ export class CodeParser implements ICodeParser {
233208 fileHash : string ,
234209 chunkType : string ,
235210 seenSegmentHashes : Set < string > ,
236- minBlockChars : number ,
237211 baseStartLine : number = 1 , // 1-based start line of the *first* line in the `lines` array
238212 ) : CodeBlock [ ] {
239213 const chunks : CodeBlock [ ] = [ ]
@@ -243,7 +217,7 @@ export class CodeParser implements ICodeParser {
243217 const effectiveMaxChars = MAX_BLOCK_CHARS * MAX_CHARS_TOLERANCE_FACTOR
244218
245219 const finalizeChunk = ( endLineIndex : number ) => {
246- if ( currentChunkLength >= minBlockChars && currentChunkLines . length > 0 ) {
220+ if ( currentChunkLength >= MIN_BLOCK_CHARS && currentChunkLines . length > 0 ) {
247221 const chunkContent = currentChunkLines . join ( "\n" )
248222 const startLine = baseStartLine + chunkStartLineIndex
249223 const endLine = baseStartLine + endLineIndex
@@ -324,7 +298,7 @@ export class CodeParser implements ICodeParser {
324298 }
325299
326300 if (
327- currentChunkLength >= minBlockChars &&
301+ currentChunkLength >= MIN_BLOCK_CHARS &&
328302 remainderLength < MIN_CHUNK_REMAINDER_CHARS &&
329303 currentChunkLines . length > 1
330304 ) {
@@ -335,7 +309,7 @@ export class CodeParser implements ICodeParser {
335309 const potentialNextChunkLength = potentialNextChunkLines . join ( "\n" ) . length + 1
336310
337311 if (
338- potentialChunkLength >= minBlockChars &&
312+ potentialChunkLength >= MIN_BLOCK_CHARS &&
339313 potentialNextChunkLength >= MIN_CHUNK_REMAINDER_CHARS
340314 ) {
341315 splitIndex = k
@@ -372,18 +346,16 @@ export class CodeParser implements ICodeParser {
372346 content : string ,
373347 fileHash : string ,
374348 seenSegmentHashes : Set < string > ,
375- minBlockChars : number ,
376349 ) : CodeBlock [ ] {
377350 const lines = content . split ( "\n" )
378- return this . _chunkTextByLines ( lines , filePath , fileHash , "fallback_chunk" , seenSegmentHashes , minBlockChars )
351+ return this . _chunkTextByLines ( lines , filePath , fileHash , "fallback_chunk" , seenSegmentHashes )
379352 }
380353
381354 private _chunkLeafNodeByLines (
382355 node : Node ,
383356 filePath : string ,
384357 fileHash : string ,
385358 seenSegmentHashes : Set < string > ,
386- minBlockChars : number ,
387359 ) : CodeBlock [ ] {
388360 const lines = node . text . split ( "\n" )
389361 const baseStartLine = node . startPosition . row + 1
@@ -393,7 +365,6 @@ export class CodeParser implements ICodeParser {
393365 fileHash ,
394366 node . type , // Use the node's type
395367 seenSegmentHashes ,
396- minBlockChars ,
397368 baseStartLine ,
398369 )
399370 }
0 commit comments