@@ -10,6 +10,7 @@ import { isBinaryFile } from "isbinaryfile"
1010import { ReadFileToolUse , ToolParamName , ToolResponse } from "../../../shared/tools"
1111import { readFileTool } from "../readFileTool"
1212import { formatResponse } from "../../prompts/responses"
13+ import { tiktoken } from "../../../utils/tiktoken"
1314
1415vi . mock ( "path" , async ( ) => {
1516 const originalPath = await vi . importActual ( "path" )
@@ -35,18 +36,27 @@ vi.mock("../../../integrations/misc/read-lines")
3536let mockInputContent = ""
3637
3738// First create all the mocks
38- vi . mock ( "../../../integrations/misc/extract-text" )
39+ vi . mock ( "../../../integrations/misc/extract-text" , ( ) => ( {
40+ extractTextFromFile : vi . fn ( ) ,
41+ addLineNumbers : vi . fn ( ) ,
42+ getSupportedBinaryFormats : vi . fn ( ( ) => [ ".pdf" , ".docx" , ".ipynb" ] ) ,
43+ } ) )
3944vi . mock ( "../../../services/tree-sitter" )
45+ vi . mock ( "../../../utils/tiktoken" )
46+
47+ // Import the mocked functions
48+ import { addLineNumbers , getSupportedBinaryFormats } from "../../../integrations/misc/extract-text"
4049
4150// Then create the mock functions
42- const addLineNumbersMock = vi . fn ( ) . mockImplementation ( ( text , startLine = 1 ) => {
51+ const addLineNumbersMock = vi . mocked ( addLineNumbers )
52+ addLineNumbersMock . mockImplementation ( ( text : string , startLine = 1 ) => {
4353 if ( ! text ) return ""
4454 const lines = typeof text === "string" ? text . split ( "\n" ) : [ text ]
45- return lines . map ( ( line , i ) => `${ startLine + i } | ${ line } ` ) . join ( "\n" )
55+ return lines . map ( ( line : string , i : number ) => `${ startLine + i } | ${ line } ` ) . join ( "\n" )
4656} )
4757
48- const extractTextFromFileMock = vi . fn ( )
49- const getSupportedBinaryFormatsMock = vi . fn ( ( ) => [ ".pdf" , ".docx" , ".ipynb" ] )
58+ const extractTextFromFileMock = vi . mocked ( extractTextFromFile )
59+ const getSupportedBinaryFormatsMock = vi . mocked ( getSupportedBinaryFormats )
5060
5161vi . mock ( "../../ignore/RooIgnoreController" , ( ) => ( {
5262 RooIgnoreController : class {
@@ -520,3 +530,317 @@ describe("read_file tool XML output structure", () => {
520530 } )
521531 } )
522532} )
533+
534+ describe ( "read_file tool with large file safeguard" , ( ) => {
535+ // Test data
536+ const testFilePath = "test/largefile.txt"
537+ const absoluteFilePath = "/test/largefile.txt"
538+
539+ // Mocked functions
540+ const mockedCountFileLines = vi . mocked ( countFileLines )
541+ const mockedReadLines = vi . mocked ( readLines )
542+ const mockedExtractTextFromFile = vi . mocked ( extractTextFromFile )
543+ const mockedIsBinaryFile = vi . mocked ( isBinaryFile )
544+ const mockedPathResolve = vi . mocked ( path . resolve )
545+ const mockedTiktoken = vi . mocked ( tiktoken )
546+
547+ const mockCline : any = { }
548+ let mockProvider : any
549+ let toolResult : ToolResponse | undefined
550+
551+ beforeEach ( ( ) => {
552+ vi . clearAllMocks ( )
553+
554+ mockedPathResolve . mockReturnValue ( absoluteFilePath )
555+ mockedIsBinaryFile . mockResolvedValue ( false )
556+
557+ mockProvider = {
558+ getState : vi . fn ( ) ,
559+ deref : vi . fn ( ) . mockReturnThis ( ) ,
560+ }
561+
562+ mockCline . cwd = "/"
563+ mockCline . task = "Test"
564+ mockCline . providerRef = mockProvider
565+ mockCline . rooIgnoreController = {
566+ validateAccess : vi . fn ( ) . mockReturnValue ( true ) ,
567+ }
568+ mockCline . say = vi . fn ( ) . mockResolvedValue ( undefined )
569+ mockCline . ask = vi . fn ( ) . mockResolvedValue ( { response : "yesButtonClicked" } )
570+ mockCline . fileContextTracker = {
571+ trackFileContext : vi . fn ( ) . mockResolvedValue ( undefined ) ,
572+ }
573+ mockCline . recordToolUsage = vi . fn ( ) . mockReturnValue ( undefined )
574+ mockCline . recordToolError = vi . fn ( ) . mockReturnValue ( undefined )
575+
576+ toolResult = undefined
577+ } )
578+
579+ async function executeReadFileTool (
580+ params : Partial < ReadFileToolUse [ "params" ] > = { } ,
581+ options : {
582+ maxReadFileLine ?: number
583+ totalLines ?: number
584+ tokenCount ?: number
585+ } = { } ,
586+ ) : Promise < ToolResponse | undefined > {
587+ const maxReadFileLine = options . maxReadFileLine ?? - 1
588+ const totalLines = options . totalLines ?? 5
589+ const tokenCount = options . tokenCount ?? 100
590+
591+ mockProvider . getState . mockResolvedValue ( { maxReadFileLine } )
592+ mockedCountFileLines . mockResolvedValue ( totalLines )
593+ mockedTiktoken . mockResolvedValue ( tokenCount )
594+
595+ const argsContent = `<file><path>${ testFilePath } </path></file>`
596+
597+ const toolUse : ReadFileToolUse = {
598+ type : "tool_use" ,
599+ name : "read_file" ,
600+ params : { args : argsContent , ...params } ,
601+ partial : false ,
602+ }
603+
604+ await readFileTool (
605+ mockCline ,
606+ toolUse ,
607+ mockCline . ask ,
608+ vi . fn ( ) ,
609+ ( result : ToolResponse ) => {
610+ toolResult = result
611+ } ,
612+ ( _ : ToolParamName , content ?: string ) => content ?? "" ,
613+ )
614+
615+ return toolResult
616+ }
617+
618+ describe ( "when file has many lines and high token count" , ( ) => {
619+ it ( "should apply safeguard and read only first 2000 lines" , async ( ) => {
620+ // Setup - large file with high token count
621+ const largeFileContent = Array ( 1500 ) . fill ( "This is a line of text" ) . join ( "\n" )
622+ const partialContent = Array ( 2000 ) . fill ( "This is a line of text" ) . join ( "\n" )
623+
624+ mockedExtractTextFromFile . mockResolvedValue ( largeFileContent )
625+ mockedReadLines . mockResolvedValue ( partialContent )
626+
627+ // Setup addLineNumbers mock for this test
628+ addLineNumbersMock . mockImplementation ( ( text : string ) => {
629+ const lines = text . split ( "\n" )
630+ return lines . map ( ( line : string , i : number ) => `${ i + 1 } | ${ line } ` ) . join ( "\n" )
631+ } )
632+
633+ // Execute with high line count and token count
634+ const result = await executeReadFileTool (
635+ { } ,
636+ {
637+ maxReadFileLine : - 1 ,
638+ totalLines : 1500 ,
639+ tokenCount : 60000 , // Above threshold
640+ } ,
641+ )
642+
643+ // Verify safeguard was applied
644+ expect ( mockedTiktoken ) . toHaveBeenCalled ( )
645+ expect ( mockedReadLines ) . toHaveBeenCalledWith ( absoluteFilePath , 1999 , 0 )
646+
647+ // Verify the result contains the safeguard notice
648+ expect ( result ) . toContain ( "<notice>This file contains 1500 lines and approximately 60,000 tokens" )
649+ expect ( result ) . toContain ( "Showing only the first 2000 lines to preserve context space" )
650+ expect ( result ) . toContain ( `<content lines="1-2000">` )
651+ } )
652+
653+ it ( "should not apply safeguard when token count is below threshold" , async ( ) => {
654+ // Setup - large file but with low token count
655+ const fileContent = Array ( 1500 ) . fill ( "Short" ) . join ( "\n" )
656+ const numberedContent = fileContent
657+ . split ( "\n" )
658+ . map ( ( line , i ) => `${ i + 1 } | ${ line } ` )
659+ . join ( "\n" )
660+
661+ mockedExtractTextFromFile . mockImplementation ( ( ) => Promise . resolve ( numberedContent ) )
662+
663+ // Execute with high line count but low token count
664+ const result = await executeReadFileTool (
665+ { } ,
666+ {
667+ maxReadFileLine : - 1 ,
668+ totalLines : 1500 ,
669+ tokenCount : 30000 , // Below threshold
670+ } ,
671+ )
672+
673+ // Verify safeguard was NOT applied
674+ expect ( mockedTiktoken ) . toHaveBeenCalled ( )
675+ expect ( mockedReadLines ) . not . toHaveBeenCalled ( )
676+ expect ( mockedExtractTextFromFile ) . toHaveBeenCalled ( )
677+
678+ // Verify no safeguard notice
679+ expect ( result ) . not . toContain ( "preserve context space" )
680+ expect ( result ) . toContain ( `<content lines="1-1500">` )
681+ } )
682+
683+ it ( "should not apply safeguard for files under 1000 lines" , async ( ) => {
684+ // Setup - file with less than 1000 lines
685+ const fileContent = Array ( 999 ) . fill ( "This is a line of text" ) . join ( "\n" )
686+ const numberedContent = fileContent
687+ . split ( "\n" )
688+ . map ( ( line , i ) => `${ i + 1 } | ${ line } ` )
689+ . join ( "\n" )
690+
691+ mockedExtractTextFromFile . mockImplementation ( ( ) => Promise . resolve ( numberedContent ) )
692+
693+ // Execute
694+ const result = await executeReadFileTool (
695+ { } ,
696+ {
697+ maxReadFileLine : - 1 ,
698+ totalLines : 999 ,
699+ tokenCount : 100000 , // Even with high token count
700+ } ,
701+ )
702+
703+ // Verify tiktoken was NOT called (optimization)
704+ expect ( mockedTiktoken ) . not . toHaveBeenCalled ( )
705+ expect ( mockedReadLines ) . not . toHaveBeenCalled ( )
706+ expect ( mockedExtractTextFromFile ) . toHaveBeenCalled ( )
707+
708+ // Verify no safeguard notice
709+ expect ( result ) . not . toContain ( "preserve context space" )
710+ expect ( result ) . toContain ( `<content lines="1-999">` )
711+ } )
712+
713+ it ( "should apply safeguard for very large files even if token counting fails" , async ( ) => {
714+ // Setup - very large file and token counting fails
715+ const partialContent = Array ( 2000 ) . fill ( "This is a line of text" ) . join ( "\n" )
716+
717+ mockedExtractTextFromFile . mockResolvedValue ( "Large content" )
718+ mockedReadLines . mockResolvedValue ( partialContent )
719+
720+ // Setup addLineNumbers mock for partial content
721+ addLineNumbersMock . mockImplementation ( ( text : string ) => {
722+ const lines = text . split ( "\n" )
723+ return lines . map ( ( line : string , i : number ) => `${ i + 1 } | ${ line } ` ) . join ( "\n" )
724+ } )
725+
726+ // Set up the provider state
727+ mockProvider . getState . mockResolvedValue ( { maxReadFileLine : - 1 } )
728+ mockedCountFileLines . mockResolvedValue ( 6000 )
729+
730+ // IMPORTANT: Set up tiktoken to reject AFTER other mocks are set
731+ mockedTiktoken . mockRejectedValue ( new Error ( "Token counting failed" ) )
732+
733+ const argsContent = `<file><path>${ testFilePath } </path></file>`
734+
735+ const toolUse : ReadFileToolUse = {
736+ type : "tool_use" ,
737+ name : "read_file" ,
738+ params : { args : argsContent } ,
739+ partial : false ,
740+ }
741+
742+ await readFileTool (
743+ mockCline ,
744+ toolUse ,
745+ mockCline . ask ,
746+ vi . fn ( ) ,
747+ ( result : ToolResponse ) => {
748+ toolResult = result
749+ } ,
750+ ( _ : ToolParamName , content ?: string ) => content ?? "" ,
751+ )
752+
753+ // Verify safeguard was applied despite token counting failure
754+ expect ( mockedTiktoken ) . toHaveBeenCalled ( )
755+ expect ( mockedReadLines ) . toHaveBeenCalledWith ( absoluteFilePath , 1999 , 0 )
756+
757+ // Verify the result contains the safeguard notice (without token count)
758+ expect ( toolResult ) . toContain ( "<notice>This file contains 6000 lines" )
759+ expect ( toolResult ) . toContain ( "Showing only the first 2000 lines to preserve context space" )
760+ expect ( toolResult ) . toContain ( `<content lines="1-2000">` )
761+ } )
762+
763+ it ( "should not apply safeguard when maxReadFileLine is not -1" , async ( ) => {
764+ // Setup
765+ const fileContent = Array ( 2000 ) . fill ( "This is a line of text" ) . join ( "\n" )
766+ mockedExtractTextFromFile . mockResolvedValue ( fileContent )
767+
768+ // Execute with maxReadFileLine = 500 (not -1)
769+ const result = await executeReadFileTool (
770+ { } ,
771+ {
772+ maxReadFileLine : 500 ,
773+ totalLines : 2000 ,
774+ tokenCount : 100000 ,
775+ } ,
776+ )
777+
778+ // Verify tiktoken was NOT called
779+ expect ( mockedTiktoken ) . not . toHaveBeenCalled ( )
780+
781+ // The normal maxReadFileLine logic should apply
782+ expect ( mockedReadLines ) . toHaveBeenCalled ( )
783+ } )
784+
785+ it ( "should handle line ranges correctly with safeguard" , async ( ) => {
786+ // When line ranges are specified, safeguard should not apply
787+ const rangeContent = "Line 100\nLine 101\nLine 102"
788+ mockedReadLines . mockResolvedValue ( rangeContent )
789+
790+ const argsContent = `<file><path>${ testFilePath } </path><line_range>100-102</line_range></file>`
791+
792+ const toolUse : ReadFileToolUse = {
793+ type : "tool_use" ,
794+ name : "read_file" ,
795+ params : { args : argsContent } ,
796+ partial : false ,
797+ }
798+
799+ mockProvider . getState . mockResolvedValue ( { maxReadFileLine : - 1 } )
800+ mockedCountFileLines . mockResolvedValue ( 10000 )
801+
802+ await readFileTool (
803+ mockCline ,
804+ toolUse ,
805+ mockCline . ask ,
806+ vi . fn ( ) ,
807+ ( result : ToolResponse ) => {
808+ toolResult = result
809+ } ,
810+ ( _ : ToolParamName , content ?: string ) => content ?? "" ,
811+ )
812+
813+ // Verify tiktoken was NOT called for range reads
814+ expect ( mockedTiktoken ) . not . toHaveBeenCalled ( )
815+ expect ( toolResult ) . toContain ( `<content lines="100-102">` )
816+ expect ( toolResult ) . not . toContain ( "preserve context space" )
817+ } )
818+ } )
819+
820+ describe ( "safeguard thresholds" , ( ) => {
821+ it ( "should use correct thresholds for line count and token count" , async ( ) => {
822+ // Test boundary conditions
823+
824+ // Just below line threshold - no token check
825+ await executeReadFileTool ( { } , { totalLines : 1000 , maxReadFileLine : - 1 } )
826+ expect ( mockedTiktoken ) . not . toHaveBeenCalled ( )
827+
828+ // Just above line threshold - token check performed
829+ vi . clearAllMocks ( )
830+ mockedExtractTextFromFile . mockResolvedValue ( "content" )
831+ await executeReadFileTool ( { } , { totalLines : 1001 , maxReadFileLine : - 1 , tokenCount : 40000 } )
832+ expect ( mockedTiktoken ) . toHaveBeenCalled ( )
833+
834+ // Token count just below threshold - no safeguard
835+ expect ( toolResult ) . not . toContain ( "preserve context space" )
836+
837+ // Token count just above threshold - safeguard applied
838+ vi . clearAllMocks ( )
839+ mockedExtractTextFromFile . mockResolvedValue ( "content" )
840+ mockedReadLines . mockResolvedValue ( "partial content" )
841+ await executeReadFileTool ( { } , { totalLines : 1001 , maxReadFileLine : - 1 , tokenCount : 50001 } )
842+ expect ( mockedReadLines ) . toHaveBeenCalled ( )
843+ expect ( toolResult ) . toContain ( "preserve context space" )
844+ } )
845+ } )
846+ } )
0 commit comments