@@ -709,3 +709,169 @@ describe("processCarriageReturns", () => {
709709 expect ( processCarriageReturns ( input ) ) . toBe ( expected )
710710 } )
711711} )
712+
/**
 * Exercises the optional character limit (third argument) of extractTextFromFile,
 * on its own and combined with the line limit (second argument).
 *
 * Every test writes a fixture into a fresh temporary directory and loads the
 * function under test through a dynamic import of "../extract-text".
 */
describe("extractTextFromFile with character limit", () => {
	const fs = require("fs/promises")
	const path = require("path")
	const os = require("os")

	let tempDir: string
	let testFilePath: string

	// Resolves the function under test the same way for every test case.
	const loadExtractor = async () => {
		const extractTextModule = await import("../extract-text")
		return extractTextModule.extractTextFromFile
	}

	// Builds `count` lines with makeLine(1..count) and joins them with "\n".
	const buildLines = (count: number, makeLine: (lineNumber: number) => string) =>
		Array.from({ length: count }, (_, idx) => makeLine(idx + 1)).join("\n")

	beforeEach(async () => {
		// Each test gets its own scratch directory containing a single fixture path.
		const dirPrefix = path.join(os.tmpdir(), "extract-text-test-")
		tempDir = await fs.mkdtemp(dirPrefix)
		testFilePath = path.join(tempDir, "test.txt")
	})

	afterEach(async () => {
		try {
			await fs.rm(tempDir, { recursive: true, force: true })
		} catch {
			// Best-effort cleanup: a failed removal must not fail the test.
		}
	})

	it("should apply character limit when file content exceeds limit", async () => {
		// A single line holding far more characters than the limit allows.
		const payload = "a".repeat(1000)
		await fs.writeFile(testFilePath, payload)

		const extractTextFromFile = await loadExtractor()
		const output = await extractTextFromFile(testFilePath, undefined, 100)

		// Loose upper bound: leaves room for the line-number prefix and the truncation notice.
		expect(output.length).toBeLessThan(payload.length + 50)
		// The truncation marker must be present.
		expect(output).toContain("[...")
		expect(output).toContain("characters omitted...]")
	})

	it("should apply character limit even when line limit is not exceeded", async () => {
		// Few lines, but each one is very long.
		const wideLine = "x".repeat(500)
		const fixture = [wideLine, wideLine, wideLine].join("\n")
		await fs.writeFile(testFilePath, fixture)

		const extractTextFromFile = await loadExtractor()
		// Line limit 10, character limit 200.
		const output = await extractTextFromFile(testFilePath, 10, 200)

		// Only the character limit is expected to kick in.
		expect(output).toContain("characters omitted")
		expect(output).not.toContain("lines omitted")
	})

	it("should apply both line and character limits when line limit is exceeded first", async () => {
		// Many short lines.
		const fixture = buildLines(50, (n) => `line${n}`)
		await fs.writeFile(testFilePath, fixture)

		const extractTextFromFile = await loadExtractor()
		// Line limit 10, character limit far above the content size.
		const output = await extractTextFromFile(testFilePath, 10, 10000)

		// The line limit applies first, so only the line-truncation notice is expected.
		expect(output).toContain("showing 10 of 50 total lines")
		expect(output).not.toContain("character limit")
	})

	it("should show different truncation messages for different scenarios", async () => {
		// Scenario 1: line limit only.
		const shortLinesFixture = buildLines(20, (n) => `line${n}`)
		await fs.writeFile(testFilePath, shortLinesFixture)

		const extractTextFromFile = await loadExtractor()
		const lineOnlyOutput = await extractTextFromFile(testFilePath, 5, undefined)
		expect(lineOnlyOutput).toContain("showing 5 of 20 total lines")
		expect(lineOnlyOutput).not.toContain("character limit")

		// Scenario 2: line limit and character limit both take effect.
		const longLinesFixture = buildLines(20, (n) => `${"x".repeat(100)}_line${n}`)
		await fs.writeFile(testFilePath, longLinesFixture)

		const bothLimitsOutput = await extractTextFromFile(testFilePath, 5, 200)
		expect(bothLimitsOutput).toContain("showing 5 of 20 total lines")
		expect(bothLimitsOutput).toContain("character limit (200)")

		// Scenario 3: character limit only.
		const singleLongLine = "a".repeat(1000)
		await fs.writeFile(testFilePath, singleLongLine)

		const charOnlyOutput = await extractTextFromFile(testFilePath, undefined, 100)
		expect(charOnlyOutput).toContain("characters omitted")
		expect(charOnlyOutput).toContain("character limit (100)")
		expect(charOnlyOutput).not.toContain("total lines")
	})

	it("should not apply character limit when content is within limit", async () => {
		const fixture = "short content"
		await fs.writeFile(testFilePath, fixture)

		const extractTextFromFile = await loadExtractor()
		const output = await extractTextFromFile(testFilePath, undefined, 1000)

		// Content is kept in full; only the line-number prefix is added.
		expect(output).toBe("1 | short content\n")
		expect(output).not.toContain("characters omitted")
	})

	it("should handle character limit with line limit when both are exceeded", async () => {
		// Many long lines.
		const filler = "y".repeat(100)
		const fixture = buildLines(30, (n) => `${filler}_${n}`)
		await fs.writeFile(testFilePath, fixture)

		const extractTextFromFile = await loadExtractor()
		// Line limit 5, character limit 500.
		const output = await extractTextFromFile(testFilePath, 5, 500)

		// The line limit applies first; the character limit then applies to what remains.
		expect(output).toContain("showing 5 of 30 total lines")
		expect(output).toContain("characters omitted")
	})

	it("should validate maxReadCharacterLimit parameter", async () => {
		await fs.writeFile(testFilePath, "test content")

		const extractTextFromFile = await loadExtractor()

		// Invalid character limits paired with the exact error each must produce.
		const invalidCases: Array<[number, string]> = [
			[0, "Invalid maxReadCharacterLimit: 0. Must be a positive integer or undefined for unlimited."],
			[-1, "Invalid maxReadCharacterLimit: -1. Must be a positive integer or undefined for unlimited."],
		]

		for (const [badLimit, expectedMessage] of invalidCases) {
			await expect(extractTextFromFile(testFilePath, undefined, badLimit)).rejects.toThrow(expectedMessage)
		}
	})

	it("should work correctly when maxReadCharacterLimit is undefined", async () => {
		const fixture = "test content without limit"
		await fs.writeFile(testFilePath, fixture)

		const extractTextFromFile = await loadExtractor()
		const output = await extractTextFromFile(testFilePath, undefined, undefined)

		// Full content is returned with the line-number prefix.
		expect(output).toBe("1 | test content without limit\n")
	})

	it("should apply character limit to line-limited content correctly", async () => {
		// More lines than the line limit, but few characters overall.
		const fixture = buildLines(20, (n) => `line${n}`)
		await fs.writeFile(testFilePath, fixture)

		const extractTextFromFile = await loadExtractor()
		// Line limit 5, character limit 200.
		const output = await extractTextFromFile(testFilePath, 5, 200)

		// The line limit applies first.
		expect(output).toContain("showing 5 of 20 total lines")

		// NOTE(review): this assertion only runs when the output exceeds 200 characters;
		// otherwise the character-limit behaviour is not checked by this test.
		if (output.length > 200) {
			expect(output).toContain("characters omitted")
		}
	})
})
0 commit comments