@@ -17,13 +17,13 @@ import (
1717 nf "github.com/nightfallai/nightfall-go-sdk"
1818 "github.com/nightfallai/nightfall_code_scanner/internal/clients/diffreviewer"
1919 "github.com/nightfallai/nightfall_code_scanner/internal/clients/logger"
20+ "github.com/nightfallai/nightfall_code_scanner/internal/datastructs"
2021 "github.com/nightfallai/nightfall_code_scanner/internal/nightfallconfig"
2122 "golang.org/x/text/cases"
2223 "golang.org/x/text/language"
2324)
2425
2526const (
26- contentChunkByteSize = 1024
2727 // max number of items that can be sent to Nightfall API at a time
2828 maxItemsForAPIReq = 479
2929 // timeout for the total time spent sending scan requests and receiving responses for a diff
@@ -32,6 +32,8 @@ const (
3232 maxScanAttempts = 5
3333 // initial delay before re-attempting scan request
3434 initialDelay = time .Second
35+
36+ maxAPIRequestSize = 500 * 1024 // 500KB
3537)
3638
3739// Client uses the Nightfall API to scan text for findings
@@ -73,6 +75,12 @@ type contentToScan struct {
7375 LineNumber int
7476}
7577
78+ type fileToScan struct {
79+ Content string
80+ FilePath string
81+ ContentToLineMap * datastructs.RangeMap
82+ }
83+
7684func getCommentMsg (finding * nf.Finding ) string {
7785 if finding .Finding == "" && finding .RedactedFinding == "" {
7886 return ""
@@ -175,6 +183,34 @@ func sliceListBySize(index, numItemsForMaxSize int, contentToScanList []*content
175183 return contentToScanList [startIndex :endIndex ]
176184}
177185
186+ func createCommentsFromScanRespForFiles (inputContent []* fileToScan , resp * nf.ScanTextResponse , tokenExclusionList []string ) []* diffreviewer.Comment {
187+ comments := make ([]* diffreviewer.Comment , 0 )
188+ for j , findingList := range resp .Findings {
189+ for _ , finding := range findingList {
190+ if finding .Finding != "" && ! isFindingInTokenExclusionList (finding .Finding , tokenExclusionList ) {
191+ // Found sensitive info
192+ // Create comment if fragment is not in exclusion set
193+ correspondingContent := inputContent [j ]
194+ exists , lineNumber , _ := correspondingContent .ContentToLineMap .Find (int (finding .Location .CodepointRange .Start ))
195+ if ! exists {
196+ // should not come here
197+ continue
198+ }
199+ findingMsg := getCommentMsg (finding )
200+ findingTitle := getCommentTitle (finding )
201+ c := diffreviewer.Comment {
202+ FilePath : correspondingContent .FilePath ,
203+ LineNumber : lineNumber ,
204+ Body : findingMsg ,
205+ Title : findingTitle ,
206+ }
207+ comments = append (comments , & c )
208+ }
209+ }
210+ }
211+ return comments
212+ }
213+
178214func createCommentsFromScanResp (inputContent []* contentToScan , resp * nf.ScanTextResponse , tokenExclusionList []string ) []* diffreviewer.Comment {
179215 comments := make ([]* diffreviewer.Comment , 0 )
180216 for j , findingList := range resp .Findings {
@@ -230,9 +266,9 @@ func (n *Client) buildScanRequest(items []string) *nf.ScanTextRequest {
230266 }
231267}
232268
233- func (n * Client ) scanContent (
269+ func (n * Client ) scanFileContent (
234270 ctx context.Context ,
235- cts []* contentToScan ,
271+ cts []* fileToScan ,
236272 requestNum int ,
237273 logger logger.Logger ,
238274) ([]* diffreviewer.Comment , error ) {
@@ -250,45 +286,70 @@ func (n *Client) scanContent(
250286 }
251287
252288 // Determine findings from response and create comments
253- createdComments := createCommentsFromScanResp (cts , resp , n .TokenExclusionList )
289+ createdComments := createCommentsFromScanRespForFiles (cts , resp , n .TokenExclusionList )
254290 logger .Info (fmt .Sprintf ("Got %d annotations for request #%d" , len (createdComments ), requestNum ))
255291 return createdComments , nil
256292}
257293
258- func (n * Client ) scanAllContent (
294+ func (n * Client ) scanAllFiles (
259295 ctx context.Context ,
260296 logger logger.Logger ,
261- cts []* contentToScan ,
297+ cts []* fileToScan ,
262298 commentCh chan <- []* diffreviewer.Comment ,
263299) {
264300 defer close (commentCh )
265301 blockingCh := make (chan struct {}, n .MaxNumberRoutines )
266302 var wg sync.WaitGroup
303+ requestBatches := make ([][]* fileToScan , 0 )
304+ endIndex := 0
305+ for endIndex < len (cts ) {
306+ requestBatch := make ([]* fileToScan , 0 )
307+ startIndex := endIndex
308+ currentSize := 0
309+ for {
310+ if endIndex >= len (cts ) {
311+ break
312+ }
313+ size := len (cts [endIndex ].Content )
314+ if size > maxAPIRequestSize {
315+ // file diff size is greater than supported by API platform, should not come here
316+ logger .Error ("terminating the scan early as large file scanning is not supported by API platform" )
317+ return
318+ }
319+ if (size + currentSize ) > maxAPIRequestSize {
320+ break
321+ }
267322
268- // Integer round up division
269- numRequestsRequired := (len (cts ) + maxItemsForAPIReq - 1 ) / maxItemsForAPIReq
323+ if (endIndex - startIndex ) >= maxItemsForAPIReq {
324+ break
325+ }
326+ currentSize += size
327+ endIndex ++
328+ }
329+ requestBatch = append (requestBatch , cts [startIndex :endIndex ]... )
330+ requestBatches = append (requestBatches , requestBatch )
331+ }
270332
333+ numRequestsRequired := len (requestBatches )
271334 logger .Info (fmt .Sprintf ("Sending %d requests to Nightfall API" , numRequestsRequired ))
272335 for i := 0 ; i < numRequestsRequired ; i ++ {
273336 // Use max number of items to determine content to send in request
274- contentSlice := sliceListBySize (i , maxItemsForAPIReq , cts )
275-
276337 wg .Add (1 )
277338 blockingCh <- struct {}{}
278- go func (loopCount int , cts []* contentToScan ) {
339+ go func (loopCount int , cts []* fileToScan ) {
279340 defer wg .Done ()
280341 if ctx .Err () != nil {
281342 return
282343 }
283344
284- c , err := n .scanContent (ctx , cts , loopCount + 1 , logger )
345+ c , err := n .scanFileContent (ctx , cts , loopCount + 1 , logger )
285346 if err != nil {
286347 logger .Error (fmt .Sprintf ("Unable to scan %d content items" , len (cts )))
287348 } else {
288349 commentCh <- c
289350 }
290351 <- blockingCh
291- }(i , contentSlice )
352+ }(i , requestBatches [ i ] )
292353 }
293354 wg .Wait ()
294355}
@@ -303,26 +364,25 @@ func (n *Client) Scan(ctx context.Context, items []string) (*nf.ScanTextResponse
303364// contains sensitive data
304365func (n * Client ) ReviewDiff (ctx context.Context , logger logger.Logger , fileDiffs []* diffreviewer.FileDiff ) ([]* diffreviewer.Comment , error ) {
305366 fileDiffs = filterFileDiffs (fileDiffs , n .FileInclusionList , n .FileExclusionList , logger )
306- contentToScanList := make ([]* contentToScan , 0 , len (fileDiffs ))
307- // Chunk fileDiffs content and store chunk and its metadata
367+ fileToScanList := make ([]* fileToScan , 0 , len (fileDiffs ))
368+
308369 for _ , fd := range fileDiffs {
309- for _ , hunk := range fd .Hunks {
310- for _ , line := range hunk .Lines {
311- chunkedContent , err := chunkContent (contentChunkByteSize , line , fd .PathNew )
312- if err != nil {
313- logger .Error ("Error chunking git diff" )
314- return nil , err
315- }
316- contentToScanList = append (contentToScanList , chunkedContent ... )
317- }
370+ file , err := getFileToScan (fd )
371+ if err != nil {
372+ return nil , err
318373 }
374+ if len (file .Content ) > maxAPIRequestSize {
375+ logger .Warning (fmt .Sprintf ("unable to scan file %s as its size exceeds the supported limit of %d Kbs" , file .FilePath , maxAPIRequestSize / 1024 ))
376+ continue
377+ }
378+ fileToScanList = append (fileToScanList , file )
319379 }
320380
321381 commentCh := make (chan []* diffreviewer.Comment )
322382 newCtx , cancel := context .WithDeadline (ctx , time .Now ().Add (defaultTimeout ))
323383 defer cancel ()
324384
325- go n .scanAllContent (newCtx , logger , contentToScanList , commentCh )
385+ go n .scanAllFiles (newCtx , logger , fileToScanList , commentCh )
326386
327387 comments := make ([]* diffreviewer.Comment , 0 )
328388 for {
@@ -338,6 +398,35 @@ func (n *Client) ReviewDiff(ctx context.Context, logger logger.Logger, fileDiffs
338398 }
339399}
340400
401+ func getFileToScan (fd * diffreviewer.FileDiff ) (* fileToScan , error ) {
402+ fts := & fileToScan {
403+ FilePath : fd .PathNew ,
404+ ContentToLineMap : datastructs .NewRangeMap (),
405+ }
406+
407+ bufferString := bytes .NewBufferString ("" )
408+ startCodePointRange , endCodePointRange := 0 , - 1
409+ for _ , hunk := range fd .Hunks {
410+ for _ , line := range hunk .Lines {
411+ startCodePointRange = endCodePointRange + 1
412+ // adding space between each line
413+ strToAdd := fmt .Sprintf ("%s " , line .Content )
414+ _ , err := bufferString .WriteString (strToAdd )
415+ if err != nil {
416+ return nil , err
417+ }
418+ endCodePointRange += len ([]rune (strToAdd ))
419+ err = fts .ContentToLineMap .AddRange (startCodePointRange , endCodePointRange , line .LnumNew )
420+ if err != nil {
421+ return nil , err
422+ }
423+ }
424+ }
425+
426+ fts .Content = bufferString .String ()
427+ return fts , nil
428+ }
429+
341430func filterFileDiffs (fileDiffs []* diffreviewer.FileDiff , fileIncludeList , fileExcludeList []string , logger logger.Logger ) []* diffreviewer.FileDiff {
342431 if len (fileIncludeList ) > 0 {
343432 fileDiffs = filterByFilePath (fileDiffs , fileIncludeList , true , logger )
0 commit comments