@@ -182,7 +182,33 @@ func (si *SearchIndexer) createIndexMapping() mapping.IndexMapping {
182182}
183183
184184// handleConfigScan processes scanned config files and indexes them
185- func (si * SearchIndexer ) handleConfigScan (configPath string , content []byte ) error {
185+ func (si * SearchIndexer ) handleConfigScan (configPath string , content []byte ) (err error ) {
186+ // Add panic recovery to prevent the entire application from crashing
187+ defer func () {
188+ if r := recover (); r != nil {
189+ err = fmt .Errorf ("panic during config scan: %v" , r )
190+ logger .Error ("Panic occurred while scanning config" , "config_path" , configPath , "content_size" , len (content ), "error" , err )
191+ }
192+ }()
193+
194+ // File size limit: 10MB to prevent memory overflow
195+ const maxFileSize = 10 * 1024 * 1024 // 10MB
196+ if len (content ) > maxFileSize {
197+ logger .Warn ("Skipping file due to size limit" , "path" , configPath , "size" , len (content ), "limit" , maxFileSize )
198+ return nil
199+ }
200+
201+ // Skip empty files
202+ if len (content ) == 0 {
203+ return nil
204+ }
205+
206+ // Basic content validation: check if it's text content
207+ if ! isTextContent (content ) {
208+ logger .Warn ("Skipping non-text file" , "path" , configPath )
209+ return nil
210+ }
211+
186212 docType := si .determineConfigType (configPath )
187213 if docType == "" {
188214 return nil // Skip unsupported file types
@@ -214,14 +240,27 @@ func (si *SearchIndexer) determineConfigType(configPath string) string {
214240}
215241
216242// IndexDocument indexes a single document
217- func (si * SearchIndexer ) IndexDocument (doc SearchDocument ) error {
243+ func (si * SearchIndexer ) IndexDocument (doc SearchDocument ) (err error ) {
244+ // Add panic recovery to prevent the entire application from crashing
245+ defer func () {
246+ if r := recover (); r != nil {
247+ err = fmt .Errorf ("panic during indexing: %v" , r )
248+ logger .Error ("Panic occurred while indexing document" , "document_id" , doc .ID , "error" , err )
249+ }
250+ }()
251+
218252 si .indexMutex .RLock ()
219253 defer si .indexMutex .RUnlock ()
220254
221255 if si .index == nil {
222256 return fmt .Errorf ("search index not initialized" )
223257 }
224258
259+ // Additional size check as a safety measure
260+ if len (doc .Content ) > 50 * 1024 * 1024 { // 50MB absolute limit
261+ return fmt .Errorf ("document content too large: %d bytes" , len (doc .Content ))
262+ }
263+
225264 // logger.Debugf("Indexing document: ID=%s, Type=%s, Name=%s, Path=%s",
226265 // doc.ID, doc.Type, doc.Name, doc.Path)
227266
@@ -497,3 +536,50 @@ func SearchConfigs(ctx context.Context, query string, limit int) ([]SearchResult
497536func SearchAll (ctx context.Context , query string , limit int ) ([]SearchResult , error ) {
498537 return GetSearchIndexer ().Search (ctx , query , limit )
499538}
539+
// isTextContent reports whether content looks like text rather than binary
// data. It is a heuristic that helps prevent indexing binary files that were
// misidentified as config files.
//
// The check has two stages:
//  1. Content that starts with a well-known binary signature (ELF, PNG, JPEG,
//     ZIP) is rejected outright.
//  2. Up to the first 8KB is decoded as UTF-8. The content is treated as
//     binary when more than 30% of the decoded characters are control
//     characters or bytes that do not form valid UTF-8.
//
// Valid multi-byte UTF-8 characters (for example non-English comments) count
// as printable, so such files are not mistaken for binary data. Empty content
// is considered text.
func isTextContent(content []byte) bool {
	if len(content) == 0 {
		return true // Empty content is considered text
	}

	// Reject common binary file signatures.
	if len(content) >= 4 {
		switch string(content[:4]) {
		case "\x7fELF", // ELF
			"\x89PNG",     // PNG
			"PK\x03\x04", // ZIP
			"PK\x05\x06", // ZIP (empty)
			"PK\x07\x08": // ZIP (spanned)
			return false
		}
		if string(content[:3]) == "\xff\xd8\xff" { // JPEG
			return false
		}
	}

	// Sample up to 8KB for performance. Truncation may split one multi-byte
	// character at the end of the sample; the few stray bytes that result are
	// negligible against the 30% threshold.
	const maxSample = 8192
	sample := content
	if len(sample) > maxSample {
		sample = sample[:maxSample]
	}

	// Ranging over a string decodes UTF-8: every byte that is not part of a
	// valid encoding is yielded as its own U+FFFD rune, so invalid bytes are
	// still counted one by one, while a valid multi-byte character counts as a
	// single printable character.
	total, nonPrintable := 0, 0
	for _, r := range string(sample) {
		total++
		switch {
		case r == '\t', r == '\n', r == '\r':
			// Whitespace control characters are expected in text.
		case r < 0x20, r == 0x7F:
			nonPrintable++ // ASCII control characters, including NUL and DEL
		case r >= 0x80 && r <= 0x9F:
			nonPrintable++ // C1 control characters
		case r == '\uFFFD':
			nonPrintable++ // invalid UTF-8 byte (or a literal replacement character)
		}
	}

	// If more than 30% of the sampled characters are non-printable, consider
	// the content binary.
	threshold := float64(total) * 0.3
	return float64(nonPrintable) <= threshold
}
0 commit comments