Skip to content

Commit 43426c0

Browse files
committed
fix: also strip \x00
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
1 parent 8ce78de commit 43426c0

File tree

2 files changed

+7
-5
lines changed

2 files changed

+7
-5
lines changed

rag/engine/postgres.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -481,16 +481,16 @@ func (p *PostgresDB) StoreDocuments(s []string, metadata map[string]string) ([]R
481481

482482
// Insert documents
483483
for i, content := range s {
484-
// Sanitize content to prevent invalid UTF-8 from reaching PostgreSQL
485-
content = strings.ToValidUTF8(content, " ")
484+
// Sanitize content to prevent invalid UTF-8 and null bytes from reaching PostgreSQL
485+
content = strings.ReplaceAll(strings.ToValidUTF8(content, " "), "\x00", "")
486486

487487
embedding := resp.Data[i].Embedding
488488
embeddingStr := formatVector(embedding)
489489

490490
// Extract title from metadata if available
491-
title := strings.ToValidUTF8(metadata["title"], " ")
491+
title := strings.ReplaceAll(strings.ToValidUTF8(metadata["title"], " "), "\x00", "")
492492
if title == "" {
493-
title = strings.ToValidUTF8(metadata["source"], " ")
493+
title = strings.ReplaceAll(strings.ToValidUTF8(metadata["source"], " "), "\x00", "")
494494
}
495495

496496
// Calculate word count

rag/persistency.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -476,7 +476,9 @@ func fileToText(fpath string) (string, error) {
476476
buf.ReadFrom(b)
477477
// PDF extraction can produce invalid UTF-8 byte sequences that PostgreSQL rejects.
478478
// Sanitize by replacing invalid sequences with a space and removing NUL (\x00) bytes, which PostgreSQL text values also reject.
479-
return strings.ToValidUTF8(buf.String(), " "), nil
479+
text := strings.ToValidUTF8(buf.String(), " ")
480+
text = strings.ReplaceAll(text, "\x00", "")
481+
return text, nil
480482
case ".txt", ".md":
481483
f, err := os.Open(fpath)
482484
if err != nil {

0 commit comments

Comments
 (0)