From 96fdb9bd1607d034d5ec5c824ad9e6a58a53efd9 Mon Sep 17 00:00:00 2001 From: Neil Johari Date: Mon, 15 Sep 2025 23:58:45 -0700 Subject: [PATCH 1/3] Add debug logging and simple repro --- programs/dibio.c | 20 +++++++++++++++++++ test_allocation_bug.sh | 44 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100755 test_allocation_bug.sh diff --git a/programs/dibio.c b/programs/dibio.c index 63c455a2216..f5f33f1bd5a 100644 --- a/programs/dibio.c +++ b/programs/dibio.c @@ -279,10 +279,16 @@ static fileStats DiB_fileStats(const char** fileNamesTable, int nbFiles, size_t for (n=0; n alloc_test/good_$i.txt +done + +echo "Valid files created (about 6 bytes each = 30 bytes total)" +echo "" + +# We need enough bad files to make totalSizeToLoad negative +# 30 bytes positive, so we need at least 31 bad files +echo "Adding 1000 non-existent files to make totalSizeToLoad very negative..." +echo "Expected: totalSizeToLoad = 30 + (1000 * -1) = -970 bytes" +echo "" + +# Build command +CMD="./zstd --train alloc_test/good_*.txt" +for i in {1..1000}; do + CMD="$CMD alloc_test/BAD_$i" +done +CMD="$CMD -o alloc_test/dict.zst --maxdict=65536 2>&1" + +echo "Running command..." +echo "=================" + +# Run and capture ALL debug output related to our issue +eval $CMD | grep -E "\[DEBUG FINAL\]|\[DEBUG\] Memory calc|\[BUG\]|About to malloc|Error|not enough memory" + +echo "" +echo "Output should show something like the following:" +echo "1. [DEBUG FINAL] fileStats: totalSizeToLoad=-970 (NEGATIVE!)" +echo "2. [BUG] totalSizeToLoad is NEGATIVE!" +echo "3. [DEBUG] Memory calc: showing huge loadedSize value" +echo "4. 
Error about memory allocation" From 85f4a7e84e266d9545a309ff3f25e71345d8be62 Mon Sep 17 00:00:00 2001 From: Neil Johari Date: Tue, 16 Sep 2025 00:02:04 -0700 Subject: [PATCH 2/3] Fix bug: skip sample files with negative (unreadable) size in DiB_fileStats --- programs/dibio.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/programs/dibio.c b/programs/dibio.c index f5f33f1bd5a..29a27d9dbb4 100644 --- a/programs/dibio.c +++ b/programs/dibio.c @@ -282,13 +282,16 @@ static fileStats DiB_fileStats(const char** fileNamesTable, int nbFiles, size_t DISPLAYLEVEL(1, "[DEBUG] File '%s': size=%lld\n", fileNamesTable[n], (long long)fileSize); /* TODO: is there a minimum sample size? What if the file is 1-byte? */ - if (fileSize == 0) { - DISPLAYLEVEL(3, "Sample file '%s' has zero size, skipping...\n", fileNamesTable[n]); + /* Skip empty or invalid files */ + if (fileSize <= 0) { + if (fileSize < 0) { + DISPLAYLEVEL(3, "Sample file '%s' is unreadable or stat failed, skipping...\n", + fileNamesTable[n]); + } else { + DISPLAYLEVEL(3, "Sample file '%s' has zero size, skipping...\n", + fileNamesTable[n]); + } continue; - } else if (fileSize < 0) { - /* BUG: This path is NOT skipped but should be! 
*/ - DISPLAYLEVEL(1, "[BUG] File '%s' has NEGATIVE size %lld but is NOT skipped!\n", - fileNamesTable[n], (long long)fileSize); } /* the case where we are breaking up files in sample chunks */ From 236e44f00fed5cbcd02b4936b765f3bb97ad0e98 Mon Sep 17 00:00:00 2001 From: Neil Johari Date: Tue, 16 Sep 2025 00:03:08 -0700 Subject: [PATCH 3/3] Remove debug logging and repro script --- programs/dibio.c | 16 --------------- test_allocation_bug.sh | 44 ------------------------------------------ 2 files changed, 60 deletions(-) delete mode 100755 test_allocation_bug.sh diff --git a/programs/dibio.c b/programs/dibio.c index 29a27d9dbb4..dc629103ba2 100644 --- a/programs/dibio.c +++ b/programs/dibio.c @@ -279,8 +279,6 @@ static fileStats DiB_fileStats(const char** fileNamesTable, int nbFiles, size_t for (n=0; n alloc_test/good_$i.txt -done - -echo "Valid files created (about 6 bytes each = 30 bytes total)" -echo "" - -# We need enough bad files to make totalSizeToLoad negative -# 30 bytes positive, so we need at least 31 bad files -echo "Adding 1000 non-existent files to make totalSizeToLoad very negative..." -echo "Expected: totalSizeToLoad = 30 + (1000 * -1) = -970 bytes" -echo "" - -# Build command -CMD="./zstd --train alloc_test/good_*.txt" -for i in {1..1000}; do - CMD="$CMD alloc_test/BAD_$i" -done -CMD="$CMD -o alloc_test/dict.zst --maxdict=65536 2>&1" - -echo "Running command..." -echo "=================" - -# Run and capture ALL debug output related to our issue -eval $CMD | grep -E "\[DEBUG FINAL\]|\[DEBUG\] Memory calc|\[BUG\]|About to malloc|Error|not enough memory" - -echo "" -echo "Output should show something like the following:" -echo "1. [DEBUG FINAL] fileStats: totalSizeToLoad=-970 (NEGATIVE!)" -echo "2. [BUG] totalSizeToLoad is NEGATIVE!" -echo "3. [DEBUG] Memory calc: showing huge loadedSize value" -echo "4. Error about memory allocation"