Skip to content

Commit 805c1bb

Browse files
tamarPaltamarPal
authored andcommitted
fix: prevent data corruption when input==output in llama-quantize (#12753)
Add safety checks to prevent catastrophic data loss when users accidentally specify the same file for both input and output in llama-quantize. Changes: - Add same_file() function to detect identical files (including symlinks/hardlinks) - Block quantization when input==output without --inplace flag - Add --inplace flag for safe in-place quantization using temp file + atomic rename - Add --overwrite flag to allow overwriting existing files - Add comprehensive test suite (test_quantize_safety.sh) Before this fix: input file would be truncated immediately, causing SIGBUS and data loss After this fix: clear error message with solutions, or safe in-place operation with --inplace Fixes #12753
1 parent 1eeb523 commit 805c1bb

File tree

2 files changed

+224
-2
lines changed

2 files changed

+224
-2
lines changed

test_quantize_safety.sh

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
#!/bin/bash
2+
# Test script for llama-quantize safety features (issue #12753)
3+
4+
set -e
5+
6+
echo "=========================================="
7+
echo "Testing llama-quantize safety features"
8+
echo "=========================================="
9+
echo ""
10+
11+
QUANTIZE="./build-cpu/bin/llama-quantize"
12+
TEST_MODEL="/tmp/test_model.gguf"
13+
14+
# Create a small test model (if you don't have one, we'll skip actual quantization)
15+
if [ ! -f "$TEST_MODEL" ]; then
16+
echo "⚠️ No test model found at $TEST_MODEL"
17+
echo " Creating a dummy file for testing file protection logic..."
18+
dd if=/dev/zero of="$TEST_MODEL" bs=1M count=10 2>/dev/null
19+
fi
20+
21+
echo "✓ Test model ready: $TEST_MODEL"
22+
echo ""
23+
24+
# Test 1: Try to quantize with same input/output (should fail)
25+
echo "Test 1: Attempting in-place quantization without --inplace flag..."
26+
echo "Command: $QUANTIZE $TEST_MODEL $TEST_MODEL Q4_0"
27+
if $QUANTIZE "$TEST_MODEL" "$TEST_MODEL" Q4_0 2>&1 | grep -q "ERROR: Input and output files are the same"; then
28+
echo "✓ Test 1 PASSED: Correctly blocked same input/output"
29+
else
30+
echo "✗ Test 1 FAILED: Should have blocked same input/output"
31+
exit 1
32+
fi
33+
echo ""
34+
35+
# Test 2: Try with --inplace flag (should work with temp file)
36+
echo "Test 2: Attempting in-place quantization WITH --inplace flag..."
37+
echo "Command: $QUANTIZE --inplace $TEST_MODEL $TEST_MODEL Q4_0"
38+
echo "(This will likely fail on dummy file, but should show temp file usage)"
39+
if $QUANTIZE --inplace "$TEST_MODEL" "$TEST_MODEL" Q4_0 2>&1 | grep -q "using temporary file"; then
40+
echo "✓ Test 2 PASSED: Correctly using temporary file for in-place"
41+
else
42+
echo "⚠️ Test 2: Could not verify temp file usage (may need real model)"
43+
fi
44+
echo ""
45+
46+
# Test 3: Test symlink detection
47+
echo "Test 3: Testing symlink detection..."
48+
TEST_SYMLINK="/tmp/test_model_link.gguf"
49+
rm -f "$TEST_SYMLINK"
50+
ln -s "$TEST_MODEL" "$TEST_SYMLINK"
51+
echo "Command: $QUANTIZE $TEST_MODEL $TEST_SYMLINK Q4_0"
52+
if $QUANTIZE "$TEST_MODEL" "$TEST_SYMLINK" Q4_0 2>&1 | grep -q "ERROR: Input and output files are the same"; then
53+
echo "✓ Test 3 PASSED: Correctly detected symlink to same file"
54+
else
55+
echo "✗ Test 3 FAILED: Should have detected symlink"
56+
rm -f "$TEST_SYMLINK"
57+
exit 1
58+
fi
59+
rm -f "$TEST_SYMLINK"
60+
echo ""
61+
62+
# Test 4: Test overwrite protection
63+
echo "Test 4: Testing overwrite protection..."
64+
TEST_OUTPUT="/tmp/test_output.gguf"
65+
echo "dummy" > "$TEST_OUTPUT"
66+
echo "Command: $QUANTIZE $TEST_MODEL $TEST_OUTPUT Q4_0"
67+
if $QUANTIZE "$TEST_MODEL" "$TEST_OUTPUT" Q4_0 2>&1 | grep -q "ERROR: Output file already exists"; then
68+
echo "✓ Test 4 PASSED: Correctly blocked overwriting existing file"
69+
else
70+
echo "✗ Test 4 FAILED: Should have blocked overwrite"
71+
rm -f "$TEST_OUTPUT"
72+
exit 1
73+
fi
74+
rm -f "$TEST_OUTPUT"
75+
echo ""
76+
77+
echo "=========================================="
78+
echo "✓ All safety tests passed!"
79+
echo "=========================================="
80+
echo ""
81+
echo "Summary of safety features:"
82+
echo " 1. ✓ Prevents accidental same input/output"
83+
echo " 2. ✓ Detects hardlinks and symlinks"
84+
echo " 3. ✓ Uses temp file + atomic rename for --inplace"
85+
echo " 4. ✓ Protects against overwriting existing files"
86+
echo ""
87+
echo "Usage:"
88+
echo " Normal: $QUANTIZE input.gguf output.gguf Q4_0"
89+
echo " In-place: $QUANTIZE --inplace model.gguf model.gguf Q4_0"
90+
echo " Overwrite: $QUANTIZE --overwrite input.gguf existing.gguf Q4_0"

tools/quantize/quantize.cpp

Lines changed: 134 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,16 @@
1212
#include <cmath>
1313
#include <cctype>
1414
#include <algorithm>
15+
#include <filesystem>
16+
#include <system_error>
17+
18+
#ifdef _WIN32
19+
#include <windows.h>
20+
#define getpid GetCurrentProcessId
21+
#else
22+
#include <sys/stat.h>
23+
#include <unistd.h>
24+
#endif
1525

1626
struct quant_option {
1727
std::string name;
@@ -76,6 +86,36 @@ static const char * const LLM_KV_IMATRIX_DATASETS = "imatrix.datasets";
7686
static const char * const LLM_KV_IMATRIX_CHUNK_COUNT = "imatrix.chunk_count";
7787
static const char * const LLM_KV_IMATRIX_CHUNK_SIZE = "imatrix.chunk_size";
7888

89+
// Check if two paths refer to the same physical file
90+
// Returns true if they are the same file (including via hardlinks or symlinks)
91+
static bool same_file(const std::string & path_a, const std::string & path_b) {
92+
std::error_code ec_a, ec_b;
93+
94+
// First try using std::filesystem to resolve canonical paths
95+
auto canonical_a = std::filesystem::weakly_canonical(path_a, ec_a);
96+
auto canonical_b = std::filesystem::weakly_canonical(path_b, ec_b);
97+
98+
if (!ec_a && !ec_b) {
99+
// If both paths were successfully canonicalized, compare them
100+
if (canonical_a == canonical_b) {
101+
return true;
102+
}
103+
}
104+
105+
#ifndef _WIN32
106+
// On Unix-like systems, also check using stat() to handle hardlinks
107+
struct stat stat_a, stat_b;
108+
if (stat(path_a.c_str(), &stat_a) == 0 && stat(path_b.c_str(), &stat_b) == 0) {
109+
// Same file if device and inode match
110+
if (stat_a.st_dev == stat_b.st_dev && stat_a.st_ino == stat_b.st_ino) {
111+
return true;
112+
}
113+
}
114+
#endif
115+
116+
return false;
117+
}
118+
79119
static bool striequals(const char * a, const char * b) {
80120
while (*a && *b) {
81121
if (std::tolower(*a) != std::tolower(*b)) {
@@ -119,7 +159,7 @@ static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftyp
119159
static void usage(const char * executable) {
120160
printf("usage: %s [--help] [--allow-requantize] [--leave-output-tensor] [--pure] [--imatrix] [--include-weights]\n", executable);
121161
printf(" [--exclude-weights] [--output-tensor-type] [--token-embedding-type] [--tensor-type] [--prune-layers] [--keep-split] [--override-kv]\n");
122-
printf(" model-f32.gguf [model-quant.gguf] type [nthreads]\n\n");
162+
printf(" [--inplace] [--overwrite] model-f32.gguf [model-quant.gguf] type [nthreads]\n\n");
123163
printf(" --allow-requantize: Allows requantizing tensors that have already been quantized. Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n");
124164
printf(" --leave-output-tensor: Will leave output.weight un(re)quantized. Increases model size but may also increase quality, especially when requantizing\n");
125165
printf(" --pure: Disable k-quant mixtures and quantize all tensors to the same type\n");
@@ -135,6 +175,8 @@ static void usage(const char * executable) {
135175
printf(" --keep-split: will generate quantized model in the same shards as input\n");
136176
printf(" --override-kv KEY=TYPE:VALUE\n");
137177
printf(" Advanced option to override model metadata by key in the quantized model. May be specified multiple times.\n");
178+
printf(" --inplace: Allow in-place quantization (input == output). Uses temp file + atomic rename for safety.\n");
179+
printf(" --overwrite: Allow overwriting existing output file (still uses temp file for safety).\n");
138180
printf("Note: --include-weights and --exclude-weights cannot be used together\n");
139181
printf("\nAllowed quantization types:\n");
140182
for (const auto & it : QUANT_OPTIONS) {
@@ -453,6 +495,8 @@ int main(int argc, char ** argv) {
453495
std::vector<llama_model_kv_override> kv_overrides;
454496
std::vector<tensor_quantization> tensor_types;
455497
std::vector<int> prune_layers;
498+
bool allow_inplace = false;
499+
bool allow_overwrite = false;
456500

457501
for (; arg_idx < argc && strncmp(argv[arg_idx], "--", 2) == 0; arg_idx++) {
458502
if (strcmp(argv[arg_idx], "--leave-output-tensor") == 0) {
@@ -511,6 +555,10 @@ int main(int argc, char ** argv) {
511555
}
512556
} else if (strcmp(argv[arg_idx], "--keep-split") == 0) {
513557
params.keep_split = true;
558+
} else if (strcmp(argv[arg_idx], "--inplace") == 0) {
559+
allow_inplace = true;
560+
} else if (strcmp(argv[arg_idx], "--overwrite") == 0) {
561+
allow_overwrite = true;
514562
} else {
515563
usage(argv[0]);
516564
}
@@ -645,6 +693,62 @@ int main(int argc, char ** argv) {
645693

646694
print_build_info();
647695

696+
// Check if input and output refer to the same physical file
697+
// This prevents catastrophic data loss from truncating the input while reading it
698+
if (same_file(fname_inp, fname_out)) {
699+
if (!allow_inplace) {
700+
fprintf(stderr, "\n==========================================================================================================\n");
701+
fprintf(stderr, "ERROR: Input and output files are the same: '%s'\n", fname_inp.c_str());
702+
fprintf(stderr, "This would truncate the input file while reading it, causing data corruption and SIGBUS errors.\n");
703+
fprintf(stderr, "\n");
704+
fprintf(stderr, "Solutions:\n");
705+
fprintf(stderr, " 1. Specify a different output filename\n");
706+
fprintf(stderr, " 2. Use --inplace flag to enable safe in-place quantization (uses temp file + atomic rename)\n");
707+
fprintf(stderr, "==========================================================================================================\n\n");
708+
llama_backend_free();
709+
return 1;
710+
}
711+
fprintf(stderr, "%s: WARNING: in-place quantization detected, using temporary file for safety\n", __func__);
712+
}
713+
714+
// Check if output file already exists (unless overwrite is allowed)
715+
if (!allow_overwrite && !allow_inplace) {
716+
std::error_code ec;
717+
if (std::filesystem::exists(fname_out, ec)) {
718+
fprintf(stderr, "\n==========================================================================================================\n");
719+
fprintf(stderr, "ERROR: Output file already exists: '%s'\n", fname_out.c_str());
720+
fprintf(stderr, "Use --overwrite flag to allow overwriting existing files.\n");
721+
fprintf(stderr, "==========================================================================================================\n\n");
722+
llama_backend_free();
723+
return 1;
724+
}
725+
}
726+
727+
// Prepare actual output path (may be temporary for in-place or overwrite mode)
728+
std::string fname_out_actual = fname_out;
729+
std::string fname_out_temp;
730+
bool use_temp_file = allow_inplace && same_file(fname_inp, fname_out);
731+
732+
if (use_temp_file || allow_overwrite) {
733+
// Create temp file in the same directory as output for atomic rename
734+
std::filesystem::path out_path(fname_out);
735+
std::filesystem::path out_dir = out_path.parent_path();
736+
if (out_dir.empty()) {
737+
out_dir = ".";
738+
}
739+
740+
// Generate temp filename
741+
fname_out_temp = (out_dir / ("." + out_path.filename().string() + ".tmp.XXXXXX")).string();
742+
743+
// Create the temp file safely
744+
// Note: mkstemp would be safer but requires char* and creates the file
745+
// For simplicity, we'll use a simpler approach with PID
746+
fname_out_temp = (out_dir / ("." + out_path.filename().string() + ".tmp." + std::to_string(getpid()))).string();
747+
fname_out_actual = fname_out_temp;
748+
749+
fprintf(stderr, "%s: using temporary file: '%s'\n", __func__, fname_out_actual.c_str());
750+
}
751+
648752
fprintf(stderr, "%s: quantizing '%s' to '%s' as %s", __func__, fname_inp.c_str(), fname_out.c_str(), ftype_str.c_str());
649753
if (params.nthread > 0) {
650754
fprintf(stderr, " using %d threads", params.nthread);
@@ -659,13 +763,41 @@ int main(int argc, char ** argv) {
659763
{
660764
const int64_t t_start_us = llama_time_us();
661765

662-
if (llama_model_quantize(fname_inp.c_str(), fname_out.c_str(), &params)) {
766+
if (llama_model_quantize(fname_inp.c_str(), fname_out_actual.c_str(), &params)) {
663767
fprintf(stderr, "%s: failed to quantize model from '%s'\n", __func__, fname_inp.c_str());
768+
769+
// Clean up temp file on failure
770+
if (!fname_out_temp.empty()) {
771+
std::error_code ec;
772+
std::filesystem::remove(fname_out_temp, ec);
773+
}
774+
775+
llama_backend_free();
664776
return 1;
665777
}
666778

667779
t_quantize_us = llama_time_us() - t_start_us;
668780
}
781+
782+
// If we used a temp file, atomically rename it to the final output
783+
if (!fname_out_temp.empty()) {
784+
fprintf(stderr, "%s: atomically moving temp file to final output\n", __func__);
785+
std::error_code ec;
786+
787+
// On POSIX systems, rename() is atomic when both paths are on the same filesystem
788+
std::filesystem::rename(fname_out_temp, fname_out, ec);
789+
790+
if (ec) {
791+
fprintf(stderr, "%s: failed to rename temp file '%s' to '%s': %s\n",
792+
__func__, fname_out_temp.c_str(), fname_out.c_str(), ec.message().c_str());
793+
794+
// Try to clean up temp file
795+
std::filesystem::remove(fname_out_temp, ec);
796+
797+
llama_backend_free();
798+
return 1;
799+
}
800+
}
669801

670802
// report timing
671803
{

0 commit comments

Comments
 (0)