1212#include < cmath>
1313#include < cctype>
1414#include < algorithm>
15+ #include < filesystem>
16+ #include < system_error>
17+
18+ #ifdef _WIN32
19+ #include < windows.h>
20+ #define getpid GetCurrentProcessId
21+ #else
22+ #include < sys/stat.h>
23+ #include < unistd.h>
24+ #endif
1525
1626struct quant_option {
1727 std::string name;
@@ -76,6 +86,36 @@ static const char * const LLM_KV_IMATRIX_DATASETS = "imatrix.datasets";
7686static const char * const LLM_KV_IMATRIX_CHUNK_COUNT = " imatrix.chunk_count" ;
7787static const char * const LLM_KV_IMATRIX_CHUNK_SIZE = " imatrix.chunk_size" ;
7888
89+ // Check if two paths refer to the same physical file
90+ // Returns true if they are the same file (including via hardlinks or symlinks)
91+ static bool same_file (const std::string & path_a, const std::string & path_b) {
92+ std::error_code ec_a, ec_b;
93+
94+ // First try using std::filesystem to resolve canonical paths
95+ auto canonical_a = std::filesystem::weakly_canonical (path_a, ec_a);
96+ auto canonical_b = std::filesystem::weakly_canonical (path_b, ec_b);
97+
98+ if (!ec_a && !ec_b) {
99+ // If both paths were successfully canonicalized, compare them
100+ if (canonical_a == canonical_b) {
101+ return true ;
102+ }
103+ }
104+
105+ #ifndef _WIN32
106+ // On Unix-like systems, also check using stat() to handle hardlinks
107+ struct stat stat_a, stat_b;
108+ if (stat (path_a.c_str (), &stat_a) == 0 && stat (path_b.c_str (), &stat_b) == 0 ) {
109+ // Same file if device and inode match
110+ if (stat_a.st_dev == stat_b.st_dev && stat_a.st_ino == stat_b.st_ino ) {
111+ return true ;
112+ }
113+ }
114+ #endif
115+
116+ return false ;
117+ }
118+
79119static bool striequals (const char * a, const char * b) {
80120 while (*a && *b) {
81121 if (std::tolower (*a) != std::tolower (*b)) {
@@ -119,7 +159,7 @@ static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftyp
119159static void usage (const char * executable) {
120160 printf (" usage: %s [--help] [--allow-requantize] [--leave-output-tensor] [--pure] [--imatrix] [--include-weights]\n " , executable);
121161 printf (" [--exclude-weights] [--output-tensor-type] [--token-embedding-type] [--tensor-type] [--prune-layers] [--keep-split] [--override-kv]\n " );
122- printf (" model-f32.gguf [model-quant.gguf] type [nthreads]\n\n " );
162+ printf (" [--inplace] [--overwrite] model-f32.gguf [model-quant.gguf] type [nthreads]\n\n " );
123163 printf (" --allow-requantize: Allows requantizing tensors that have already been quantized. Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n " );
124164 printf (" --leave-output-tensor: Will leave output.weight un(re)quantized. Increases model size but may also increase quality, especially when requantizing\n " );
125165 printf (" --pure: Disable k-quant mixtures and quantize all tensors to the same type\n " );
@@ -135,6 +175,8 @@ static void usage(const char * executable) {
135175 printf (" --keep-split: will generate quantized model in the same shards as input\n " );
136176 printf (" --override-kv KEY=TYPE:VALUE\n " );
137177 printf (" Advanced option to override model metadata by key in the quantized model. May be specified multiple times.\n " );
178+ printf (" --inplace: Allow in-place quantization (input == output). Uses temp file + atomic rename for safety.\n " );
179+ printf (" --overwrite: Allow overwriting existing output file (still uses temp file for safety).\n " );
138180 printf (" Note: --include-weights and --exclude-weights cannot be used together\n " );
139181 printf (" \n Allowed quantization types:\n " );
140182 for (const auto & it : QUANT_OPTIONS) {
@@ -453,6 +495,8 @@ int main(int argc, char ** argv) {
453495 std::vector<llama_model_kv_override> kv_overrides;
454496 std::vector<tensor_quantization> tensor_types;
455497 std::vector<int > prune_layers;
498+ bool allow_inplace = false ;
499+ bool allow_overwrite = false ;
456500
457501 for (; arg_idx < argc && strncmp (argv[arg_idx], " --" , 2 ) == 0 ; arg_idx++) {
458502 if (strcmp (argv[arg_idx], " --leave-output-tensor" ) == 0 ) {
@@ -511,6 +555,10 @@ int main(int argc, char ** argv) {
511555 }
512556 } else if (strcmp (argv[arg_idx], " --keep-split" ) == 0 ) {
513557 params.keep_split = true ;
558+ } else if (strcmp (argv[arg_idx], " --inplace" ) == 0 ) {
559+ allow_inplace = true ;
560+ } else if (strcmp (argv[arg_idx], " --overwrite" ) == 0 ) {
561+ allow_overwrite = true ;
514562 } else {
515563 usage (argv[0 ]);
516564 }
@@ -645,6 +693,62 @@ int main(int argc, char ** argv) {
645693
646694 print_build_info ();
647695
696+ // Check if input and output refer to the same physical file
697+ // This prevents catastrophic data loss from truncating the input while reading it
698+ if (same_file (fname_inp, fname_out)) {
699+ if (!allow_inplace) {
700+ fprintf (stderr, " \n ==========================================================================================================\n " );
701+ fprintf (stderr, " ERROR: Input and output files are the same: '%s'\n " , fname_inp.c_str ());
702+ fprintf (stderr, " This would truncate the input file while reading it, causing data corruption and SIGBUS errors.\n " );
703+ fprintf (stderr, " \n " );
704+ fprintf (stderr, " Solutions:\n " );
705+ fprintf (stderr, " 1. Specify a different output filename\n " );
706+ fprintf (stderr, " 2. Use --inplace flag to enable safe in-place quantization (uses temp file + atomic rename)\n " );
707+ fprintf (stderr, " ==========================================================================================================\n\n " );
708+ llama_backend_free ();
709+ return 1 ;
710+ }
711+ fprintf (stderr, " %s: WARNING: in-place quantization detected, using temporary file for safety\n " , __func__);
712+ }
713+
714+ // Check if output file already exists (unless overwrite is allowed)
715+ if (!allow_overwrite && !allow_inplace) {
716+ std::error_code ec;
717+ if (std::filesystem::exists (fname_out, ec)) {
718+ fprintf (stderr, " \n ==========================================================================================================\n " );
719+ fprintf (stderr, " ERROR: Output file already exists: '%s'\n " , fname_out.c_str ());
720+ fprintf (stderr, " Use --overwrite flag to allow overwriting existing files.\n " );
721+ fprintf (stderr, " ==========================================================================================================\n\n " );
722+ llama_backend_free ();
723+ return 1 ;
724+ }
725+ }
726+
727+ // Prepare actual output path (may be temporary for in-place or overwrite mode)
728+ std::string fname_out_actual = fname_out;
729+ std::string fname_out_temp;
730+ bool use_temp_file = allow_inplace && same_file (fname_inp, fname_out);
731+
732+ if (use_temp_file || allow_overwrite) {
733+ // Create temp file in the same directory as output for atomic rename
734+ std::filesystem::path out_path (fname_out);
735+ std::filesystem::path out_dir = out_path.parent_path ();
736+ if (out_dir.empty ()) {
737+ out_dir = " ." ;
738+ }
739+
740+ // Generate temp filename
741+ fname_out_temp = (out_dir / (" ." + out_path.filename ().string () + " .tmp.XXXXXX" )).string ();
742+
743+ // Create the temp file safely
744+ // Note: mkstemp would be safer but requires char* and creates the file
745+ // For simplicity, we'll use a simpler approach with PID
746+ fname_out_temp = (out_dir / (" ." + out_path.filename ().string () + " .tmp." + std::to_string (getpid ()))).string ();
747+ fname_out_actual = fname_out_temp;
748+
749+ fprintf (stderr, " %s: using temporary file: '%s'\n " , __func__, fname_out_actual.c_str ());
750+ }
751+
648752 fprintf (stderr, " %s: quantizing '%s' to '%s' as %s" , __func__, fname_inp.c_str (), fname_out.c_str (), ftype_str.c_str ());
649753 if (params.nthread > 0 ) {
650754 fprintf (stderr, " using %d threads" , params.nthread );
@@ -659,13 +763,41 @@ int main(int argc, char ** argv) {
659763 {
660764 const int64_t t_start_us = llama_time_us ();
661765
662- if (llama_model_quantize (fname_inp.c_str (), fname_out .c_str (), ¶ms)) {
766+ if (llama_model_quantize (fname_inp.c_str (), fname_out_actual .c_str (), ¶ms)) {
663767 fprintf (stderr, " %s: failed to quantize model from '%s'\n " , __func__, fname_inp.c_str ());
768+
769+ // Clean up temp file on failure
770+ if (!fname_out_temp.empty ()) {
771+ std::error_code ec;
772+ std::filesystem::remove (fname_out_temp, ec);
773+ }
774+
775+ llama_backend_free ();
664776 return 1 ;
665777 }
666778
667779 t_quantize_us = llama_time_us () - t_start_us;
668780 }
781+
782+ // If we used a temp file, atomically rename it to the final output
783+ if (!fname_out_temp.empty ()) {
784+ fprintf (stderr, " %s: atomically moving temp file to final output\n " , __func__);
785+ std::error_code ec;
786+
787+ // On POSIX systems, rename() is atomic when both paths are on the same filesystem
788+ std::filesystem::rename (fname_out_temp, fname_out, ec);
789+
790+ if (ec) {
791+ fprintf (stderr, " %s: failed to rename temp file '%s' to '%s': %s\n " ,
792+ __func__, fname_out_temp.c_str (), fname_out.c_str (), ec.message ().c_str ());
793+
794+ // Try to clean up temp file
795+ std::filesystem::remove (fname_out_temp, ec);
796+
797+ llama_backend_free ();
798+ return 1 ;
799+ }
800+ }
669801
670802 // report timing
671803 {
0 commit comments