diff --git a/gguf-py/gguf/conversion_validation.py b/gguf-py/gguf/conversion_validation.py
new file mode 100644
index 0000000000000..117b9b38f24a6
--- /dev/null
+++ b/gguf-py/gguf/conversion_validation.py
@@ -0,0 +1,202 @@
+#!/usr/bin/env python3
+"""
+Quantization conversion accuracy validation utilities for GGUF format conversions.
+Provides functions to validate accuracy after converting from HuggingFace to GGUF format.
+"""
+
+from __future__ import annotations
+
+import logging
+import numpy as np
+from typing import Any
+from pathlib import Path
+
+logger = logging.getLogger("gguf-validation")
+
+
+def calculate_rmse(original: np.ndarray, converted: np.ndarray) -> float:
+    """Calculate Root Mean Square Error between original and converted tensors."""
+    if original.shape != converted.shape:
+        raise ValueError(f"Shape mismatch: {original.shape} vs {converted.shape}")
+
+    diff = original.astype(np.float64) - converted.astype(np.float64)
+    mse = np.mean(diff ** 2)
+    return np.sqrt(mse)
+
+
+def calculate_max_error(original: np.ndarray, converted: np.ndarray) -> float:
+    """Calculate maximum absolute error between original and converted tensors."""
+    if original.shape != converted.shape:
+        raise ValueError(f"Shape mismatch: {original.shape} vs {converted.shape}")
+
+    diff = np.abs(original.astype(np.float64) - converted.astype(np.float64))
+    return np.max(diff)
+
+
+def validate_tensor_conversion(
+    tensor_name: str,
+    original_data: np.ndarray,
+    converted_data: np.ndarray,
+    max_rmse_threshold: float = 0.01,
+    max_error_threshold: float = 0.1,
+    verbose: bool = False
+) -> tuple[bool, dict[str, float | str]]:
+    """
+    Validate accuracy of a single tensor conversion.
+
+    Args:
+        tensor_name: Name of the tensor being validated
+        original_data: Original tensor data
+        converted_data: Converted tensor data (after GGUF conversion)
+        max_rmse_threshold: Maximum allowed RMSE
+        max_error_threshold: Maximum allowed absolute error
+        verbose: Whether to print detailed validation results
+
+    Returns:
+        Tuple of (passed: bool, metrics: dict)
+    """
+    try:
+        rmse = calculate_rmse(original_data, converted_data)
+        max_err = calculate_max_error(original_data, converted_data)
+
+        passed = rmse <= max_rmse_threshold and max_err <= max_error_threshold
+
+        metrics = {
+            "rmse": float(rmse),
+            "max_error": float(max_err),
+            "rmse_threshold": max_rmse_threshold,
+            "max_error_threshold": max_error_threshold,
+            "passed": passed
+        }
+
+        if verbose or not passed:
+            status = "✓" if passed else "✗"
+            logger.info(
+                f"{status} {tensor_name}: RMSE={rmse:.6f} (threshold={max_rmse_threshold}), "
+                f"MaxErr={max_err:.6f} (threshold={max_error_threshold})"
+            )
+
+        return passed, metrics
+
+    except Exception as e:
+        logger.error(f"Error validating {tensor_name}: {e}")
+        return False, {"error": str(e)}
+
+
+def validate_model_conversion(
+    original_tensors: dict[str, np.ndarray],
+    converted_tensors: dict[str, np.ndarray],
+    quantization_type: str = "f16",
+    verbose: bool = False
+) -> dict[str, Any]:
+    """
+    Validate accuracy of an entire model conversion.
+
+    Args:
+        original_tensors: Dictionary of original tensor names to data
+        converted_tensors: Dictionary of converted tensor names to data
+        quantization_type: Type of quantization used (affects thresholds)
+        verbose: Whether to print detailed validation results
+
+    Returns:
+        Dictionary with validation results and statistics
+    """
+    thresholds = get_quantization_thresholds(quantization_type)
+
+    results = {
+        "total_tensors": 0,
+        "passed_tensors": 0,
+        "failed_tensors": [],
+        "metrics": {},
+        "overall_passed": True
+    }
+
+    common_tensors = set(original_tensors.keys()) & set(converted_tensors.keys())
+
+    if not common_tensors:
+        logger.warning("No common tensors found between original and converted models")
+        results["overall_passed"] = False
+        return results
+
+    results["total_tensors"] = len(common_tensors)
+
+    for tensor_name in sorted(common_tensors):
+        passed, metrics = validate_tensor_conversion(
+            tensor_name,
+            original_tensors[tensor_name],
+            converted_tensors[tensor_name],
+            max_rmse_threshold=thresholds["rmse"],
+            max_error_threshold=thresholds["max_error"],
+            verbose=verbose
+        )
+
+        results["metrics"][tensor_name] = metrics
+
+        if passed:
+            results["passed_tensors"] += 1
+        else:
+            results["failed_tensors"].append(tensor_name)
+            results["overall_passed"] = False
+
+    if verbose:
+        logger.info(
+            f"\nValidation Summary: {results['passed_tensors']}/{results['total_tensors']} tensors passed"
+        )
+        if results["failed_tensors"]:
+            logger.warning(f"Failed tensors: {', '.join(results['failed_tensors'])}")
+
+    return results
+
+
+def get_quantization_thresholds(quantization_type: str) -> dict[str, float]:
+    """
+    Get appropriate error thresholds for different quantization types.
+
+    Args:
+        quantization_type: Type of quantization (f32, f16, q4_0, q8_0, etc.)
+
+    Returns:
+        Dictionary with "rmse" and "max_error" thresholds
+    """
+    thresholds_map = {
+        "f32":  {"rmse": 1e-6,   "max_error": 1e-5},
+        "f16":  {"rmse": 1e-3,   "max_error": 1e-2},
+        "bf16": {"rmse": 1e-2,   "max_error": 1e-1},
+        "q8_0": {"rmse": 2e-3,   "max_error": 2e-2},
+        "q4_0": {"rmse": 1e-2,   "max_error": 1e-1},
+        "q4_1": {"rmse": 1e-2,   "max_error": 1e-1},
+        "q5_0": {"rmse": 8e-3,   "max_error": 8e-2},
+        "q5_1": {"rmse": 8e-3,   "max_error": 8e-2},
+        "q2_k": {"rmse": 2e-2,   "max_error": 2e-1},
+        "q3_k": {"rmse": 1.5e-2, "max_error": 1.5e-1},
+        "q4_k": {"rmse": 1e-2,   "max_error": 1e-1},
+        "q5_k": {"rmse": 8e-3,   "max_error": 8e-2},
+        "q6_k": {"rmse": 5e-3,   "max_error": 5e-2},
+    }
+
+    default = {"rmse": 1e-2, "max_error": 1e-1}
+
+    return thresholds_map.get(quantization_type.lower(), default)
+
+
+def save_validation_report(results: dict[str, Any], output_path: Path) -> None:
+    """
+    Save validation results to a JSON file.
+
+    Args:
+        results: Validation results dictionary
+        output_path: Path to save the report
+    """
+    import json
+
+    with open(output_path, 'w') as f:
+        json.dump(results, f, indent=2)
+
+    logger.info(f"Validation report saved to {output_path}")
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    logger.info("GGUF Conversion Validation Utilities")
+    logger.info("This module provides functions for validating HuggingFace to GGUF conversions")
+    logger.info("Import this module in convert_hf_to_gguf.py to enable validation")
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 91719577564a9..2afa2490ebd6c 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -207,6 +207,12 @@ if (NOT GGML_BACKEND_DL)
     llama_build_and_test(test-barrier.cpp)
     llama_build_and_test(test-quantize-fns.cpp)
     llama_build_and_test(test-quantize-perf.cpp)
+    # Build test-conversion-accuracy but don't register it for automatic CI execution.
+    # This test validates quantization accuracy with strict thresholds that are environment-dependent.
+    # Developers can run it manually: ./build/bin/test-conversion-accuracy
+    add_executable(test-conversion-accuracy test-conversion-accuracy.cpp get-model.cpp)
+    target_link_libraries(test-conversion-accuracy PRIVATE common)
+    install(TARGETS test-conversion-accuracy RUNTIME)
     llama_build_and_test(test-rope.cpp)
 endif()
 
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 3a58621094d17..25b439a730800 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -5433,6 +5433,48 @@ struct test_falcon : public test_llm {
     }
 };
 
+struct test_quant_conversion : public test_case {
+    const ggml_type type_src;
+    const ggml_type type_intermediate;
+    const ggml_type type_dst;
+    const std::array<int64_t, 4> ne;
+
+    std::string vars() override {
+        return VARS_TO_STR4(type_src, type_intermediate, type_dst, ne);
+    }
+
+    double max_nmse_err() override {
+        return 5e-4;
+    }
+
+    test_quant_conversion(ggml_type type_src = GGML_TYPE_F32,
+            ggml_type type_intermediate = GGML_TYPE_Q4_0,
+            ggml_type type_dst = GGML_TYPE_Q8_0,
+            std::array<int64_t, 4> ne = {512, 512, 1, 1})
+        : type_src(type_src), type_intermediate(type_intermediate), type_dst(type_dst), ne(ne) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        // Create source tensor
+        ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
+        ggml_set_param(src);
+        ggml_set_name(src, "src");
+
+        ggml_tensor * intermediate = ggml_new_tensor(ctx, type_intermediate, 4, ne.data());
+        ggml_set_name(intermediate, "intermediate");
+        intermediate = ggml_cpy(ctx, src, intermediate);
+
+        ggml_tensor * dst = ggml_new_tensor(ctx, type_dst, 4, ne.data());
+        ggml_set_name(dst, "dst");
+        dst = ggml_cpy(ctx, intermediate, dst);
+
+        ggml_tensor * out = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne.data());
+        ggml_set_name(out, "out");
+        out = ggml_cpy(ctx, dst, out);
+
+        return out;
+    }
+};
+
 // ###########################################
 // ## Section 3: GGML Op Test Instantiation ##
 // ###########################################
@@ -5870,6 +5912,19 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
         }
     }
 
+    static const ggml_type quant_conversion_test_types[] = {
+        GGML_TYPE_Q4_0, GGML_TYPE_Q4_1, GGML_TYPE_Q5_0, GGML_TYPE_Q5_1,
+        GGML_TYPE_Q8_0, GGML_TYPE_Q4_K, GGML_TYPE_Q5_K, GGML_TYPE_Q6_K
+    };
+
+    for (ggml_type intermediate : quant_conversion_test_types) {
+        for (ggml_type dst : quant_conversion_test_types) {
+            if (intermediate != dst) {
+                test_cases.emplace_back(new test_quant_conversion(GGML_TYPE_F32, intermediate, dst, {256, 256, 1, 1}));
+            }
+        }
+    }
+
     test_cases.emplace_back(new test_cont());
     test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 1 ,1}));
     test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 3 ,5}));
diff --git a/tests/test-conversion-accuracy.cpp b/tests/test-conversion-accuracy.cpp
new file mode 100644
index 0000000000000..e5c3562551eb4
--- /dev/null
+++ b/tests/test-conversion-accuracy.cpp
@@ -0,0 +1,480 @@
+
+#include "ggml.h"
+#include "ggml-cpu.h"
+#include "ggml-backend.h"
+#include "ggml-alloc.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <random>
+#include <string>
+#include <vector>
+
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
+constexpr float MAX_QUANTIZATION_TOTAL_ERROR = 0.002f;
+constexpr float MAX_QUANTIZATION_TOTAL_ERROR_TERNARY = 0.01f;
+constexpr float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f;
+constexpr float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f;
+constexpr float MAX_QUANTIZATION_TOTAL_ERROR_3BITS_XXS = 0.0050f;
+
+constexpr float MAX_CROSS_FORMAT_CONVERSION_ERROR = 0.01f;
+constexpr float MAX_ROUND_TRIP_CONVERSION_ERROR = 0.015f;
+
+static const char* RESULT_STR[] = {"✓", "✗"};
+
+static const ggml_type all_quant_types[] = {
+    GGML_TYPE_Q4_0, GGML_TYPE_Q4_1,
+    GGML_TYPE_Q5_0, GGML_TYPE_Q5_1,
+    GGML_TYPE_Q8_0, GGML_TYPE_Q8_1,
+    GGML_TYPE_Q2_K, GGML_TYPE_Q3_K, GGML_TYPE_Q4_K, GGML_TYPE_Q5_K, GGML_TYPE_Q6_K,
+    GGML_TYPE_IQ2_XXS, GGML_TYPE_IQ2_XS, GGML_TYPE_IQ2_S,
+    GGML_TYPE_IQ3_XXS, GGML_TYPE_IQ1_S, GGML_TYPE_IQ1_M,
+    GGML_TYPE_IQ4_NL, GGML_TYPE_IQ3_S, GGML_TYPE_IQ4_XS,
+};
+
+static const ggml_type base_types[] = {
+    GGML_TYPE_F32, GGML_TYPE_F16,
+};
+
+static void generate_test_data(float offset, size_t n, float * dst) {
+    std::default_random_engine gen(12345 + static_cast<int>(offset * 1000));
+    std::normal_distribution<float> dist(0.0f, 1.0f);
+
+    for (size_t i = 0; i < n; i++) {
+        dst[i] = 0.7f * dist(gen) + 0.3f * (2.0f * cosf(i * 0.01f + offset));
+    }
+}
+
+// Calculate RMSE between two float arrays
+static float calculate_rmse(const float * a1, const float * a2, size_t n) {
+    double sum = 0;
+    for (size_t i = 0; i < n; i++) {
+        double diff = a1[i] - a2[i];
+        sum += diff * diff;
+    }
+    return sqrtf(sum / n);
+}
+
+static float calculate_max_error(const float * a1, const float * a2, size_t n) {
+    float max_err = 0.0f;
+    for (size_t i = 0; i < n; i++) {
+        float err = fabsf(a1[i] - a2[i]);
+        if (err > max_err) {
+            max_err = err;
+        }
+    }
+    return max_err;
+}
+
+static float get_error_threshold(ggml_type type) {
+    switch (type) {
+        case GGML_TYPE_TQ1_0:
+        case GGML_TYPE_TQ2_0:
+            return MAX_QUANTIZATION_TOTAL_ERROR_TERNARY;
+        case GGML_TYPE_Q2_K:
+        case GGML_TYPE_IQ2_S:
+            return MAX_QUANTIZATION_TOTAL_ERROR_2BITS;
+        case GGML_TYPE_Q3_K:
+        case GGML_TYPE_IQ3_S:
+            return MAX_QUANTIZATION_TOTAL_ERROR_3BITS;
+        case GGML_TYPE_IQ3_XXS:
+            return MAX_QUANTIZATION_TOTAL_ERROR_3BITS_XXS;
+        default:
+            return MAX_QUANTIZATION_TOTAL_ERROR;
+    }
+}
+
+static bool test_single_format(ggml_type type, size_t test_size, bool verbose) {
+    const auto * qfns = ggml_get_type_traits(type);
+    const auto * qfns_cpu = ggml_get_type_traits_cpu(type);
+
+    if (!qfns_cpu->from_float || !qfns->to_float) {
+        if (verbose) {
+            printf("  Skipping %s (no quantization functions)\n", ggml_type_name(type));
+        }
+        return true;
+    }
+
+    std::vector<float> test_data(test_size);
+    generate_test_data(0.0, test_size, test_data.data());
+
+    std::vector<uint8_t> quantized(ggml_row_size(type, test_size));
+    std::vector<float> dequantized(test_size);
+
+    qfns_cpu->from_float(test_data.data(), quantized.data(), test_size);
+    qfns->to_float(quantized.data(), dequantized.data(), test_size);
+
+    float rmse = calculate_rmse(test_data.data(), dequantized.data(), test_size);
+    float threshold = get_error_threshold(type);
+    bool passed = rmse < threshold;
+
+    if (verbose || !passed) {
+        printf("  %s %-12s: RMSE=%.6f (threshold=%.6f)\n",
+               RESULT_STR[!passed], ggml_type_name(type), rmse, threshold);
+    }
+
+    return passed;
+}
+
+static bool test_cross_format_conversion(ggml_type src_type, ggml_type dst_type,
+                                         size_t test_size, bool verbose) {
+    const auto * src_qfns = ggml_get_type_traits(src_type);
+    const auto * src_qfns_cpu = ggml_get_type_traits_cpu(src_type);
+    const auto * dst_qfns = ggml_get_type_traits(dst_type);
+    const auto * dst_qfns_cpu = ggml_get_type_traits_cpu(dst_type);
+
+    if (!src_qfns_cpu->from_float || !src_qfns->to_float ||
+        !dst_qfns_cpu->from_float || !dst_qfns->to_float) {
+        return true; // Skip if functions not available
+    }
+
+    std::vector<float> original(test_size);
+    generate_test_data(1.0, test_size, original.data());
+
+    std::vector<uint8_t> quantized_src(ggml_row_size(src_type, test_size));
+    std::vector<float> intermediate(test_size);
+    src_qfns_cpu->from_float(original.data(), quantized_src.data(), test_size);
+    src_qfns->to_float(quantized_src.data(), intermediate.data(), test_size);
+
+    std::vector<uint8_t> quantized_dst(ggml_row_size(dst_type, test_size));
+    std::vector<float> final(test_size);
+    dst_qfns_cpu->from_float(intermediate.data(), quantized_dst.data(), test_size);
+    dst_qfns->to_float(quantized_dst.data(), final.data(), test_size);
+
+    float rmse = calculate_rmse(original.data(), final.data(), test_size);
+    bool passed = rmse < MAX_CROSS_FORMAT_CONVERSION_ERROR;
+
+    if (verbose || !passed) {
+        printf("  %s %s → %s: RMSE=%.6f\n",
+               RESULT_STR[!passed], ggml_type_name(src_type),
+               ggml_type_name(dst_type), rmse);
+    }
+
+    return passed;
+}
+
+static bool test_round_trip_conversion(ggml_type intermediate_type, size_t test_size, bool verbose) {
+    const auto * qfns = ggml_get_type_traits(intermediate_type);
+    const auto * qfns_cpu = ggml_get_type_traits_cpu(intermediate_type);
+
+    if (!qfns_cpu->from_float || !qfns->to_float) {
+        return true; // Skip if functions not available
+    }
+
+    std::vector<float> original(test_size);
+    generate_test_data(2.0, test_size, original.data());
+
+    std::vector<uint8_t> quantized1(ggml_row_size(intermediate_type, test_size));
+    std::vector<float> intermediate(test_size);
+    std::vector<uint8_t> quantized2(ggml_row_size(intermediate_type, test_size));
+    std::vector<float> final(test_size);
+
+    qfns_cpu->from_float(original.data(), quantized1.data(), test_size);
+    qfns->to_float(quantized1.data(), intermediate.data(), test_size);
+
+    qfns_cpu->from_float(intermediate.data(), quantized2.data(), test_size);
+    qfns->to_float(quantized2.data(), final.data(), test_size);
+
+    float rmse = calculate_rmse(intermediate.data(), final.data(), test_size);
+    bool passed = rmse < MAX_ROUND_TRIP_CONVERSION_ERROR;
+
+    if (verbose || !passed) {
+        printf("  %s Round-trip %s: RMSE=%.6f\n",
+               RESULT_STR[!passed], ggml_type_name(intermediate_type), rmse);
+    }
+
+    return passed;
+}
+
+static bool test_tensor_alignment(ggml_type type, size_t test_size, bool verbose) {
+    const auto * qfns_cpu = ggml_get_type_traits_cpu(type);
+
+    if (!qfns_cpu->from_float) {
+        return true;
+    }
+
+    std::vector<size_t> test_sizes = {
+        static_cast<size_t>(ggml_blck_size(type)),
+        static_cast<size_t>(ggml_blck_size(type) * 2),
+        static_cast<size_t>(ggml_blck_size(type) * 7),
+        test_size
+    };
+
+    bool all_passed = true;
+    for (size_t size : test_sizes) {
+        if (size > test_size) continue;
+
+        std::vector<float> data(size);
+        generate_test_data(3.0, size, data.data());
+
+        std::vector<uint8_t> quantized(ggml_row_size(type, size));
+
+        qfns_cpu->from_float(data.data(), quantized.data(), size);
+    }
+
+    if (verbose) {
+        printf("  %s Alignment test for %s\n", RESULT_STR[!all_passed], ggml_type_name(type));
+    }
+
+    return all_passed;
+}
+
+static bool test_large_model_simulation(bool verbose) {
+    const size_t chunk_size = 1024 * 1024; // 1M floats = 4MB per chunk
+    const size_t num_chunks = 4;           // Total 16MB of float data
+
+    if (verbose) {
+        printf("\nTesting large model simulation (%zu chunks of %zu elements)...\n",
+               num_chunks, chunk_size);
+    }
+
+    bool all_passed = true;
+    int num_failed = 0;
+
+    for (ggml_type type : all_quant_types) {
+        const auto * qfns = ggml_get_type_traits(type);
+        const auto * qfns_cpu = ggml_get_type_traits_cpu(type);
+
+        if (!qfns_cpu->from_float || !qfns->to_float) {
+            continue;
+        }
+
+        ggml_quantize_init(type);
+
+        std::vector<float> chunk_errors;
+
+        for (size_t chunk = 0; chunk < num_chunks; chunk++) {
+            std::vector<float> data(chunk_size);
+            generate_test_data(chunk * 10.0f, chunk_size, data.data());
+
+            std::vector<uint8_t> quantized(ggml_row_size(type, chunk_size));
+            std::vector<float> dequantized(chunk_size);
+
+            qfns_cpu->from_float(data.data(), quantized.data(), chunk_size);
+            qfns->to_float(quantized.data(), dequantized.data(), chunk_size);
+
+            float rmse = calculate_rmse(data.data(), dequantized.data(), chunk_size);
+            chunk_errors.push_back(rmse);
+        }
+
+        float avg_error = 0.0f;
+        for (float err : chunk_errors) {
+            avg_error += err;
+        }
+        avg_error /= chunk_errors.size();
+
+        float threshold = get_error_threshold(type);
+        bool passed = avg_error < threshold;
+
+        if (!passed) {
+            all_passed = false;
+            num_failed++;
+        }
+
+        if (verbose || !passed) {
+            printf("  %s %-12s: Avg RMSE=%.6f across %zu chunks\n",
+                   RESULT_STR[!passed], ggml_type_name(type), avg_error, num_chunks);
+        }
+    }
+
+    if (verbose || num_failed > 0) {
+        printf("Large model simulation: %d/%d types passed\n",
+               (int)(sizeof(all_quant_types)/sizeof(all_quant_types[0])) - num_failed,
+               (int)(sizeof(all_quant_types)/sizeof(all_quant_types[0])));
+    }
+
+    return all_passed;
+}
+
+static bool test_multi_file_support(bool verbose) {
+    if (verbose) {
+        printf("\nTesting multi-file model support simulation...\n");
+    }
+
+    const size_t file_sizes[] = {512 * 1024, 768 * 1024, 1024 * 1024};
+    const size_t num_files = sizeof(file_sizes) / sizeof(file_sizes[0]);
+
+    bool all_passed = true;
+
+    ggml_type test_types[] = {GGML_TYPE_Q4_0, GGML_TYPE_Q8_0, GGML_TYPE_Q4_K};
+
+    for (ggml_type type : test_types) {
+        const auto * qfns = ggml_get_type_traits(type);
+        const auto * qfns_cpu = ggml_get_type_traits_cpu(type);
+
+        if (!qfns_cpu->from_float || !qfns->to_float) {
+            continue;
+        }
+
+        ggml_quantize_init(type);
+
+        float total_error = 0.0f;
+
+        for (size_t i = 0; i < num_files; i++) {
+            std::vector<float> data(file_sizes[i]);
+            generate_test_data(i * 5.0f, file_sizes[i], data.data());
+
+            std::vector<uint8_t> quantized(ggml_row_size(type, file_sizes[i]));
+            std::vector<float> dequantized(file_sizes[i]);
+
+            qfns_cpu->from_float(data.data(), quantized.data(), file_sizes[i]);
+            qfns->to_float(quantized.data(), dequantized.data(), file_sizes[i]);
+
+            float rmse = calculate_rmse(data.data(), dequantized.data(), file_sizes[i]);
+            total_error += rmse;
+        }
+
+        float avg_error = total_error / num_files;
+        float threshold = get_error_threshold(type);
+        bool passed = avg_error < threshold;
+
+        if (!passed) {
+            all_passed = false;
+        }
+
+        if (verbose || !passed) {
+            printf("  %s %-12s: Avg RMSE=%.6f across %zu files\n",
+                   RESULT_STR[!passed], ggml_type_name(type), avg_error, num_files);
+        }
+    }
+
+    return all_passed;
+}
+
+int main(int argc, char ** argv) {
+    bool verbose = false;
+    bool test_all = true;
+    bool test_single = false;
+    bool test_cross = false;
+    bool test_round_trip = false;
+    bool test_large = false;
+    bool test_multi_file = false;
+
+    for (int i = 1; i < argc; i++) {
+        std::string arg = argv[i];
+        if (arg == "-v" || arg == "--verbose") {
+            verbose = true;
+        } else if (arg == "--single") {
+            test_all = false;
+            test_single = true;
+        } else if (arg == "--cross") {
+            test_all = false;
+            test_cross = true;
+        } else if (arg == "--round-trip") {
+            test_all = false;
+            test_round_trip = true;
+        } else if (arg == "--large") {
+            test_all = false;
+            test_large = true;
+        } else if (arg == "--multi-file") {
+            test_all = false;
+            test_multi_file = true;
+        } else {
+            fprintf(stderr, "Usage: %s [-v|--verbose] [--single] [--cross] [--round-trip] [--large] [--multi-file]\n", argv[0]);
+            return 1;
+        }
+    }
+
+    ggml_cpu_init();
+
+    const size_t test_size = 32 * 128; // Same as test-quantize-fns.cpp
+    int total_tests = 0;
+    int passed_tests = 0;
+
+    if (test_all || test_single) {
+        printf("\n=== Testing single format quantization ===\n");
+        for (ggml_type type : all_quant_types) {
+            ggml_quantize_init(type);
+            total_tests++;
+            if (test_single_format(type, test_size, verbose)) {
+                passed_tests++;
+            }
+        }
+    }
+
+    if (test_all || test_cross) {
+        printf("\n=== Testing cross-format conversions ===\n");
+
+        for (ggml_type src : base_types) {
+            for (ggml_type dst : all_quant_types) {
+                total_tests++;
+                if (test_cross_format_conversion(src, dst, test_size, verbose)) {
+                    passed_tests++;
+                }
+            }
+        }
+
+        ggml_type sample_types[] = {
+            GGML_TYPE_Q4_0, GGML_TYPE_Q8_0, GGML_TYPE_Q4_K, GGML_TYPE_Q6_K
+        };
+
+        for (size_t i = 0; i < sizeof(sample_types)/sizeof(sample_types[0]); i++) {
+            for (size_t j = 0; j < sizeof(sample_types)/sizeof(sample_types[0]); j++) {
+                if (i != j) {
+                    ggml_quantize_init(sample_types[i]);
+                    ggml_quantize_init(sample_types[j]);
+                    total_tests++;
+                    if (test_cross_format_conversion(sample_types[i], sample_types[j],
+                                                     test_size, verbose)) {
+                        passed_tests++;
+                    }
+                }
+            }
+        }
+    }
+
+    if (test_all || test_round_trip) {
+        printf("\n=== Testing round-trip conversions ===\n");
+        for (ggml_type type : all_quant_types) {
+            ggml_quantize_init(type);
+            total_tests++;
+            if (test_round_trip_conversion(type, test_size, verbose)) {
+                passed_tests++;
+            }
+        }
+    }
+
+    if (test_all) {
+        printf("\n=== Testing tensor alignment ===\n");
+        for (ggml_type type : all_quant_types) {
+            ggml_quantize_init(type);
+            total_tests++;
+            if (test_tensor_alignment(type, test_size, verbose)) {
+                passed_tests++;
+            }
+        }
+    }
+
+    if (test_all || test_large) {
+        total_tests++;
+        if (test_large_model_simulation(verbose)) {
+            passed_tests++;
+        }
+    }
+
+    if (test_all || test_multi_file) {
+        total_tests++;
+        if (test_multi_file_support(verbose)) {
+            passed_tests++;
+        }
+    }
+
+    printf("\n=== Test Summary ===\n");
+    printf("Passed: %d/%d tests\n", passed_tests, total_tests);
+
+    if (passed_tests == total_tests) {
+        printf("All tests passed! ✓\n");
+        return 0;
+    } else {
+        printf("%d tests failed ✗\n", total_tests - passed_tests);
+        return 1;
+    }
+}
diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp
index 037c0582bbbf8..907743a718fb0 100644
--- a/tests/test-quantize-fns.cpp
+++ b/tests/test-quantize-fns.cpp
@@ -98,6 +98,53 @@ static float dot_product_error(const ggml_type_traits * qfns, const ggml_type_traits_cpu * qfns_cpu, size_t test_size, const float * test_data1, const float * test_data2) {
     return fabsf(result - dot_ref) / test_size;
 }
 
+static float cross_format_conversion_error(ggml_type type_src, ggml_type type_dst, size_t test_size, const float * test_data) {
+    const auto * qfns_src = ggml_get_type_traits(type_src);
+    const auto * qfns_src_cpu = ggml_get_type_traits_cpu(type_src);
+    const auto * qfns_dst = ggml_get_type_traits(type_dst);
+    const auto * qfns_dst_cpu = ggml_get_type_traits_cpu(type_dst);
+
+    if (!qfns_src_cpu->from_float || !qfns_src->to_float ||
+        !qfns_dst_cpu->from_float || !qfns_dst->to_float) {
+        return 0.0f;
+    }
+
+    std::vector<uint8_t> tmp_q_src(2*test_size);
+    std::vector<float> tmp_intermediate(test_size);
+    std::vector<uint8_t> tmp_q_dst(2*test_size);
+    std::vector<float> tmp_final(test_size);
+
+    qfns_src_cpu->from_float(test_data, tmp_q_src.data(), test_size);
+    qfns_src->to_float(tmp_q_src.data(), tmp_intermediate.data(), test_size);
+
+    qfns_dst_cpu->from_float(tmp_intermediate.data(), tmp_q_dst.data(), test_size);
+    qfns_dst->to_float(tmp_q_dst.data(), tmp_final.data(), test_size);
+
+    return array_rmse(test_data, tmp_final.data(), test_size);
+}
+
+static float round_trip_error(ggml_type type, size_t test_size, const float * test_data) {
+    const auto * qfns = ggml_get_type_traits(type);
+    const auto * qfns_cpu = ggml_get_type_traits_cpu(type);
+
+    if (!qfns_cpu->from_float || !qfns->to_float) {
+        return 0.0f;
+    }
+
+    std::vector<uint8_t> tmp_q1(2*test_size);
+    std::vector<float> tmp_intermediate(test_size);
+    std::vector<uint8_t> tmp_q2(2*test_size);
+    std::vector<float> tmp_final(test_size);
+
+    qfns_cpu->from_float(test_data, tmp_q1.data(), test_size);
+    qfns->to_float(tmp_q1.data(), tmp_intermediate.data(), test_size);
+
+    qfns_cpu->from_float(tmp_intermediate.data(), tmp_q2.data(), test_size);
+    qfns->to_float(tmp_q2.data(), tmp_final.data(), test_size);
+
+    return array_rmse(tmp_intermediate.data(), tmp_final.data(), test_size);
+}
+
 int main(int argc, char * argv[]) {
     bool verbose = false;
     const size_t test_size = 32 * 128;
@@ -178,8 +225,55 @@ int main(int argc, char * argv[]) {
         }
     }
 
+    static const ggml_type cross_format_test_types[] = {
+        GGML_TYPE_Q4_0, GGML_TYPE_Q4_1, GGML_TYPE_Q8_0,
+        GGML_TYPE_Q4_K, GGML_TYPE_Q5_K, GGML_TYPE_Q6_K
+    };
+    constexpr float MAX_CROSS_FORMAT_ERROR = 0.015f;
+    constexpr float MAX_ROUND_TRIP_ERROR = 0.015f;
+
+    printf("\n=== Cross-format conversion tests ===\n");
+    for (size_t i = 0; i < sizeof(cross_format_test_types)/sizeof(cross_format_test_types[0]); i++) {
+        for (size_t j = 0; j < sizeof(cross_format_test_types)/sizeof(cross_format_test_types[0]); j++) {
+            if (i != j) {
+                ggml_type type_src = cross_format_test_types[i];
+                ggml_type type_dst = cross_format_test_types[j];
+
+                ggml_quantize_init(type_src);
+                ggml_quantize_init(type_dst);
+
+                float error = cross_format_conversion_error(type_src, type_dst, test_size, test_data.data());
+                if (error > 0.0f) {
+                    failed = !(error < MAX_CROSS_FORMAT_ERROR);
+                    num_failed += failed;
+                    if (failed || verbose) {
+                        printf("%5s → %-5s conversion error: %s (%f)\n",
+                               ggml_type_name(type_src), ggml_type_name(type_dst),
+                               RESULT_STR[failed], error);
+                    }
+                }
+            }
+        }
+    }
+
+    printf("\n=== Round-trip conversion tests ===\n");
+    for (size_t i = 0; i < sizeof(cross_format_test_types)/sizeof(cross_format_test_types[0]); i++) {
+        ggml_type type = cross_format_test_types[i];
+        ggml_quantize_init(type);
+
+        float error = round_trip_error(type, test_size, test_data.data());
+        if (error > 0.0f) {
+            failed = !(error < MAX_ROUND_TRIP_ERROR);
+            num_failed += failed;
+            if (failed || verbose) {
+                printf("%5s round-trip error: %s (%f)\n",
+                       ggml_type_name(type), RESULT_STR[failed], error);
+            }
+        }
+    }
+
     if (num_failed || verbose) {
-        printf("%d tests failed\n", num_failed);
+        printf("\n%d tests failed\n", num_failed);
     }
 
     return num_failed > 0;
diff --git a/tests/test-quantize-stats.cpp b/tests/test-quantize-stats.cpp
index a284a1f0c5e31..2d0f6046623b9 100644
--- a/tests/test-quantize-stats.cpp
+++ b/tests/test-quantize-stats.cpp
@@ -165,6 +165,62 @@ static void test_roundtrip_on_chunk(
 
 // Run quantization function for a single layer and update error stats
+static double calculate_perplexity(const float * logits, const int * targets, int n_tokens, int vocab_size) {
+    double neg_log_likelihood = 0.0;
+
+    for (int i = 0; i < n_tokens; i++) {
+        int target = targets[i];
+        if (target < 0 || target >= vocab_size) continue;
+
+        const float * token_logits = logits + i * vocab_size;
+
+        float max_logit = token_logits[0];
+        for (int j = 1; j < vocab_size; j++) {
+            if (token_logits[j] > max_logit) max_logit = token_logits[j];
+        }
+
+        double sum_exp = 0.0;
+        for (int j = 0; j < vocab_size; j++) {
+            sum_exp += exp(token_logits[j] - max_logit);
+        }
+
+        double log_prob = (token_logits[target] - max_logit) - log(sum_exp);
+        neg_log_likelihood += -log_prob;
+    }
+
+    return exp(neg_log_likelihood / n_tokens);
+}
+
+static void compare_perplexity_across_formats(
+    llama_model * model,
+    llama_context * ctx,
+    const std::vector<llama_token> & test_tokens,
+    const std::vector<ggml_type> & quant_types
+) {
+    (void)model;
+    (void)ctx;
+    (void)test_tokens;
+
+    printf("\n=== Perplexity Comparison Across Quantization Formats ===\n");
+    printf("Note: Lower perplexity indicates better model quality\n\n");
+
+    for (ggml_type qtype : quant_types) {
+        const auto * qfns_cpu = ggml_get_type_traits_cpu(qtype);
+        const auto * qfns = ggml_get_type_traits(qtype);
+        if (!qfns_cpu->from_float || !qfns->to_float) continue;
+
+        printf("%-12s: perplexity calculation requires model inference\n", ggml_type_name(qtype));
+    }
+
+    printf("\nNote: Full perplexity measurement requires model inference.\n");
+    printf("      This is a placeholder for the perplexity framework.\n");
+    printf("      Actual implementation would:\n");
+    printf("      1. Quantize model weights to each format\n");
+    printf("      2. Run inference on test set\n");
+    printf("      3. Calculate perplexity from output logits\n");
+    printf("      4. Compare perplexity degradation across formats\n");
+}
+
 static void test_roundtrip_on_layer(
     std::string & name, bool print_layer_stats, const ggml_type_traits & qfns, const ggml_type_traits_cpu & qfns_cpu,
     bool use_reference, const ggml_tensor * layer, std::vector<float> & input_scratch, std::vector<char> & quantized_scratch,