diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index 60c6b63d05978..09d3942e8ca3e 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -417,7 +417,8 @@ extern "C" {
         // GGML_TYPE_IQ4_NL_4_8 = 37,
         // GGML_TYPE_IQ4_NL_8_8 = 38,
         GGML_TYPE_MXFP4   = 39, // MXFP4 (1 block)
-        GGML_TYPE_COUNT   = 40,
+        GGML_TYPE_BC6H_0  = 40,
+        GGML_TYPE_COUNT   = 41,
     };
 
     // precision
@@ -716,6 +717,7 @@ extern "C" {
     GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
 
     GGML_API bool ggml_is_quantized(enum ggml_type type);
+    GGML_API bool ggml_allows_empty_border(enum ggml_type type);
 
     // TODO: temporary until model loading of ggml examples is refactored
     GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);
@@ -2525,6 +2527,7 @@ extern "C" {
         int64_t                  blck_size_interleave; // interleave elements in blocks
         size_t                   type_size;
        bool                     is_quantized;
+        bool                     allows_empty_border;
         ggml_to_float_t          to_float;
         ggml_from_float_t        from_float_ref;
     };
diff --git a/ggml/src/ggml-common.h b/ggml/src/ggml-common.h
index 93ab7ea446e26..7e5628c2f59bf 100644
--- a/ggml/src/ggml-common.h
+++ b/ggml/src/ggml-common.h
@@ -427,6 +427,13 @@ typedef struct {
 } block_iq4_xs;
 static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding");
 
+// Texture compression quants
+#define BC6H_BITS_PER_BLOCK    (128)
+#define BC6H_WEIGHTS_PER_BLOCK (16*3)
+typedef struct {
+    uint8_t block[BC6H_BITS_PER_BLOCK/8];
+} block_bc6h_0;
+
 #endif // GGML_COMMON_DECL
 #endif // GGML_COMMON_DECL
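Note on the constants above: a BC6H block is always 128 bits (16 bytes) and encodes a 4x4 tile of RGB texels, i.e. 16*3 = 48 scalar values, which is what BC6H_WEIGHTS_PER_BLOCK counts. A standalone sketch of the sizing math this implies (the helper name is made up for illustration and is not part of the patch):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BC6H_BITS_PER_BLOCK    (128)
    #define BC6H_WEIGHTS_PER_BLOCK (16*3) // 4x4 texels x RGB

    // bytes needed for one row of n weights, rounding the last (possibly
    // partial, "empty border") block up to a whole 16-byte BC6H block
    static size_t bc6h_row_bytes(int64_t n) {
        int64_t blocks = (n + BC6H_WEIGHTS_PER_BLOCK - 1)/BC6H_WEIGHTS_PER_BLOCK;
        return (size_t) blocks * (BC6H_BITS_PER_BLOCK/8);
    }

    int main(void) {
        assert(bc6h_row_bytes(48) == 16);  // exactly one block
        assert(bc6h_row_bytes(50) == 32);  // two weights spill into a second, padded block
        printf("4096 weights -> %zu bytes\n", bc6h_row_bytes(4096)); // 86 blocks = 1376 bytes
        return 0;
    }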
diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp
index 6275c8305a971..6ffb6569a47e7 100644
--- a/ggml/src/ggml-cpu/ops.cpp
+++ b/ggml/src/ggml-cpu/ops.cpp
@@ -5428,6 +5428,7 @@ void ggml_compute_forward_clamp(
         case GGML_TYPE_I32:
         case GGML_TYPE_I64:
         case GGML_TYPE_F64:
+        case GGML_TYPE_BC6H_0:
         case GGML_TYPE_COUNT:
             {
                 GGML_ABORT("fatal error");
diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c
index de5cbd75e868e..4d8d721600f63 100644
--- a/ggml/src/ggml-quants.c
+++ b/ggml/src/ggml-quants.c
@@ -2550,6 +2550,11 @@ void dequantize_row_iq4_xs(const block_iq4_xs * GGML_RESTRICT x, float * GGML_RE
     }
 }
 
+void dequantize_row_bc6h_0(const block_bc6h_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
+    fprintf(stderr, "dequantize_row_bc6h_0(x=%p, y=%p, k=%lld)\n", (const void *) x, (void *) y, (long long) k);
+    exit(1);
+}
+
 //===================================== Q8_K ==============================================
 
 void quantize_row_q8_K_ref(const float * GGML_RESTRICT x, block_q8_K * GGML_RESTRICT y, int64_t k) {
@@ -4997,6 +5002,28 @@ void quantize_row_iq2_s_ref(const float * GGML_RESTRICT x, block_iq2_s * GGML_RE
     quantize_iq2_s(x, y, 1, k, NULL);
 }
 
+// BC6H_0 quantization
+static int64_t roundup(int64_t value, int64_t to) {
+    int64_t rem = value % to;
+    if (rem == 0) {
+        return value;
+    } else {
+        return value - rem + to;
+    }
+}
+
+size_t quantize_bc6h_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
+    GGML_UNUSED(src); GGML_UNUSED(quant_weights);
+    int64_t blocks_per_row = roundup(n_per_row, BC6H_WEIGHTS_PER_BLOCK)/BC6H_WEIGHTS_PER_BLOCK;
+    size_t bytes_per_row = blocks_per_row * sizeof(block_bc6h_0);
+    memset(dst, 0, bytes_per_row * nrow);
+    return nrow * bytes_per_row;
+}
+
+void quantize_row_bc6h_0_ref(const float * GGML_RESTRICT x, block_bc6h_0 * GGML_RESTRICT y, int64_t k) {
+    quantize_bc6h_0(x, y, 1, k, NULL);
+}
+
 // =============================== data validation
 
 static bool validate_float(float f, size_t i) {
@@ -5044,6 +5071,10 @@ static bool validate_e_e8m0(uint8_t e, size_t i) {
     return true;
 }
 
+static bool validate_bc6h_0(const block_bc6h_0 * block) {
+    return true;
+}
+
 #define VALIDATE_ROW_DATA_D_F16_IMPL(type, data, nb) \
     const type * q = (const type *) (data); \
     for (size_t i = 0; i < (nb); ++i) { \
@@ -5307,6 +5338,10 @@ bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbyte
             {
                 VALIDATE_ROW_DATA_D_F16_IMPL(block_iq4_nl, data, nb);
            } break;
+        case GGML_TYPE_BC6H_0:
+            {
+                return validate_bc6h_0(data);
+            } break;
         case GGML_TYPE_I8:
         case GGML_TYPE_I16:
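The entry points above are still stubs: quantize_bc6h_0 only zero-fills the padded row and dequantize_row_bc6h_0 aborts. For orientation, a row quantizer for this layout would plausibly look like the sketch below; encode_bc6h_block is hypothetical (the patch contains no BC6H encoder) and zero-padding the trailing partial block is an assumption, not something the patch specifies:

    #include <stdint.h>
    #include <string.h>

    #define BC6H_WEIGHTS_PER_BLOCK (16*3)
    typedef struct { uint8_t block[16]; } block_bc6h_0;

    // hypothetical encoder: pack 48 floats into one 128-bit BC6H block
    static void encode_bc6h_block(block_bc6h_0 * dst, const float * src) {
        // placeholder - a real encoder would pick a mode, fit endpoints and
        // emit per-texel indices here
        (void) src;
        memset(dst->block, 0, sizeof(dst->block));
    }

    size_t quantize_row_bc6h_0_sketch(const float * src, block_bc6h_0 * dst, int64_t n_per_row) {
        const int64_t nblocks = (n_per_row + BC6H_WEIGHTS_PER_BLOCK - 1)/BC6H_WEIGHTS_PER_BLOCK;
        for (int64_t b = 0; b < nblocks; ++b) {
            float tmp[BC6H_WEIGHTS_PER_BLOCK] = {0}; // zero-pad the trailing "empty border"
            int64_t n = n_per_row - b*BC6H_WEIGHTS_PER_BLOCK;
            if (n > BC6H_WEIGHTS_PER_BLOCK) {
                n = BC6H_WEIGHTS_PER_BLOCK;
            }
            memcpy(tmp, src + b*BC6H_WEIGHTS_PER_BLOCK, (size_t) n*sizeof(float));
            encode_bc6h_block(&dst[b], tmp);
        }
        return (size_t) nblocks*sizeof(block_bc6h_0);
    }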
"Activation aWare Quantization") GGML_API size_t quantize_iq2_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); GGML_API size_t quantize_iq2_xs (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); @@ -96,6 +100,8 @@ GGML_API size_t quantize_q8_0(const float * GGML_RESTRICT src, void * GGML_RESTR GGML_API size_t quantize_mxfp4(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); +GGML_API size_t quantize_bc6h_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); + GGML_API void iq2xs_init_impl(enum ggml_type type); GGML_API void iq2xs_free_impl(enum ggml_type type); GGML_API void iq3xs_init_impl(int grid_size); diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 2bce1375ba3c0..63a53b7a14e51 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -873,6 +873,15 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = { .type_size = 0, .is_quantized = false, }, + [GGML_TYPE_BC6H_0] = { + .type_name = "bc6h_0", + .blck_size = BC6H_WEIGHTS_PER_BLOCK, + .type_size = sizeof(block_bc6h_0), + .is_quantized = true, + .allows_empty_border = true, + .to_float = (ggml_to_float_t) dequantize_row_bc6h_0, + .from_float_ref = (ggml_from_float_t) quantize_row_bc6h_0_ref, + }, }; const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) { @@ -1233,9 +1242,22 @@ size_t ggml_type_size(enum ggml_type type) { return type_traits[type].type_size; } +static int64_t roundup(int64_t value, int64_t to) { + int64_t rem = value % to; + if(rem == 0) { + return value; + } else { + return value - rem + to; + } +} + size_t ggml_row_size(enum ggml_type type, int64_t ne) { - assert(ne % ggml_blck_size(type) == 0); - return ggml_type_size(type)*ne/ggml_blck_size(type); + if(type_traits[type].allows_empty_border) { + return roundup(ggml_type_size(type)*ne, ggml_blck_size(type))/ggml_blck_size(type); + } else { + assert(ne % ggml_blck_size(type) == 0 ); + return ggml_type_size(type)*ne/ggml_blck_size(type); + } } double ggml_type_sizef(enum ggml_type type) { @@ -1250,6 +1272,10 @@ bool ggml_is_quantized(enum ggml_type type) { return type_traits[type].is_quantized; } +bool ggml_allows_empty_border(enum ggml_type type) { + return type_traits[type].allows_empty_border; +} + const char * ggml_op_name(enum ggml_op op) { return GGML_OP_NAME[op]; } @@ -7151,7 +7177,8 @@ size_t ggml_quantize_chunk( GGML_ASSERT(imatrix != NULL); } - GGML_ASSERT(start % type_traits[type].blck_size == 0); + // TURBOLLAMA-TODO: calculate this better rather than just disabling the assert + GGML_ASSERT(start % type_traits[type].blck_size == 0 || type_traits[type].allows_empty_border); GGML_ASSERT(start % n_per_row == 0); ggml_quantize_init(type); // this is noop if already initialized @@ -7184,6 +7211,7 @@ size_t ggml_quantize_chunk( case GGML_TYPE_IQ1_M: result = quantize_iq1_m (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; case GGML_TYPE_IQ4_NL: result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; case GGML_TYPE_IQ4_XS: result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; + case GGML_TYPE_BC6H_0: result = quantize_bc6h_0 (src + start, (char *) dst + start_row + row_size, nrows, n_per_row, imatrix); break; case 
diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp
index 8cc4ef1cf4435..308016b5ba69c 100644
--- a/ggml/src/gguf.cpp
+++ b/ggml/src/gguf.cpp
@@ -1142,7 +1142,9 @@ void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggm
     const int64_t blck_size = ggml_blck_size(type);
 
     tensor->type = type;
-    GGML_ASSERT(tensor->ne[0] % blck_size == 0 && "tensor row size not divisible by block size of new type");
+
+    // TURBOLLAMA-TODO: calculate this better rather than just disabling the assert
+    GGML_ASSERT((tensor->ne[0] % blck_size == 0 || ggml_allows_empty_border(type)) && "tensor row size not divisible by block size of new type");
 
     tensor->nb[0] = type_size;
     tensor->nb[1] = tensor->nb[0]*(tensor->ne[0]/blck_size);
diff --git a/include/llama.h b/include/llama.h
index 8fc3d7db5a917..10509094bf4b2 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -151,6 +151,7 @@ extern "C" {
        LLAMA_FTYPE_MOSTLY_TQ1_0         = 36, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_TQ2_0         = 37, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_MXFP4_MOE     = 38, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_BC6H_0        = 39, // except 1d tensors
 
         LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
     };
diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index 97228b2a69324..48efacca390cf 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -439,7 +439,7 @@ static ggml_type llama_tensor_get_type(quantize_state_impl & qs, ggml_type new_t
     const int64_t ny = tensor->ne[1];
     const int64_t qk_k = ggml_blck_size(new_type);
 
-    if (nx % qk_k != 0) {
+    if (nx % qk_k != 0 && !ggml_allows_empty_border(new_type)) {
         LLAMA_LOG_WARN("\n\n%s : tensor cols %" PRId64 " x %" PRId64 " are not divisible by %" PRId64 ", required for %s", __func__, nx, ny, qk_k, ggml_type_name(new_type));
         convert_incompatible_tensor = true;
     } else {
@@ -571,6 +571,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
         case LLAMA_FTYPE_MOSTLY_IQ4_XS: default_type = GGML_TYPE_IQ4_XS; break;
         case LLAMA_FTYPE_MOSTLY_IQ3_S:  default_type = GGML_TYPE_IQ3_S;  break;
         case LLAMA_FTYPE_MOSTLY_IQ3_M:  default_type = GGML_TYPE_IQ3_S;  break;
+        case LLAMA_FTYPE_MOSTLY_BC6H_0: default_type = GGML_TYPE_BC6H_0; break;
 
         default: throw std::runtime_error(format("invalid output file type %d\n", ftype));
     }
@@ -1030,7 +1031,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
 
                 // update the gguf meta data as we go
                 gguf_set_tensor_type(ctx_outs[cur_split].get(), name.c_str(), new_type);
-                GGML_ASSERT(gguf_get_tensor_size(ctx_outs[cur_split].get(), gguf_find_tensor(ctx_outs[cur_split].get(), name.c_str())) == new_size);
+                GGML_ASSERT((gguf_get_tensor_size(ctx_outs[cur_split].get(), gguf_find_tensor(ctx_outs[cur_split].get(), name.c_str())) == new_size) || ggml_allows_empty_border(new_type));
                 gguf_set_tensor_data(ctx_outs[cur_split].get(), name.c_str(), new_data);
 
                 // write tensor data + padding
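The size assertions above (in gguf.cpp and llama-quant.cpp, like the two in ggml.c) are relaxed rather than removed because the surrounding stride bookkeeping still floors: in gguf_set_tensor_type the context line nb[1] = nb[0]*(ne[0]/blck_size) drops the final partial block, while the padded row produced by quantization includes it. An illustration with an assumed row width of 4096 (the numbers are illustrative only):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        const int64_t ne0       = 4096;
        const int64_t blck_size = 16*3; // 48 weights per BC6H block
        const int64_t type_size = 16;   // bytes per block

        const int64_t floor_row = type_size*(ne0/blck_size);                   // stride as gguf_set_tensor_type computes it
        const int64_t ceil_row  = type_size*((ne0 + blck_size - 1)/blck_size); // bytes the padded row actually occupies

        printf("floor: %lld bytes, ceil: %lld bytes\n", (long long) floor_row, (long long) ceil_row); // 1360 vs 1376
        return 0;
    }

Until that bookkeeping accounts for the padded final block, the TURBOLLAMA-TODO asserts stay relaxed.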
Llama-3-8B", }, { "F16", LLAMA_FTYPE_MOSTLY_F16, "14.00G, +0.0020 ppl @ Mistral-7B", }, { "BF16", LLAMA_FTYPE_MOSTLY_BF16, "14.00G, -0.0050 ppl @ Mistral-7B", }, + { "BC6H_0", LLAMA_FTYPE_MOSTLY_BC6H_0, "BC6H texture compression", }, { "F32", LLAMA_FTYPE_ALL_F32, "26.00G @ 7B", }, // Note: Ensure COPY comes after F32 to avoid ftype 0 from matching. { "COPY", LLAMA_FTYPE_ALL_F32, "only copy tensors, no quantizing", },