diff --git a/docs/source/en/gguf.md b/docs/source/en/gguf.md
index 359ed4d5e1e8..16e990f77f6d 100644
--- a/docs/source/en/gguf.md
+++ b/docs/source/en/gguf.md
@@ -53,9 +53,10 @@ on the Hub.
 - Q5_K
 - Q6_K
 - Q8_0
+- IQ2_XXS
 
 We take example from the excellent [99991/pygguf](https://github.com/99991/pygguf) Python parser to dequantize the
-weights.
+weights in k-quants.
 
 ### Supported model architectures
 
diff --git a/src/transformers/integrations/ggml.py b/src/transformers/integrations/ggml.py
index 7da09be841e1..ca1670dc9ae5 100644
--- a/src/transformers/integrations/ggml.py
+++ b/src/transformers/integrations/ggml.py
@@ -44,6 +44,7 @@
     "Q4_K": 12,
     "Q5_K": 13,
     "Q6_K": 14,
+    "IQ2_XXS": 16,
 }
 
 # The Blocksizes are reported in bytes
@@ -58,6 +59,7 @@
     "Q2_K": 256 // 16 + 256 // 4 + 2 + 2,
     "Q3_K": 256 // 8 + 256 // 4 + 12 + 2,
     "Q5_K": 2 + 2 + 12 + 256 // 8 + 256 // 2,
+    "IQ2_XXS": 2 + 256 // 8 * 2,
 }
 
 # Listed here: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md
@@ -487,6 +489,48 @@ def dequantize_q5_k(data, n_bytes: int):
     )
 
 
+def dequantize_iq2_xxs(data, n_bytes):
+    # C implementation
+    # https://github.com/ggerganov/ggml/blob/3f5a4bbe59285c0f679b376f6259187d5514ff9c/src/ggml-quants.c#L3311
+    # C struct definition
+    # https://github.com/ggerganov/ggml/blob/3f5a4bbe59285c0f679b376f6259187d5514ff9c/src/ggml-common.h#L314-L321
+    def _dequantize_iq2xxs_column(qs_block, d):
+        """
+        The qs matrix can be split into 8 sub-blocks per row (4 int16 values each):
+        | Block_11 | Block_12 | Block_13 | Block_14 | Block_15 | Block_16 | Block_17 | Block_18 |
+        | Block_21 | Block_22 | Block_23 | Block_24 | Block_25 | Block_26 | Block_27 | Block_28 |
+        ...
+        | Block_n1 | Block_n2 | Block_n3 | Block_n4 | Block_n5 | Block_n6 | Block_n7 | Block_n8 |
+
+        This function processes one sub-block column across all n rows at a time (Block_11 to Block_n1, then Block_12 to Block_n2, etc.).
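+
+        Each IQ2_XXS block packs 256 weights into 66 bytes (GGML_BLOCK_SIZES["IQ2_XXS"]
+        above): one fp16 scale followed by 32 uint16 values, so each row of the qs
+        matrix holds one block's 32-value payload.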
+ """ + from .ggml_utils import IQ2XXS_GRID, KMASK_IQ2XS, KSIGNS_IQ2XS + + aux32 = np.frombuffer(qs_block, dtype=np.uint32).reshape(num_blocks, 2) + aux8 = np.frombuffer(qs_block, dtype=np.uint8).reshape(num_blocks, 8) + + l = np.arange(4) + db = d * (0.5 + (aux32[:, [1]] >> 28)) * 0.25 + + grid = np.frombuffer(np.ascontiguousarray(IQ2XXS_GRID[aux8[:, l]]), dtype=np.uint8).reshape(num_blocks, 32) + signs = KSIGNS_IQ2XS[(aux32[:, [1]] >> 7 * l) & 127] + + y = db * grid * np.where(signs.repeat(8, axis=1) & np.tile(KMASK_IQ2XS, 4), -1, 1) + return y + + num_blocks = n_bytes // GGML_BLOCK_SIZES["IQ2_XXS"] + + data_f16 = np.frombuffer(data, dtype=np.float16).reshape(num_blocks, GGML_BLOCK_SIZES["IQ2_XXS"] // 2) + data_i16 = np.frombuffer(data, dtype=np.int16).reshape(num_blocks, GGML_BLOCK_SIZES["IQ2_XXS"] // 2) + + d = data_f16[:, 0].reshape(num_blocks, 1).astype(np.float32) + qs = data_i16[:, 1:].reshape(num_blocks, 32) + + y = [_dequantize_iq2xxs_column(np.ascontiguousarray(qs[:, 4 * i : 4 * (i + 1)]), d) for i in range(8)] + y = np.concatenate(y, axis=1) + return y + + def load_dequant_gguf_tensor(shape, ggml_type, data, n_bytes): if ggml_type == GGML_TYPES["F32"]: values = data @@ -506,6 +550,8 @@ def load_dequant_gguf_tensor(shape, ggml_type, data, n_bytes): values = dequantize_q3_k(data, n_bytes) elif ggml_type == GGML_TYPES["Q5_K"]: values = dequantize_q5_k(data, n_bytes) + elif ggml_type == GGML_TYPES["IQ2_XXS"]: + values = dequantize_iq2_xxs(data, n_bytes) else: raise NotImplementedError( f"ggml_type {ggml_type} not implemented - please raise an issue on huggingface transformers: https://github.com/huggingface/transformers/issues/new/choose" diff --git a/src/transformers/integrations/ggml_utils.py b/src/transformers/integrations/ggml_utils.py new file mode 100644 index 000000000000..59ba176a8725 --- /dev/null +++ b/src/transformers/integrations/ggml_utils.py @@ -0,0 +1,84 @@ +# Constants for ggml imatrix dequantization +# migrate from https://github.com/ggerganov/ggml/blob/3f5a4bbe59285c0f679b376f6259187d5514ff9c/src/ggml-common.h#L437 +import numpy as np + + +IQ2XXS_GRID = np.array([ + 0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08, + 0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x08080808082b0808, + 0x08080808082b082b, 0x08080808082b2b08, 0x08080808082b2b2b, 0x0808080819080819, + 0x0808080819081908, 0x0808080819190808, 0x0808080819192b08, 0x08080808192b0819, + 0x08080808192b1908, 0x080808082b080808, 0x080808082b08082b, 0x080808082b082b2b, + 0x080808082b2b082b, 0x0808081908080819, 0x0808081908081908, 0x0808081908190808, + 0x0808081908191919, 0x0808081919080808, 0x080808192b081908, 0x080808192b192b08, + 0x0808082b08080808, 0x0808082b0808082b, 0x0808082b082b082b, 0x0808082b2b08082b, + 0x0808190808080819, 0x0808190808081908, 0x0808190808190808, 0x08081908082b0819, + 0x08081908082b1908, 0x0808190819080808, 0x080819081908082b, 0x0808190819082b08, + 0x08081908192b0808, 0x080819082b080819, 0x080819082b081908, 0x080819082b190808, + 0x080819082b2b1908, 0x0808191908080808, 0x080819190808082b, 0x0808191908082b08, + 0x08081919082b0808, 0x080819191908192b, 0x08081919192b2b19, 0x080819192b080808, + 0x080819192b190819, 0x0808192b08082b19, 0x0808192b08190808, 0x0808192b19080808, + 0x0808192b2b081908, 0x0808192b2b2b1908, 0x08082b0808080808, 0x08082b0808081919, + 0x08082b0808082b08, 0x08082b0808191908, 0x08082b08082b2b08, 0x08082b0819080819, + 0x08082b0819081908, 0x08082b0819190808, 0x08082b081919082b, 0x08082b082b082b08, + 0x08082b1908081908, 
+    0x0819080808080819, 0x0819080808081908, 0x0819080808190808, 0x08190808082b0819,
+    0x0819080819080808, 0x08190808192b0808, 0x081908082b081908, 0x081908082b190808,
+    0x081908082b191919, 0x0819081908080808, 0x0819081908082b08, 0x08190819082b0808,
+    0x0819081919190808, 0x0819081919192b2b, 0x081908192b080808, 0x0819082b082b1908,
+    0x0819082b19081919, 0x0819190808080808, 0x0819190808082b08, 0x08191908082b0808,
+    0x08191908082b1919, 0x0819190819082b19, 0x081919082b080808, 0x0819191908192b08,
+    0x08191919192b082b, 0x0819192b08080808, 0x0819192b0819192b, 0x08192b0808080819,
+    0x08192b0808081908, 0x08192b0808190808, 0x08192b0819080808, 0x08192b082b080819,
+    0x08192b1908080808, 0x08192b1908081919, 0x08192b192b2b0808, 0x08192b2b19190819,
+    0x082b080808080808, 0x082b08080808082b, 0x082b080808082b2b, 0x082b080819081908,
+    0x082b0808192b0819, 0x082b08082b080808, 0x082b08082b08082b, 0x082b0819082b2b19,
+    0x082b081919082b08, 0x082b082b08080808, 0x082b082b0808082b, 0x082b190808080819,
+    0x082b190808081908, 0x082b190808190808, 0x082b190819080808, 0x082b19081919192b,
+    0x082b191908080808, 0x082b191919080819, 0x082b1919192b1908, 0x082b192b2b190808,
+    0x082b2b0808082b08, 0x082b2b08082b0808, 0x082b2b082b191908, 0x082b2b2b19081908,
+    0x1908080808080819, 0x1908080808081908, 0x1908080808190808, 0x1908080808192b08,
+    0x19080808082b0819, 0x19080808082b1908, 0x1908080819080808, 0x1908080819082b08,
+    0x190808081919192b, 0x19080808192b0808, 0x190808082b080819, 0x190808082b081908,
+    0x190808082b190808, 0x1908081908080808, 0x19080819082b0808, 0x19080819192b0819,
+    0x190808192b080808, 0x190808192b081919, 0x1908082b08080819, 0x1908082b08190808,
+    0x1908082b19082b08, 0x1908082b1919192b, 0x1908082b192b2b08, 0x1908190808080808,
+    0x1908190808082b08, 0x19081908082b0808, 0x190819082b080808, 0x190819082b192b19,
+    0x190819190819082b, 0x19081919082b1908, 0x1908192b08080808, 0x19082b0808080819,
+    0x19082b0808081908, 0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919,
+    0x19082b1908080808, 0x19082b1919192b08, 0x19082b19192b0819, 0x19082b192b08082b,
+    0x19082b2b19081919, 0x19082b2b2b190808, 0x1919080808080808, 0x1919080808082b08,
+    0x1919080808190819, 0x1919080808192b19, 0x19190808082b0808, 0x191908082b080808,
+    0x191908082b082b08, 0x1919081908081908, 0x191908191908082b, 0x191908192b2b1908,
+    0x1919082b2b190819, 0x191919082b190808, 0x191919082b19082b, 0x1919191908082b2b,
+    0x1919192b08080819, 0x1919192b19191908, 0x19192b0808080808, 0x19192b0808190819,
+    0x19192b0808192b19, 0x19192b08192b1908, 0x19192b1919080808, 0x19192b2b08082b08,
+    0x192b080808081908, 0x192b080808190808, 0x192b080819080808, 0x192b0808192b2b08,
+    0x192b081908080808, 0x192b081919191919, 0x192b082b08192b08, 0x192b082b192b0808,
+    0x192b190808080808, 0x192b190808081919, 0x192b191908190808, 0x192b19190819082b,
+    0x192b19192b081908, 0x192b2b081908082b, 0x2b08080808080808, 0x2b0808080808082b,
+    0x2b08080808082b2b, 0x2b08080819080819, 0x2b0808082b08082b, 0x2b08081908081908,
+    0x2b08081908192b08, 0x2b08081919080808, 0x2b08082b08190819, 0x2b08190808080819,
+    0x2b08190808081908, 0x2b08190808190808, 0x2b08190808191919, 0x2b08190819080808,
+    0x2b081908192b0808, 0x2b08191908080808, 0x2b0819191908192b, 0x2b0819192b191908,
+    0x2b08192b08082b19, 0x2b08192b19080808, 0x2b08192b192b0808, 0x2b082b080808082b,
+    0x2b082b1908081908, 0x2b082b2b08190819, 0x2b19080808081908, 0x2b19080808190808,
+    0x2b190808082b1908, 0x2b19080819080808, 0x2b1908082b2b0819, 0x2b1908190819192b,
+    0x2b1908192b080808, 0x2b19082b19081919, 0x2b19190808080808, 0x2b191908082b082b,
+    0x2b19190819081908, 0x2b19191919190819, 0x2b192b082b080819, 0x2b192b19082b0808,
+    0x2b2b08080808082b, 0x2b2b080819190808, 0x2b2b08082b081919, 0x2b2b081908082b19,
+    0x2b2b082b08080808, 0x2b2b190808192b08, 0x2b2b2b0819190808, 0x2b2b2b1908081908,
+])
+
+KSIGNS_IQ2XS = np.array([
+    0, 129, 130, 3, 132, 5, 6, 135, 136, 9, 10, 139, 12, 141, 142, 15,
+    144, 17, 18, 147, 20, 149, 150, 23, 24, 153, 154, 27, 156, 29, 30, 159,
+    160, 33, 34, 163, 36, 165, 166, 39, 40, 169, 170, 43, 172, 45, 46, 175,
+    48, 177, 178, 51, 180, 53, 54, 183, 184, 57, 58, 187, 60, 189, 190, 63,
+    192, 65, 66, 195, 68, 197, 198, 71, 72, 201, 202, 75, 204, 77, 78, 207,
+    80, 209, 210, 83, 212, 85, 86, 215, 216, 89, 90, 219, 92, 221, 222, 95,
+    96, 225, 226, 99, 228, 101, 102, 231, 232, 105, 106, 235, 108, 237, 238, 111,
+    240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
+])
+
+KMASK_IQ2XS = np.array([1, 2, 4, 8, 16, 32, 64, 128])
diff --git a/tests/quantization/ggml/test_ggml.py b/tests/quantization/ggml/test_ggml.py
index e42900a1d51b..a7931bcb3e09 100644
--- a/tests/quantization/ggml/test_ggml.py
+++ b/tests/quantization/ggml/test_ggml.py
@@ -34,6 +34,7 @@ class GgufIntegrationTests(unittest.TestCase):
     qwen2_model_id = "Qwen/Qwen1.5-0.5B-Chat-GGUF"
     llama3_model_id = "NousResearch/Meta-Llama-3-8B-GGUF"
     tinyllama_model_id = "PenutChen/TinyLlama-1.1B-Chat-v1.0-GGUF"
+    imatrix_model_id = "duyntnet/TinyLlama-1.1B-Chat-v1.0-imatrix-GGUF"
 
     q4_0_gguf_model_id = "tinyllama-1.1b-chat-v1.0.Q4_0.gguf"
     q4_k_gguf_model_id = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
@@ -42,6 +43,7 @@ class GgufIntegrationTests(unittest.TestCase):
     q5_k_gguf_model_id = "tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf"
     q6_k_gguf_model_id = "tinyllama-1.1b-chat-v1.0.Q6_K.gguf"
     q8_0_gguf_model_id = "tinyllama-1.1b-chat-v1.0.Q8_0.gguf"
+    iq2_xxs_gguf_model_id = "TinyLlama-1.1B-Chat-v1.0-IQ2_XXS.gguf"
 
     q4_0_mistral_model_id = "mistral-7b-instruct-v0.2.Q4_0.gguf"
     q4_0_qwen2_model_id = "qwen1_5-0_5b-chat-q4_0.gguf"
@@ -163,6 +165,18 @@ def test_f16(self):
         EXPECTED_TEXT = "Hello, World!\n\n5. Node.js"
         self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
 
+    def test_iq2_xxs(self):
+        tokenizer = AutoTokenizer.from_pretrained(self.imatrix_model_id, gguf_file=self.iq2_xxs_gguf_model_id)
+        model = AutoModelForCausalLM.from_pretrained(
+            self.imatrix_model_id, gguf_file=self.iq2_xxs_gguf_model_id
+        ).to(torch_device)
+
+        text = tokenizer(self.example_text, return_tensors="pt").to(torch_device)
+        out = model.generate(**text, max_new_tokens=10)
+
+        EXPECTED_TEXT = "Hello, I'm a software engineer. I'"
+        self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
+
     def test_mistral_q4_0(self):
         tokenizer = AutoTokenizer.from_pretrained(self.mistral_model_id, gguf_file=self.q4_0_mistral_model_id)
         model = AutoModelForCausalLM.from_pretrained(
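
For reference, a minimal usage sketch of the new IQ2_XXS path, mirroring `test_iq2_xxs` above. The repo and file names are the ones used in the test; swap in any other IQ2_XXS GGUF checkpoint as needed.

```python
# Sketch mirroring test_iq2_xxs: loading an IQ2_XXS GGUF checkpoint runs
# dequantize_iq2_xxs() on every IQ2_XXS tensor at load time.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "duyntnet/TinyLlama-1.1B-Chat-v1.0-imatrix-GGUF"
gguf_file = "TinyLlama-1.1B-Chat-v1.0-IQ2_XXS.gguf"

tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=gguf_file)
model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=gguf_file)

inputs = tokenizer("Hello", return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=10)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```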