huggingface · LysandreJik · Jan 2, 2025 · Dec 19, 2024
diff --git a/tests/quantization/bnb/test_4bit.py b/tests/quantization/bnb/test_4bit.py
@@ -172,7 +172,7 @@ def test_memory_footprint(self):
         mem_fp16 = self.model_fp16.get_memory_footprint()
         mem_4bit = self.model_4bit.get_memory_footprint()
 
-        self.assertAlmostEqual(mem_fp16 / mem_4bit, self.EXPECTED_RELATIVE_DIFFERENCE)
+        self.assertAlmostEqual(mem_fp16 / mem_4bit, self.EXPECTED_RELATIVE_DIFFERENCE, delta=1e-5)
         linear = get_some_linear_layer(self.model_4bit)
         self.assertTrue(linear.weight.__class__ == Params4bit)
 

diff --git a/tests/quantization/bnb/test_mixed_int8.py b/tests/quantization/bnb/test_mixed_int8.py
@@ -229,7 +229,7 @@ def test_memory_footprint(self):
         mem_fp16 = self.model_fp16.get_memory_footprint()
         mem_8bit = self.model_8bit.get_memory_footprint()
 
-        self.assertAlmostEqual(mem_fp16 / mem_8bit, self.EXPECTED_RELATIVE_DIFFERENCE)
+        self.assertAlmostEqual(mem_fp16 / mem_8bit, self.EXPECTED_RELATIVE_DIFFERENCE, delta=1e-5)
         self.assertTrue(get_some_linear_layer(self.model_8bit).weight.__class__ == Int8Params)
 
     def test_linear_are_8bit(self):
@@ -938,8 +938,13 @@ class MixedInt8LlamaTest(MixedInt8Test):
     model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
     EXPECTED_RELATIVE_DIFFERENCE = 1.7869331026479096
     EXPECTED_OUTPUTS = set()
+
+    # Expected on Intel XPU
     EXPECTED_OUTPUTS.add("Hello my name is John Smith and I am a software engineer. I")
 
+    # Expected on NVIDIA T4
+    EXPECTED_OUTPUTS.add("Hello my name is John and I am a software engineer. I have")
+
     def test_int8_from_pretrained(self):
         r"""
         Test whether loading a 8bit model from the Hub works as expected