Skip to content

Commit 96f3cd3

Browse files
cyang49 authored and njhill committed
Update server/text_generation_server/utils/gptq/marlin.py
Co-authored-by: Nick Hill <[email protected]> Signed-off-by: Chih-Chieh Yang <[email protected]> Signed-off-by: Chih-Chieh-Yang <[email protected]>
1 parent e1b2110 commit 96f3cd3

File tree

1 file changed

+3
-2
lines changed
  • server/text_generation_server/utils/gptq

1 file changed

+3
-2
lines changed

server/text_generation_server/utils/gptq/marlin.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,9 @@ def __init__(self, qweight, qzeros, scales, g_idx, bias, bits, group_size):
116116
infeatures = qweight.shape[0] * pack_size
117117
outfeatures = qweight.shape[1]
118118

119-
if not torch.cuda.get_device_capability()[0] >= 8:
120-
raise ValueError(f'Can not use Marlin int4*fp16 kernel with a device of compute capability {torch.cuda.get_device_capability()}.')
119+
device_capability = torch.cuda.get_device_capability()
120+
if not device_capability[0] >= 8:
121+
raise ValueError(f'Can not use Marlin int4*fp16 kernel with a device of compute capability {device_capability}.')
121122
if infeatures % 128 != 0 or outfeatures % 256 != 0:
122123
raise ValueError("`infeatures` must be divisible by 128 and `outfeatures` by 256.")
123124
if bits not in [4]:

0 commit comments

Comments
 (0)