Skip to content

Commit 96f3cd3

Browse files
cyang49 authored and njhill committed
Update server/text_generation_server/utils/gptq/marlin.py
Co-authored-by: Nick Hill <[email protected]> Signed-off-by: Chih-Chieh Yang <[email protected]> Signed-off-by: Chih-Chieh-Yang <[email protected]>
1 parent e1b2110 commit 96f3cd3

File tree

1 file changed

+3
-2
lines changed
  • server/text_generation_server/utils/gptq

1 file changed

+3
-2
lines changed

server/text_generation_server/utils/gptq/marlin.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,9 @@ def __init__(self, qweight, qzeros, scales, g_idx, bias, bits, group_size):
116116
infeatures = qweight.shape[0] * pack_size
117117
outfeatures = qweight.shape[1]
118118

119-
if not torch.cuda.get_device_capability()[0] >= 8:
120-
raise ValueError(f'Can not use Marlin int4*fp16 kernel with a device of compute capability {torch.cuda.get_device_capability()}.')
119+
device_capability = torch.cuda.get_device_capability()
120+
if not device_capability[0] >= 8:
121+
raise ValueError(f'Can not use Marlin int4*fp16 kernel with a device of compute capability {device_capability}.')
121122
if infeatures % 128 != 0 or outfeatures % 256 != 0:
122123
raise ValueError("`infeatures` must be divisible by 128 and `outfeatures` by 256.")
123124
if bits not in [4]:

0 commit comments

Comments
 (0)