@@ -3044,23 +3044,23 @@ MAKE_kQuantizeBlockwise(half, 1024, 4, 0, General8bit)
 MAKE_kQuantizeBlockwise(half, 512, 2, 0, General8bit)
 MAKE_kQuantizeBlockwise(half, 256, 2, 0, General8bit)
 MAKE_kQuantizeBlockwise(half, 128, 2, 0, General8bit)
-// MAKE_kQuantizeBlockwise(half, 64, 2, 0, General8bit)
+MAKE_kQuantizeBlockwise(half, 64, 2, 0, General8bit)
 
 MAKE_kQuantizeBlockwise(half, 4096, 4, 0, FP4)
 MAKE_kQuantizeBlockwise(half, 2048, 4, 0, FP4)
 MAKE_kQuantizeBlockwise(half, 1024, 4, 0, FP4)
 MAKE_kQuantizeBlockwise(half, 512, 2, 0, FP4)
 MAKE_kQuantizeBlockwise(half, 256, 2, 0, FP4)
 MAKE_kQuantizeBlockwise(half, 128, 2, 0, FP4)
-// MAKE_kQuantizeBlockwise(half, 64, 2, 0, FP4)
+MAKE_kQuantizeBlockwise(half, 64, 2, 0, FP4)
 
 MAKE_kQuantizeBlockwise(half, 4096, 4, 0, NF4)
 MAKE_kQuantizeBlockwise(half, 2048, 4, 0, NF4)
 MAKE_kQuantizeBlockwise(half, 1024, 4, 0, NF4)
 MAKE_kQuantizeBlockwise(half, 512, 2, 0, NF4)
 MAKE_kQuantizeBlockwise(half, 256, 2, 0, NF4)
 MAKE_kQuantizeBlockwise(half, 128, 2, 0, NF4)
-// MAKE_kQuantizeBlockwise(half, 64, 2, 0, NF4)
+MAKE_kQuantizeBlockwise(half, 64, 2, 0, NF4)
 
 MAKE_kQuantizeBlockwise(float, 4096, 4, 0, General8bit)
 MAKE_kQuantizeBlockwise(float, 4096, 4, 1, General8bit)
@@ -3069,23 +3069,23 @@ MAKE_kQuantizeBlockwise(float, 1024, 4, 0, General8bit)
 MAKE_kQuantizeBlockwise(float, 512, 2, 0, General8bit)
 MAKE_kQuantizeBlockwise(float, 256, 2, 0, General8bit)
 MAKE_kQuantizeBlockwise(float, 128, 2, 0, General8bit)
-// MAKE_kQuantizeBlockwise(float, 64, 2, 0, General8bit)
+MAKE_kQuantizeBlockwise(float, 64, 2, 0, General8bit)
 
 MAKE_kQuantizeBlockwise(float, 4096, 4, 0, FP4)
 MAKE_kQuantizeBlockwise(float, 2048, 4, 0, FP4)
 MAKE_kQuantizeBlockwise(float, 1024, 4, 0, FP4)
 MAKE_kQuantizeBlockwise(float, 512, 2, 0, FP4)
 MAKE_kQuantizeBlockwise(float, 256, 2, 0, FP4)
 MAKE_kQuantizeBlockwise(float, 128, 2, 0, FP4)
-// MAKE_kQuantizeBlockwise(float, 64, 2, 0, FP4)
+MAKE_kQuantizeBlockwise(float, 64, 2, 0, FP4)
 
 MAKE_kQuantizeBlockwise(float, 4096, 4, 0, NF4)
 MAKE_kQuantizeBlockwise(float, 2048, 4, 0, NF4)
 MAKE_kQuantizeBlockwise(float, 1024, 4, 0, NF4)
 MAKE_kQuantizeBlockwise(float, 512, 2, 0, NF4)
 MAKE_kQuantizeBlockwise(float, 256, 2, 0, NF4)
 MAKE_kQuantizeBlockwise(float, 128, 2, 0, NF4)
-// MAKE_kQuantizeBlockwise(float, 64, 2, 0, NF4)
+MAKE_kQuantizeBlockwise(float, 64, 2, 0, NF4)
 
 MAKE_kQuantizeBlockwise(hip_bfloat16, 4096, 4, 0, General8bit)
 MAKE_kQuantizeBlockwise(hip_bfloat16, 4096, 4, 1, General8bit)
@@ -3094,23 +3094,23 @@ MAKE_kQuantizeBlockwise(hip_bfloat16, 1024, 4, 0, General8bit)
 MAKE_kQuantizeBlockwise(hip_bfloat16, 512, 2, 0, General8bit)
 MAKE_kQuantizeBlockwise(hip_bfloat16, 256, 2, 0, General8bit)
 MAKE_kQuantizeBlockwise(hip_bfloat16, 128, 2, 0, General8bit)
-// MAKE_kQuantizeBlockwise(hip_bfloat16, 64, 2, 0, General8bit)
+MAKE_kQuantizeBlockwise(hip_bfloat16, 64, 2, 0, General8bit)
 
 MAKE_kQuantizeBlockwise(hip_bfloat16, 4096, 4, 0, FP4)
 MAKE_kQuantizeBlockwise(hip_bfloat16, 2048, 4, 0, FP4)
 MAKE_kQuantizeBlockwise(hip_bfloat16, 1024, 4, 0, FP4)
 MAKE_kQuantizeBlockwise(hip_bfloat16, 512, 2, 0, FP4)
 MAKE_kQuantizeBlockwise(hip_bfloat16, 256, 2, 0, FP4)
 MAKE_kQuantizeBlockwise(hip_bfloat16, 128, 2, 0, FP4)
-// MAKE_kQuantizeBlockwise(hip_bfloat16, 64, 2, 0, FP4)
+MAKE_kQuantizeBlockwise(hip_bfloat16, 64, 2, 0, FP4)
 
 MAKE_kQuantizeBlockwise(hip_bfloat16, 4096, 4, 0, NF4)
 MAKE_kQuantizeBlockwise(hip_bfloat16, 2048, 4, 0, NF4)
 MAKE_kQuantizeBlockwise(hip_bfloat16, 1024, 4, 0, NF4)
 MAKE_kQuantizeBlockwise(hip_bfloat16, 512, 2, 0, NF4)
 MAKE_kQuantizeBlockwise(hip_bfloat16, 256, 2, 0, NF4)
 MAKE_kQuantizeBlockwise(hip_bfloat16, 128, 2, 0, NF4)
-// MAKE_kQuantizeBlockwise(hip_bfloat16, 64, 2, 0, NF4)
+MAKE_kQuantizeBlockwise(hip_bfloat16, 64, 2, 0, NF4)
 
 template __global__ void kDequantizeBlockwise<half, 512, 64, 8, FP4>(float *code, unsigned char * A, float * absmax, half *out, const int blocksize, const int n);
 template __global__ void kDequantizeBlockwise<half, 512, 64, 8, General8bit>(float *code, unsigned char * A, float * absmax, half *out, const int blocksize, const int n);
0 commit comments