@@ -421,8 +421,23 @@ struct RawBufferOpLowering : public ConvertOpToLLVMPattern<GpuOp> {
421421
422422// TODO: The AMDGPU backend already has all this bitpacking logic; we should
423423// move it to some common place.
424+ /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
425+ /// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9)
426+ /// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10)
427+ /// \p Vmcnt = \p Waitcnt[15:10] (gfx11)
428+ /// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11)
429+ /// \p Expcnt = \p Waitcnt[2:0] (gfx11)
430+ /// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10)
431+ /// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10)
432+ /// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11)
424433static FailureOr<unsigned > encodeWaitcnt (Chipset chipset, unsigned vmcnt,
425434 unsigned expcnt, unsigned lgkmcnt) {
435+ if (chipset.majorVersion < 9 ) {
436+ vmcnt = std::min (15u , vmcnt);
437+ expcnt = std::min (7u , expcnt);
438+ lgkmcnt = std::min (15u , lgkmcnt);
439+ return vmcnt | (expcnt << 4 ) | (lgkmcnt << 8 );
440+ }
426441 if (chipset.majorVersion == 9 ) {
427442 vmcnt = std::min (63u , vmcnt);
428443 expcnt = std::min (7u , expcnt);
@@ -432,6 +447,21 @@ static FailureOr<unsigned> encodeWaitcnt(Chipset chipset, unsigned vmcnt,
432447 unsigned otherCnts = (expcnt << 4 ) | (lgkmcnt << 8 );
433448 return lowBits | highBits | otherCnts;
434449 }
450+ if (chipset.majorVersion == 10 ) {
451+ vmcnt = std::min (63u , vmcnt);
452+ expcnt = std::min (7u , expcnt);
453+ lgkmcnt = std::min (63u , lgkmcnt);
454+ unsigned lowBits = vmcnt & 0xF ;
455+ unsigned highBits = (vmcnt >> 4 ) << 14 ;
456+ unsigned otherCnts = (expcnt << 4 ) | (lgkmcnt << 8 );
457+ return lowBits | highBits | otherCnts;
458+ }
459+ if (chipset.majorVersion == 11 ) {
460+ vmcnt = std::min (63u , vmcnt);
461+ expcnt = std::min (7u , expcnt);
462+ lgkmcnt = std::min (63u , lgkmcnt);
463+ return (vmcnt << 10 ) | expcnt | (lgkmcnt << 4 );
464+ }
435465 return failure ();
436466}
437467
0 commit comments