@@ -632,7 +632,7 @@ Syntax:
632632
633633.. code-block:: llvm
634634
635- declare i32 @llvm.nvvm.prmt(i32 %a , i32 %b , i32 %c )
635+ declare i32 @llvm.nvvm.prmt(i32 %lo, i32 %hi, i32 %selector)
636636
637637 Overview:
638638"""""""""
@@ -644,7 +644,7 @@ Semantics:
644644""""""""""
645645
646646The bytes in the first two source operands are numbered from 0 to 7:
647- {%b , %a } = {{b7, b6, b5, b4}, {b3, b2, b1, b0}}. For each byte in the target
647+ {%hi, %lo} = {{b7, b6, b5, b4}, {b3, b2, b1, b0}}. For each byte in the target
648648register, a 4-bit selection value is defined.
649649
650650The 3 lsbs of the selection value specify which of the 8 source bytes should be
@@ -653,7 +653,7 @@ copied, or if the sign (msb of the byte) should be replicated over all 8 bits
653653of the target position (sign extend of the byte value); msb=0 means copy the
654654literal value; msb=1 means replicate the sign.
655655
656- These 4-bit selection values are pulled from the lower 16-bits of the third
656+ These 4-bit selection values are pulled from the lower 16 bits of the %selector
657657operand, with the least significant selection value corresponding to the least
658658significant byte of the destination.
659659
@@ -666,13 +666,13 @@ Syntax:
666666
667667.. code-block:: llvm
668668
669- declare i32 @llvm.nvvm.prmt.f4e(i32 %a , i32 %b , i32 %c )
670- declare i32 @llvm.nvvm.prmt.b4e(i32 %a , i32 %b , i32 %c )
669+ declare i32 @llvm.nvvm.prmt.f4e(i32 %lo, i32 %hi, i32 %selector)
670+ declare i32 @llvm.nvvm.prmt.b4e(i32 %lo, i32 %hi, i32 %selector)
671671
672- declare i32 @llvm.nvvm.prmt.rc8(i32 %a , i32 %c )
673- declare i32 @llvm.nvvm.prmt.ecl(i32 %a , i32 %c )
674- declare i32 @llvm.nvvm.prmt.ecr(i32 %a , i32 %c )
675- declare i32 @llvm.nvvm.prmt.rc16(i32 %a , i32 %c )
672+ declare i32 @llvm.nvvm.prmt.rc8(i32 %lo, i32 %selector)
673+ declare i32 @llvm.nvvm.prmt.ecl(i32 %lo, i32 %selector)
674+ declare i32 @llvm.nvvm.prmt.ecr(i32 %lo, i32 %selector)
675+ declare i32 @llvm.nvvm.prmt.rc16(i32 %lo, i32 %selector)
676676
677677 Overview:
678678"""""""""
@@ -685,64 +685,64 @@ Semantics:
685685""""""""""
686686
687687As with the generic '``llvm.nvvm.prmt``' intrinsic, the bytes in the first one
688- or two source operands are numbered. The first source operand (%a ) is numbered
688+ or two source operands are numbered. The first source operand (%lo) is numbered
689689{b3, b2, b1, b0}; in the case of the '``f4e``' and '``b4e``' variants, the
690- second source operand (%b ) is numbered {b7, b6, b5, b4}.
691-
692- Depending on the 2 least significant bits of the final operand, the result of
693- the permutation is defined as follows:
694-
695- +------------+---------+--------------+
696- | Mode | %c [1:0] | Output |
697- +------------+---------+--------------+
698- | '``f4e ``' | 0 | {3, 2, 1, 0} |
699- | +---------+--------------+
700- | | 1 | {4, 3, 2, 1} |
701- | +---------+--------------+
702- | | 2 | {5, 4, 3, 2} |
703- | +---------+--------------+
704- | | 3 | {6, 5, 4, 3} |
705- +------------+---------+--------------+
706- | '``b4e ``' | 0 | {5, 6, 7, 0} |
707- | +---------+--------------+
708- | | 1 | {6, 7, 0, 1} |
709- | +---------+--------------+
710- | | 2 | {7, 0, 1, 2} |
711- | +---------+--------------+
712- | | 3 | {0, 1, 2, 3} |
713- +------------+---------+--------------+
714- | '``rc8 ``' | 0 | {0, 0, 0, 0} |
715- | +---------+--------------+
716- | | 1 | {1, 1, 1, 1} |
717- | +---------+--------------+
718- | | 2 | {2, 2, 2, 2} |
719- | +---------+--------------+
720- | | 3 | {3, 3, 3, 3} |
721- +------------+---------+--------------+
722- | '``ecl ``' | 0 | {3, 2, 1, 0} |
723- | +---------+--------------+
724- | | 1 | {3, 2, 1, 1} |
725- | +---------+--------------+
726- | | 2 | {3, 2, 2, 2} |
727- | +---------+--------------+
728- | | 3 | {3, 3, 3, 3} |
729- +------------+---------+--------------+
730- | '``ecr ``' | 0 | {0, 0, 0, 0} |
731- | +---------+--------------+
732- | | 1 | {1, 1, 1, 0} |
733- | +---------+--------------+
734- | | 2 | {2, 2, 1, 0} |
735- | +---------+--------------+
736- | | 3 | {3, 2, 1, 0} |
737- +------------+---------+--------------+
738- | '``rc16 ``' | 0 | {1, 0, 1, 0} |
739- | +---------+--------------+
740- | | 1 | {3, 2, 3, 2} |
741- | +---------+--------------+
742- | | 2 | {1, 0, 1, 0} |
743- | +---------+--------------+
744- | | 3 | {3, 2, 3, 2} |
745- +------------+---------+--------------+
690+ second source operand (%hi) is numbered {b7, b6, b5, b4}.
691+
692+ Depending on the 2 least significant bits of the %selector operand, the result
693+ of the permutation is defined as follows:
694+
695+ +------------+----------------+--------------+
696+ |    Mode    | %selector[1:0] |    Output    |
697+ +------------+----------------+--------------+
698+ | '``f4e``'  | 0              | {3, 2, 1, 0} |
699+ |            +----------------+--------------+
700+ |            | 1              | {4, 3, 2, 1} |
701+ |            +----------------+--------------+
702+ |            | 2              | {5, 4, 3, 2} |
703+ |            +----------------+--------------+
704+ |            | 3              | {6, 5, 4, 3} |
705+ +------------+----------------+--------------+
706+ | '``b4e``'  | 0              | {5, 6, 7, 0} |
707+ |            +----------------+--------------+
708+ |            | 1              | {6, 7, 0, 1} |
709+ |            +----------------+--------------+
710+ |            | 2              | {7, 0, 1, 2} |
711+ |            +----------------+--------------+
712+ |            | 3              | {0, 1, 2, 3} |
713+ +------------+----------------+--------------+
714+ | '``rc8``'  | 0              | {0, 0, 0, 0} |
715+ |            +----------------+--------------+
716+ |            | 1              | {1, 1, 1, 1} |
717+ |            +----------------+--------------+
718+ |            | 2              | {2, 2, 2, 2} |
719+ |            +----------------+--------------+
720+ |            | 3              | {3, 3, 3, 3} |
721+ +------------+----------------+--------------+
722+ | '``ecl``'  | 0              | {3, 2, 1, 0} |
723+ |            +----------------+--------------+
724+ |            | 1              | {3, 2, 1, 1} |
725+ |            +----------------+--------------+
726+ |            | 2              | {3, 2, 2, 2} |
727+ |            +----------------+--------------+
728+ |            | 3              | {3, 3, 3, 3} |
729+ +------------+----------------+--------------+
730+ | '``ecr``'  | 0              | {0, 0, 0, 0} |
731+ |            +----------------+--------------+
732+ |            | 1              | {1, 1, 1, 0} |
733+ |            +----------------+--------------+
734+ |            | 2              | {2, 2, 1, 0} |
735+ |            +----------------+--------------+
736+ |            | 3              | {3, 2, 1, 0} |
737+ +------------+----------------+--------------+
738+ | '``rc16``' | 0              | {1, 0, 1, 0} |
739+ |            +----------------+--------------+
740+ |            | 1              | {3, 2, 3, 2} |
741+ |            +----------------+--------------+
742+ |            | 2              | {1, 0, 1, 0} |
743+ |            +----------------+--------------+
744+ |            | 3              | {3, 2, 3, 2} |
745+ +------------+----------------+--------------+
746746
747747TMA family of Intrinsics
748748------------------------
0 commit comments