@@ -866,3 +866,41 @@ llvm.func @nvvm_dot_accumulate_4way(%a: vector<4xi8>, %b: vector<4xi8>, %c: i32)
866866 %3 = nvvm.dot.accumulate.4way %a <signed >, %b <signed >, %c: vector <4 xi8 >, vector <4 xi8 >
867867 llvm.return
868868}
869+
870+ // -----
// Translation test for nvvm.dot.accumulate.2way.
//
// The op is exercised once for every combination of operand signedness
// (a: unsigned/signed, b: unsigned/signed) and of the b_hi attribute
// (false/true), eight cases in total. For each case the expected LLVM IR is:
//   * a bitcast of the <2 x i16> operand to i32,
//   * a bitcast of the <4 x i8> operand to i32,
//   * a call to llvm.nvvm.idp2a.<a>.<b>, where the two suffix letters mirror
//     the signedness written on %a and %b (u = unsigned, s = signed) and the
//     i1 argument mirrors the b_hi attribute.
// CHECK-LABEL: @nvvm_dot_accumulate_2way
llvm.func @nvvm_dot_accumulate_2way(%a: vector<2xi16>, %b: vector<4xi8>, %c: i32) {
  // a unsigned, b unsigned, b_hi = false.
  // CHECK: %[[a_cast:.*]] = bitcast <2 x i16> %{{.*}} to i32
  // CHECK: %[[b_cast:.*]] = bitcast <4 x i8> %{{.*}} to i32
  // CHECK: call i32 @llvm.nvvm.idp2a.u.u(i32 %[[a_cast]], i32 %[[b_cast]], i1 false, i32 %{{.*}})
  %uu_lo = nvvm.dot.accumulate.2way %a <unsigned>, %b <unsigned>, %c {b_hi = false} : vector<2xi16>, vector<4xi8>

  // a unsigned, b unsigned, b_hi = true.
  // CHECK: %[[a_cast:.*]] = bitcast <2 x i16> %{{.*}} to i32
  // CHECK: %[[b_cast:.*]] = bitcast <4 x i8> %{{.*}} to i32
  // CHECK: call i32 @llvm.nvvm.idp2a.u.u(i32 %[[a_cast]], i32 %[[b_cast]], i1 true, i32 %{{.*}})
  %uu_hi = nvvm.dot.accumulate.2way %a <unsigned>, %b <unsigned>, %c {b_hi = true} : vector<2xi16>, vector<4xi8>

  // a signed, b unsigned, b_hi = false.
  // CHECK: %[[a_cast:.*]] = bitcast <2 x i16> %{{.*}} to i32
  // CHECK: %[[b_cast:.*]] = bitcast <4 x i8> %{{.*}} to i32
  // CHECK: call i32 @llvm.nvvm.idp2a.s.u(i32 %[[a_cast]], i32 %[[b_cast]], i1 false, i32 %{{.*}})
  %su_lo = nvvm.dot.accumulate.2way %a <signed>, %b <unsigned>, %c {b_hi = false} : vector<2xi16>, vector<4xi8>

  // a signed, b unsigned, b_hi = true.
  // CHECK: %[[a_cast:.*]] = bitcast <2 x i16> %{{.*}} to i32
  // CHECK: %[[b_cast:.*]] = bitcast <4 x i8> %{{.*}} to i32
  // CHECK: call i32 @llvm.nvvm.idp2a.s.u(i32 %[[a_cast]], i32 %[[b_cast]], i1 true, i32 %{{.*}})
  %su_hi = nvvm.dot.accumulate.2way %a <signed>, %b <unsigned>, %c {b_hi = true} : vector<2xi16>, vector<4xi8>

  // a unsigned, b signed, b_hi = false.
  // CHECK: %[[a_cast:.*]] = bitcast <2 x i16> %{{.*}} to i32
  // CHECK: %[[b_cast:.*]] = bitcast <4 x i8> %{{.*}} to i32
  // CHECK: call i32 @llvm.nvvm.idp2a.u.s(i32 %[[a_cast]], i32 %[[b_cast]], i1 false, i32 %{{.*}})
  %us_lo = nvvm.dot.accumulate.2way %a <unsigned>, %b <signed>, %c {b_hi = false} : vector<2xi16>, vector<4xi8>

  // a unsigned, b signed, b_hi = true.
  // CHECK: %[[a_cast:.*]] = bitcast <2 x i16> %{{.*}} to i32
  // CHECK: %[[b_cast:.*]] = bitcast <4 x i8> %{{.*}} to i32
  // CHECK: call i32 @llvm.nvvm.idp2a.u.s(i32 %[[a_cast]], i32 %[[b_cast]], i1 true, i32 %{{.*}})
  %us_hi = nvvm.dot.accumulate.2way %a <unsigned>, %b <signed>, %c {b_hi = true} : vector<2xi16>, vector<4xi8>

  // a signed, b signed, b_hi = false.
  // CHECK: %[[a_cast:.*]] = bitcast <2 x i16> %{{.*}} to i32
  // CHECK: %[[b_cast:.*]] = bitcast <4 x i8> %{{.*}} to i32
  // CHECK: call i32 @llvm.nvvm.idp2a.s.s(i32 %[[a_cast]], i32 %[[b_cast]], i1 false, i32 %{{.*}})
  %ss_lo = nvvm.dot.accumulate.2way %a <signed>, %b <signed>, %c {b_hi = false} : vector<2xi16>, vector<4xi8>

  // a signed, b signed, b_hi = true.
  // CHECK: %[[a_cast:.*]] = bitcast <2 x i16> %{{.*}} to i32
  // CHECK: %[[b_cast:.*]] = bitcast <4 x i8> %{{.*}} to i32
  // CHECK: call i32 @llvm.nvvm.idp2a.s.s(i32 %[[a_cast]], i32 %[[b_cast]], i1 true, i32 %{{.*}})
  %ss_hi = nvvm.dot.accumulate.2way %a <signed>, %b <signed>, %c {b_hi = true} : vector<2xi16>, vector<4xi8>

  llvm.return
}
0 commit comments