; RUN: opt -passes=lower-matrix-intrinsics -debug-only=lower-matrix-intrinsics -disable-output < %s 2>&1 | FileCheck %s --check-prefix=CHECK
; REQUIRES: asserts

; Load a 3x3 float matrix (column-major, stride 3, non-volatile) and pull out
; flat elements 0, 4 and 8 with a plain shufflevector. With 3 rows per column
; those indices are the (0,0), (1,1) and (2,2) entries, i.e. the diagonal.
; The shufflevector is an ordinary vector instruction rather than a matrix
; intrinsic; the expected debug output that follows this function reports the
; loaded matrix being flattened for that user.
define void @diag_3x3(ptr %in, ptr %out) {
  %inv = call <9 x float> @llvm.matrix.column.major.load.v9f32.i64(ptr %in, i64 3, i1 false, i32 3, i32 3)
  %diag = shufflevector <9 x float> %inv, <9 x float> poison, <3 x i32> <i32 0, i32 4, i32 8>
  store <3 x float> %diag, ptr %out
  ret void
}
; CHECK-LABEL: flattening a 3x3 matrix:
; CHECK-NEXT: %{{.*}} = call <9 x float> @llvm.matrix.column.major.load.v9f32.i64(ptr %{{.*}}, i64 3, i1 false, i32 3, i32 3)
; CHECK-NEXT: because we do not have a shape-aware lowering for its user:
; CHECK-NEXT: %{{.*}} = shufflevector <9 x float> %{{.*}}, <9 x float> poison, <3 x i32> <i32 0, i32 4, i32 8>

; Push one <4 x double> value through a chain of transposes whose shape
; arguments disagree with each other:
;   %1 treats %0 as 4x1 (result is 1x4),
;   %2 treats %1 as 1x4 (result is 4x1 again),
;   %3, %4 and %5 each treat their operand as 2x2.
; The same flat vector is therefore consumed under two different shapes; the
; expected debug output that follows this function reports a reshape from
; 4x1 to 2x2.
define void @reshape(ptr %in, ptr %out) {
entry:
  %0 = load <4 x double>, ptr %in, align 8
  %1 = tail call <4 x double> @llvm.matrix.transpose.v4f64(<4 x double> %0, i32 4, i32 1)
  %2 = tail call <4 x double> @llvm.matrix.transpose.v4f64(<4 x double> %1, i32 1, i32 4)
  %3 = tail call <4 x double> @llvm.matrix.transpose.v4f64(<4 x double> %2, i32 2, i32 2)
  %4 = tail call <4 x double> @llvm.matrix.transpose.v4f64(<4 x double> %3, i32 2, i32 2)
  %5 = tail call <4 x double> @llvm.matrix.transpose.v4f64(<4 x double> %4, i32 2, i32 2)
  store <4 x double> %5, ptr %out, align 8
  ret void
}
; CHECK-LABEL: matrix reshape from 4x1 to 2x2 using at least 2 shuffles on behalf of:
; CHECK-NEXT: %{{.*}} = load <4 x double>, ptr %{{.*}}, align 8

; Compute A * (transpose(B) * transpose(C)) and store the result to %R.
; Shapes implied by the intrinsic arguments:
;   %b   is read as 2x3, so %b_t is 3x2
;   %c   is read as 4x2, so %c_t is 2x4
;   %m1  = %b_t (3x2) * %c_t (2x4) -> 3x4, 12 elements
;   %m2  = %a   (2x3) * %m1  (3x4) -> 2x4,  8 elements
; NOTE(review): the expected debug output that follows this function mentions
; a 4x3 multiply of an <8 x double> by a <6 x double>, so the pass presumably
; rewrites the inner product with swapped operands before lowering - confirm
; against the pass implementation.
define void @multiply_ntt(ptr %A, ptr %B, ptr %C, ptr %R) {
entry:
  %a = load <6 x double>, ptr %A, align 16
  %b = load <6 x double>, ptr %B, align 16
  %c = load <8 x double>, ptr %C, align 16
  %b_t = call <6 x double> @llvm.matrix.transpose.v6f64(<6 x double> %b, i32 2, i32 3)
  %c_t = call <8 x double> @llvm.matrix.transpose.v8f64(<8 x double> %c, i32 4, i32 2)
  %m1 = call <12 x double> @llvm.matrix.multiply.v12f64.v6f64.v8f64(<6 x double> %b_t, <8 x double> %c_t, i32 3, i32 2, i32 4)
  %m2 = call <8 x double> @llvm.matrix.multiply.v8f64.v6f64.v12f64(<6 x double> %a, <12 x double> %m1, i32 2, i32 3, i32 4)
  store <8 x double> %m2, ptr %R, align 16
  ret void
}
; CHECK-LABEL: flattening a 2x3 matrix:
; CHECK-NEXT: %{{.*}} = load <6 x double>, ptr %{{.*}}, align 16
; CHECK-NEXT: because we do not have a shape-aware lowering for its user:
; CHECK-NEXT: %{{.*}} = shufflevector <6 x double> %{{.*}}, <6 x double> poison, <2 x i32> <i32 4, i32 5>

; CHECK-LABEL: flattening a 4x3 matrix:
; CHECK-NEXT: %{{.*}} = call <12 x double> @llvm.matrix.multiply.v12f64.v8f64.v6f64(<8 x double> %{{.*}}, <6 x double> %{{.*}}, i32 4, i32 2, i32 3)
; CHECK-NEXT: because we do not have a shape-aware lowering for its user:
; CHECK-NEXT: %{{.*}} = shufflevector <12 x double> %{{.*}}, <12 x double> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>


; Broadcast lane 0 of %m into all four lanes (the shuffle mask is all zeros),
; then transpose the result as a 1x4 matrix, giving a 4x1 matrix, and store it.
; The transpose operand is defined by a plain shufflevector rather than a
; matrix intrinsic; the expected debug output that follows this function
; reports that definition being split.
define void @redundant_transpose_of_shuffle(<4 x float> %m, ptr %dst) {
entry:
  %shuffle = shufflevector <4 x float> %m, <4 x float> zeroinitializer, <4 x i32> zeroinitializer
  ; The overload suffix names the <4 x float> result type.
  %t = tail call <4 x float> @llvm.matrix.transpose.v4f32(<4 x float> %shuffle, i32 1, i32 4)
  store <4 x float> %t, ptr %dst, align 4
  ret void
}

; CHECK-LABEL: splitting a 4x1 matrix with 1 shuffles beacuse we do not have a shape-aware lowering for its def:
; CHECK-NEXT: %{{.*}} = shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <4 x i32> zeroinitializer