1- ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2- ; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s
3- ; RUN: opt -passes=lower-matrix-intrinsics -debug-only=lower-matrix-intrinsics -disable-output < %s 2>&1 | FileCheck %s --check-prefix=PASS-DEBUG
1+ ; RUN: opt -passes=lower-matrix-intrinsics -debug-only=lower-matrix-intrinsics -disable-output < %s 2>&1 | FileCheck %s --check-prefix=CHECK
42; REQUIRES: asserts
53
; Test input: loads a <9 x float> from %in via @llvm.matrix.column.major.load
; (stride 3, non-volatile, 3 rows x 3 columns), picks lanes 0, 4 and 8 with a
; plain shufflevector, and stores the resulting <3 x float> to %out.
; The shufflevector is an ordinary vector instruction rather than a matrix
; intrinsic; the expected debug output in this file reports the loaded 3x3
; matrix being flattened for that user.
64define void @diag_3x3 (ptr %in , ptr %out ) {
7- ; CHECK-LABEL: @diag_3x3(
8- ; CHECK-NEXT: [[COL_LOAD:%.*]] = load <3 x float>, ptr [[IN:%.*]], align 4
9- ; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr float, ptr [[IN]], i64 3
10- ; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <3 x float>, ptr [[VEC_GEP]], align 4
11- ; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr float, ptr [[IN]], i64 6
12- ; CHECK-NEXT: [[COL_LOAD3:%.*]] = load <3 x float>, ptr [[VEC_GEP2]], align 4
13- ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x float> [[COL_LOAD]], <3 x float> [[COL_LOAD1]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
14- ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x float> [[COL_LOAD3]], <3 x float> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 poison, i32 poison>
15- ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <6 x float> [[TMP1]], <6 x float> [[TMP2]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
16- ; CHECK-NEXT: [[DIAG:%.*]] = shufflevector <9 x float> [[TMP3]], <9 x float> poison, <3 x i32> <i32 0, i32 4, i32 8>
17- ; CHECK-NEXT: store <3 x float> [[DIAG]], ptr [[OUT:%.*]], align 16
18- ; CHECK-NEXT: ret void
19- ;
205 %inv = call <9 x float > @llvm.matrix.column.major.load (ptr %in , i64 3 , i1 false , i32 3 , i32 3 )
; In a column-major 3x3 layout, flat indices 0, 4 and 8 are the main diagonal.
216 %diag = shufflevector <9 x float > %inv , <9 x float > poison, <3 x i32 > <i32 0 , i32 4 , i32 8 >
227 store <3 x float > %diag , ptr %out
238 ret void
249}
25- ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
26- ; PASS-DEBUG: flattening a 3x3 matrix %{{.*}} = call <9 x float> @llvm.matrix.column.major.load.v9f32.i64(ptr %{{.*}}, i64 3, i1 false, i32 3, i32 3) because we do not have a shape-aware lowering for its user: %{{.*}} = shufflevector <9 x float> %{{.*}}, <9 x float> poison, <3 x i32> <i32 0, i32 4, i32 8>
10+ ; CHECK-LABEL: flattening a 3x3 matrix:
11+ ; CHECK-NEXT: %{{.*}} = call <9 x float> @llvm.matrix.column.major.load.v9f32.i64(ptr %{{.*}}, i64 3, i1 false, i32 3, i32 3)
12+ ; CHECK-NEXT: because we do not have a shape-aware lowering for its user:
13+ ; CHECK-NEXT: %{{.*}} = shufflevector <9 x float> %{{.*}}, <9 x float> poison, <3 x i32> <i32 0, i32 4, i32 8>
14+
; Test input: loads a <4 x double> from %in and feeds it through a chain of
; five @llvm.matrix.transpose calls before storing the result to %out.
; The first two calls pass shape arguments (4, 1) and (1, 4); the remaining
; three pass (2, 2). The same four elements are therefore used under two
; different shapes, which is what the expected debug output following this
; function refers to (a reshape from 4x1 to 2x2).
15+ define void @reshape (ptr %in , ptr %out ) {
16+ entry:
17+ %0 = load <4 x double >, ptr %in , align 8
18+ %1 = tail call <4 x double > @llvm.matrix.transpose.v4f64 (<4 x double > %0 , i32 4 , i32 1 )
19+ %2 = tail call <4 x double > @llvm.matrix.transpose.v4f64 (<4 x double > %1 , i32 1 , i32 4 )
; From here on the operand is described as 2x2 rather than 4x1 / 1x4.
20+ %3 = tail call <4 x double > @llvm.matrix.transpose.v4f64 (<4 x double > %2 , i32 2 , i32 2 )
21+ %4 = tail call <4 x double > @llvm.matrix.transpose.v4f64 (<4 x double > %3 , i32 2 , i32 2 )
22+ %5 = tail call <4 x double > @llvm.matrix.transpose.v4f64 (<4 x double > %4 , i32 2 , i32 2 )
23+ store <4 x double > %5 , ptr %out , align 8
24+ ret void
25+ }
26+ ; CHECK-LABEL: matrix reshape from 4x1 to 2x2 using at least 2 shuffles on behalf of:
27+ ; CHECK-NEXT: %{{.*}} = load <4 x double>, ptr %{{.*}}, align 8
28+
; Test input: computes A * (transpose(B) * transpose(C)) and stores it to %R.
;   %a   : <6 x double> loaded from %A, used as the 2x3 left operand of %m2
;   %b   : <6 x double> loaded from %B, transposed with shape arguments (2, 3)
;   %c   : <8 x double> loaded from %C, transposed with shape arguments (4, 2)
;   %m1  : multiply of %b_t and %c_t with dimensions (3, 2, 4), 12 elements
;   %m2  : multiply of %a and %m1 with dimensions (2, 3, 4), 8 elements
; The expected debug output following this function lists the values that get
; flattened while lowering these operations.
29+ define void @multiply_ntt (ptr %A , ptr %B , ptr %C , ptr %R ) {
30+ entry:
31+ %a = load <6 x double >, ptr %A , align 16
32+ %b = load <6 x double >, ptr %B , align 16
33+ %c = load <8 x double >, ptr %C , align 16
34+ %b_t = call <6 x double > @llvm.matrix.transpose.v6f64.v6f64 (<6 x double > %b , i32 2 , i32 3 )
35+ %c_t = call <8 x double > @llvm.matrix.transpose.v8f64.v8f64 (<8 x double > %c , i32 4 , i32 2 )
; Both operands of the inner product are transposes; only %a is used as loaded.
36+ %m1 = call <12 x double > @llvm.matrix.multiply.v12f64.v6f64.v8f64 (<6 x double > %b_t , <8 x double > %c_t , i32 3 , i32 2 , i32 4 )
37+ %m2 = call <8 x double > @llvm.matrix.multiply.v8f64.v6f64.v12f64 (<6 x double > %a , <12 x double > %m1 , i32 2 , i32 3 , i32 4 )
38+ store <8 x double > %m2 , ptr %R , align 16
39+ ret void
40+ }
41+ ; CHECK-LABEL: flattening a 2x3 matrix:
42+ ; CHECK-NEXT: %{{.*}} = load <6 x double>, ptr %{{.*}}, align 16
43+ ; CHECK-NEXT: because we do not have a shape-aware lowering for its user:
44+ ; CHECK-NEXT: %{{.*}} = shufflevector <6 x double> %{{.*}}, <6 x double> poison, <2 x i32> <i32 4, i32 5>
45+
46+ ; CHECK: flattening a 4x3 matrix:
47+ ; CHECK-NEXT: %{{.*}} = call <12 x double> @llvm.matrix.multiply.v12f64.v8f64.v6f64(<8 x double> %{{.*}}, <6 x double> %{{.*}}, i32 4, i32 2, i32 3)
48+ ; CHECK-NEXT: because we do not have a shape-aware lowering for its user:
49+ ; CHECK-NEXT: %{{.*}} = shufflevector <12 x double> %{{.*}}, <12 x double> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
0 commit comments