@@ -432,3 +432,46 @@ define void @xor_2x2(ptr %lhs, ptr %rhs, ptr %out) {
432432 store <4 x i32 > %optt , ptr %out
433433 ret void
434434}
435+
; Test: @llvm.fabs.v4f64 applied to a <4 x double> whose result is then used
; as a 2x2 matrix by two back-to-back @llvm.matrix.transpose calls
; (i32 2, i32 2).
;
; The FileCheck directives that follow the function header require the emitted
; body to be, line after line: two <2 x double> column loads (the second one
; through a getelementptr of 2 doubles), two @llvm.fabs.v2f64 calls, two
; <2 x double> column stores, and the return. No transpose call and no
; <4 x double> operation is expected to remain.
;
; The input load/store carry no explicit alignment; the directives expect
; align 32 on the first column access and align 16 on the second.
;
; NOTE(review): the RUN line is outside this excerpt; presumably this is run
; through the matrix-intrinsics lowering pass -- confirm at the top of the file.
436+ define void @fabs_2x2f64 (ptr %in , ptr %out ) {
437+ ; CHECK-LABEL: @fabs_2x2f64(
438+ ; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, ptr [[IN:%.*]], align 32
439+ ; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN]], i64 2
440+ ; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 16
441+ ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[COL_LOAD]])
442+ ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[COL_LOAD1]])
443+ ; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[OUT:%.*]], align 32
444+ ; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr double, ptr [[OUT]], i64 2
445+ ; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[VEC_GEP2]], align 16
446+ ; CHECK-NEXT: ret void
447+ ;
; Input IR: load the flat vector, take fabs of all four elements, then
; transpose twice as 2x2. The fabs call itself has no shape operands; the only
; shape information in this function comes from the transposes that consume
; its result.
448+ %load = load <4 x double >, ptr %in
449+ %fabs = call <4 x double > @llvm.fabs.v4f64 (<4 x double > %load )
450+ %fabst = call <4 x double > @llvm.matrix.transpose (<4 x double > %fabs , i32 2 , i32 2 )
451+ %fabstt = call <4 x double > @llvm.matrix.transpose (<4 x double > %fabst , i32 2 , i32 2 )
452+ store <4 x double > %fabstt , ptr %out
453+ ret void
454+ }
455+
; Test: integer @llvm.abs.v4i32 on both sides of a pair of 2x2
; @llvm.matrix.transpose calls: one abs feeds the first transpose, and a
; second abs consumes the result of the second transpose.
;
; The FileCheck directives that follow the function header require the emitted
; body to be, line after line: two <2 x i32> column loads (the second one
; through a getelementptr of 2 i32s), four @llvm.abs.v2i32 calls, two
; <2 x i32> column stores, and the return. No transpose call and no <4 x i32>
; operation is expected to remain.
;
; The i1 operand of @llvm.abs (is_int_min_poison per the LLVM LangRef) differs
; between the two input calls: false on %abs, true on %absabstt. The
; directives expect each flag to be carried unchanged onto the matching pair
; of per-column calls (TMP1/TMP2 use false, TMP3/TMP4 use true).
;
; The input load/store carry no explicit alignment; the directives expect
; align 16 on the first column access and align 8 on the second.
;
; NOTE(review): despite the `fabs_` prefix in the name, this function only
; exercises the integer @llvm.abs intrinsic; `abs_2x2i32` may be the intended
; name -- confirm before renaming, since the label directive matches the name.
; NOTE(review): the RUN line is outside this excerpt; presumably this is run
; through the matrix-intrinsics lowering pass -- confirm at the top of the file.
456+ define void @fabs_2x2i32 (ptr %in , ptr %out ) {
457+ ; CHECK-LABEL: @fabs_2x2i32(
458+ ; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x i32>, ptr [[IN:%.*]], align 16
459+ ; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr i32, ptr [[IN]], i64 2
460+ ; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x i32>, ptr [[VEC_GEP]], align 8
461+ ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[COL_LOAD]], i1 false)
462+ ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[COL_LOAD1]], i1 false)
463+ ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP1]], i1 true)
464+ ; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP2]], i1 true)
465+ ; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr [[OUT:%.*]], align 16
466+ ; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr i32, ptr [[OUT]], i64 2
467+ ; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr [[VEC_GEP2]], align 8
468+ ; CHECK-NEXT: ret void
469+ ;
; Input IR: abs (flag false) -> transpose -> transpose -> abs (flag true).
; Neither abs call has shape operands; the only shape information in this
; function comes from the two transposes between them.
470+ %load = load <4 x i32 >, ptr %in
471+ %abs = call <4 x i32 > @llvm.abs.v4i32 (<4 x i32 > %load , i1 false )
472+ %abst = call <4 x i32 > @llvm.matrix.transpose (<4 x i32 > %abs , i32 2 , i32 2 )
473+ %abstt = call <4 x i32 > @llvm.matrix.transpose (<4 x i32 > %abst , i32 2 , i32 2 )
474+ %absabstt = call <4 x i32 > @llvm.abs.v4i32 (<4 x i32 > %abstt , i1 true )
475+ store <4 x i32 > %absabstt , ptr %out
476+ ret void
477+ }
0 commit comments