@@ -405,6 +405,13 @@ void test_wave_reduce_add_u64_default(global int* out, long in)
405405 * out = __builtin_amdgcn_wave_reduce_add_u64 (in , 0 );
406406}
407407
408+ // CHECK-LABEL: @test_wave_reduce_fadd_f32_default
409+ // CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.wave.reduce.fadd.f32(
410+ void test_wave_reduce_fadd_f32_default (global float * out , float in )
411+ {
412+ * out = __builtin_amdgcn_wave_reduce_fadd_f32 (in , 0 );
413+ }
414+
408415// CHECK-LABEL: @test_wave_reduce_add_u32_iterative
409416// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.add.i32(
410417void test_wave_reduce_add_u32_iterative (global int * out , int in )
@@ -419,6 +426,13 @@ void test_wave_reduce_add_u64_iterative(global int* out, long in)
419426 * out = __builtin_amdgcn_wave_reduce_add_u64 (in , 1 );
420427}
421428
429+ // CHECK-LABEL: @test_wave_reduce_fadd_f32_iterative
430+ // CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.wave.reduce.fadd.f32(
431+ void test_wave_reduce_fadd_f32_iterative (global float * out , float in )
432+ {
433+ * out = __builtin_amdgcn_wave_reduce_fadd_f32 (in , 0 );
434+ }
435+
422436// CHECK-LABEL: @test_wave_reduce_add_u32_dpp
423437// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.add.i32(
424438void test_wave_reduce_add_u32_dpp (global int * out , int in )
@@ -433,6 +447,13 @@ void test_wave_reduce_add_u64_dpp(global int* out, long in)
433447 * out = __builtin_amdgcn_wave_reduce_add_u64 (in , 2 );
434448}
435449
450+ // CHECK-LABEL: @test_wave_reduce_fadd_f32_dpp
451+ // CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.wave.reduce.fadd.f32(
452+ void test_wave_reduce_fadd_f32_dpp (global float * out , float in )
453+ {
454+ * out = __builtin_amdgcn_wave_reduce_fadd_f32 (in , 0 );
455+ }
456+
436457// CHECK-LABEL: @test_wave_reduce_sub_u32_default
437458// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.sub.i32(
438459void test_wave_reduce_sub_u32_default (global int * out , int in )
@@ -447,6 +468,13 @@ void test_wave_reduce_sub_u64_default(global int* out, long in)
447468 * out = __builtin_amdgcn_wave_reduce_sub_u64 (in , 0 );
448469}
449470
471+ // CHECK-LABEL: @test_wave_reduce_fsub_f32_default
472+ // CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.wave.reduce.fsub.f32(
473+ void test_wave_reduce_fsub_f32_default (global float * out , float in )
474+ {
475+ * out = __builtin_amdgcn_wave_reduce_fsub_f32 (in , 0 );
476+ }
477+
450478// CHECK-LABEL: @test_wave_reduce_sub_u32_iterative
451479// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.sub.i32(
452480void test_wave_reduce_sub_u32_iterative (global int * out , int in )
@@ -461,6 +489,13 @@ void test_wave_reduce_sub_u64_iterative(global int* out, long in)
461489 * out = __builtin_amdgcn_wave_reduce_sub_u64 (in , 1 );
462490}
463491
492+ // CHECK-LABEL: @test_wave_reduce_fsub_f32_iterative
493+ // CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.wave.reduce.fsub.f32(
494+ void test_wave_reduce_fsub_f32_iterative (global float * out , float in )
495+ {
496+ * out = __builtin_amdgcn_wave_reduce_fsub_f32 (in , 0 );
497+ }
498+
464499// CHECK-LABEL: @test_wave_reduce_sub_u32_dpp
465500// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.sub.i32(
466501void test_wave_reduce_sub_u32_dpp (global int * out , int in )
@@ -475,6 +510,13 @@ void test_wave_reduce_sub_u64_dpp(global int* out, long in)
475510 * out = __builtin_amdgcn_wave_reduce_sub_u64 (in , 2 );
476511}
477512
513+ // CHECK-LABEL: @test_wave_reduce_fsub_f32_dpp
514+ // CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.wave.reduce.fsub.f32(
515+ void test_wave_reduce_fsub_f32_dpp (global float * out , float in )
516+ {
517+ * out = __builtin_amdgcn_wave_reduce_fsub_f32 (in , 0 );
518+ }
519+
478520// CHECK-LABEL: @test_wave_reduce_and_b32_default
479521// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.and.i32(
480522void test_wave_reduce_and_b32_default (global int * out , int in )
@@ -615,6 +657,13 @@ void test_wave_reduce_min_i64_default(global int* out, long in)
615657 * out = __builtin_amdgcn_wave_reduce_min_i64 (in , 0 );
616658}
617659
660+ // CHECK-LABEL: @test_wave_reduce_fmin_f32_default
661+ // CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.wave.reduce.fmin.f32(
662+ void test_wave_reduce_fmin_f32_default (global float * out , float in )
663+ {
664+ * out = __builtin_amdgcn_wave_reduce_fmin_f32 (in , 0 );
665+ }
666+
618667// CHECK-LABEL: @test_wave_reduce_min_i32_iterative
619668// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.min.i32(
620669void test_wave_reduce_min_i32_iterative (global int * out , int in )
@@ -629,6 +678,13 @@ void test_wave_reduce_min_i64_iterative(global int* out, long in)
629678 * out = __builtin_amdgcn_wave_reduce_min_i64 (in , 1 );
630679}
631680
681+ // CHECK-LABEL: @test_wave_reduce_fmin_f32_iterative
682+ // CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.wave.reduce.fmin.f32(
683+ void test_wave_reduce_fmin_f32_iterative (global float * out , float in )
684+ {
685+ * out = __builtin_amdgcn_wave_reduce_fmin_f32 (in , 0 );
686+ }
687+
632688// CHECK-LABEL: @test_wave_reduce_min_i32_dpp
633689// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.min.i32(
634690void test_wave_reduce_min_i32_dpp (global int * out , int in )
@@ -643,6 +699,13 @@ void test_wave_reduce_min_i64_dpp(global int* out, long in)
643699 * out = __builtin_amdgcn_wave_reduce_min_i64 (in , 2 );
644700}
645701
702+ // CHECK-LABEL: @test_wave_reduce_fmin_f32_dpp
703+ // CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.wave.reduce.fmin.f32(
704+ void test_wave_reduce_fmin_f32_dpp (global float * out , float in )
705+ {
706+ * out = __builtin_amdgcn_wave_reduce_fmin_f32 (in , 0 );
707+ }
708+
646709// CHECK-LABEL: @test_wave_reduce_min_u32_default
647710// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umin.i32(
648711void test_wave_reduce_min_u32_default (global int * out , int in )
@@ -699,6 +762,13 @@ void test_wave_reduce_max_i64_default(global int* out, long in)
699762 * out = __builtin_amdgcn_wave_reduce_max_i64 (in , 0 );
700763}
701764
765+ // CHECK-LABEL: @test_wave_reduce_fmax_f32_default
766+ // CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.wave.reduce.fmax.f32(
767+ void test_wave_reduce_fmax_f32_default (global float * out , float in )
768+ {
769+ * out = __builtin_amdgcn_wave_reduce_fmax_f32 (in , 0 );
770+ }
771+
702772// CHECK-LABEL: @test_wave_reduce_max_i32_iterative
703773// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.max.i32(
704774void test_wave_reduce_max_i32_iterative (global int * out , int in )
@@ -713,6 +783,13 @@ void test_wave_reduce_max_i64_iterative(global int* out, long in)
713783 * out = __builtin_amdgcn_wave_reduce_max_i64 (in , 1 );
714784}
715785
786+ // CHECK-LABEL: @test_wave_reduce_fmax_f32_iterative
787+ // CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.wave.reduce.fmax.f32(
788+ void test_wave_reduce_fmax_f32_iterative (global float * out , float in )
789+ {
790+ * out = __builtin_amdgcn_wave_reduce_fmax_f32 (in , 0 );
791+ }
792+
716793// CHECK-LABEL: @test_wave_reduce_max_i32_dpp
717794// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.max.i32(
718795void test_wave_reduce_max_i32_dpp (global int * out , int in )
@@ -727,6 +804,13 @@ void test_wave_reduce_max_i64_dpp(global int* out, long in)
727804 * out = __builtin_amdgcn_wave_reduce_max_i64 (in , 2 );
728805}
729806
807+ // CHECK-LABEL: @test_wave_reduce_fmax_f32_dpp
808+ // CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.wave.reduce.fmax.f32(
809+ void test_wave_reduce_fmax_f32_dpp (global float * out , float in )
810+ {
811+ * out = __builtin_amdgcn_wave_reduce_fmax_f32 (in , 0 );
812+ }
813+
730814// CHECK-LABEL: @test_wave_reduce_max_u32_default
731815// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umax.i32(
732816void test_wave_reduce_max_u32_default (global int * out , int in )
0 commit comments