Skip to content

Conversation

@harrisonGPU
Copy link
Contributor

Add a test for image sample with multiple fptrunc to half in support of #141758

@llvmbot llvmbot added backend:AMDGPU llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels May 28, 2025
@llvmbot
Copy link
Member

llvmbot commented May 28, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Harrison Hao (harrisonGPU)

Changes

Add a test for image sample with multiple fptrunc to half in support of #141758


Patch is 35.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141771.diff

1 Files Affected:

  • (modified) llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll (+427-2)
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll b/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll
index 30431ad724843..a3624986efbd9 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll
@@ -1,8 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx700 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX7 %s
 ; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx810 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
-; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx900 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
-; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx1010 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
+; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx900 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX9 %s
+; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx1010 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX10PLUS %s
+; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx1100 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX11 %s
 
 define amdgpu_ps half @image_sample_2d_fptrunc_to_d16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
 ; GFX7-LABEL: @image_sample_2d_fptrunc_to_d16(
@@ -16,6 +17,21 @@ define amdgpu_ps half @image_sample_2d_fptrunc_to_d16(<8 x i32> inreg %rsrc, <4
 ; GFX81PLUS-NEXT:    [[TEX:%.*]] = call half @llvm.amdgcn.image.sample.lz.2d.f16.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; GFX81PLUS-NEXT:    ret half [[TEX]]
 ;
+; GFX9-LABEL: @image_sample_2d_fptrunc_to_d16(
+; GFX9-NEXT:  main_body:
+; GFX9-NEXT:    [[TEX:%.*]] = call half @llvm.amdgcn.image.sample.lz.2d.f16.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT:    ret half [[TEX]]
+;
+; GFX10PLUS-LABEL: @image_sample_2d_fptrunc_to_d16(
+; GFX10PLUS-NEXT:  main_body:
+; GFX10PLUS-NEXT:    [[TEX:%.*]] = call half @llvm.amdgcn.image.sample.lz.2d.f16.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT:    ret half [[TEX]]
+;
+; GFX11-LABEL: @image_sample_2d_fptrunc_to_d16(
+; GFX11-NEXT:  main_body:
+; GFX11-NEXT:    [[TEX:%.*]] = call half @llvm.amdgcn.image.sample.lz.2d.f16.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT:    ret half [[TEX]]
+;
 main_body:
   %tex = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   %tex_half = fptrunc float %tex to half
@@ -40,6 +56,30 @@ define amdgpu_ps half @image_sample_2d_v2f32(<8 x i32> inreg %rsrc, <4 x i32> in
 ; GFX81PLUS-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
 ; GFX81PLUS-NEXT:    ret half [[ADDF_SUM_0]]
 ;
+; GFX9-LABEL: @image_sample_2d_v2f32(
+; GFX9-NEXT:  main_body:
+; GFX9-NEXT:    [[TEX:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.lz.2d.v2f16.f32.v8i32.v4i32(i32 3, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <2 x half> [[TEX]], i64 0
+; GFX9-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <2 x half> [[TEX]], i64 1
+; GFX9-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX9-NEXT:    ret half [[ADDF_SUM_0]]
+;
+; GFX10PLUS-LABEL: @image_sample_2d_v2f32(
+; GFX10PLUS-NEXT:  main_body:
+; GFX10PLUS-NEXT:    [[TEX:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.lz.2d.v2f16.f32.v8i32.v4i32(i32 3, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <2 x half> [[TEX]], i64 0
+; GFX10PLUS-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <2 x half> [[TEX]], i64 1
+; GFX10PLUS-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX10PLUS-NEXT:    ret half [[ADDF_SUM_0]]
+;
+; GFX11-LABEL: @image_sample_2d_v2f32(
+; GFX11-NEXT:  main_body:
+; GFX11-NEXT:    [[TEX:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.lz.2d.v2f16.f32.v8i32.v4i32(i32 3, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <2 x half> [[TEX]], i64 0
+; GFX11-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <2 x half> [[TEX]], i64 1
+; GFX11-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX11-NEXT:    ret half [[ADDF_SUM_0]]
+;
 main_body:
   %tex = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32.v8i32.v4i32(i32 3, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   %tex_2_half = fptrunc <2 x float> %tex to <2 x half>
@@ -71,6 +111,36 @@ define amdgpu_ps half @image_sample_2d_v3f32(<8 x i32> inreg %rsrc, <4 x i32> in
 ; GFX81PLUS-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[ADDF_SUM_0]], [[TEX_HALF_2]]
 ; GFX81PLUS-NEXT:    ret half [[ADDF_SUM_1]]
 ;
+; GFX9-LABEL: @image_sample_2d_v3f32(
+; GFX9-NEXT:  main_body:
+; GFX9-NEXT:    [[TEX:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.lz.2d.v3f16.f32.v8i32.v4i32(i32 7, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <3 x half> [[TEX]], i64 0
+; GFX9-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <3 x half> [[TEX]], i64 1
+; GFX9-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <3 x half> [[TEX]], i64 2
+; GFX9-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX9-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[ADDF_SUM_0]], [[TEX_HALF_2]]
+; GFX9-NEXT:    ret half [[ADDF_SUM_1]]
+;
+; GFX10PLUS-LABEL: @image_sample_2d_v3f32(
+; GFX10PLUS-NEXT:  main_body:
+; GFX10PLUS-NEXT:    [[TEX:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.lz.2d.v3f16.f32.v8i32.v4i32(i32 7, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <3 x half> [[TEX]], i64 0
+; GFX10PLUS-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <3 x half> [[TEX]], i64 1
+; GFX10PLUS-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <3 x half> [[TEX]], i64 2
+; GFX10PLUS-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX10PLUS-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[ADDF_SUM_0]], [[TEX_HALF_2]]
+; GFX10PLUS-NEXT:    ret half [[ADDF_SUM_1]]
+;
+; GFX11-LABEL: @image_sample_2d_v3f32(
+; GFX11-NEXT:  main_body:
+; GFX11-NEXT:    [[TEX:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.lz.2d.v3f16.f32.v8i32.v4i32(i32 7, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <3 x half> [[TEX]], i64 0
+; GFX11-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <3 x half> [[TEX]], i64 1
+; GFX11-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <3 x half> [[TEX]], i64 2
+; GFX11-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX11-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[ADDF_SUM_0]], [[TEX_HALF_2]]
+; GFX11-NEXT:    ret half [[ADDF_SUM_1]]
+;
 main_body:
   %tex = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   %tex_3_half = fptrunc <3 x float> %tex to <3 x half>
@@ -108,6 +178,42 @@ define amdgpu_ps half @image_sample_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> in
 ; GFX81PLUS-NEXT:    [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
 ; GFX81PLUS-NEXT:    ret half [[ADDF_SUM_2]]
 ;
+; GFX9-LABEL: @image_sample_2d_v4f32(
+; GFX9-NEXT:  main_body:
+; GFX9-NEXT:    [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX9-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX9-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX9-NEXT:    [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX9-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX9-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX9-NEXT:    [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX9-NEXT:    ret half [[ADDF_SUM_2]]
+;
+; GFX10PLUS-LABEL: @image_sample_2d_v4f32(
+; GFX10PLUS-NEXT:  main_body:
+; GFX10PLUS-NEXT:    [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX10PLUS-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX10PLUS-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX10PLUS-NEXT:    [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX10PLUS-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX10PLUS-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX10PLUS-NEXT:    [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX10PLUS-NEXT:    ret half [[ADDF_SUM_2]]
+;
+; GFX11-LABEL: @image_sample_2d_v4f32(
+; GFX11-NEXT:  main_body:
+; GFX11-NEXT:    [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX11-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX11-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX11-NEXT:    [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX11-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX11-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX11-NEXT:    [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX11-NEXT:    ret half [[ADDF_SUM_2]]
+;
 main_body:
   %tex = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   %tex_4_half = fptrunc <4 x float> %tex to <4 x half>
@@ -121,6 +227,91 @@ main_body:
   ret half %addf_sum.2
 }
 
+define void @image_sample_2d_multi_fptrunc_to_d16(<8 x i32> %surf_desc, <4 x i32> %samp, float %u, float %v, ptr addrspace(7) %out) {
+; GFX7-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX7-NEXT:  main_body:
+; GFX7-NEXT:    [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMPLER_DESC:%.*]], i1 false, i32 0, i32 0)
+; GFX7-NEXT:    [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX7-NEXT:    [[H0:%.*]] = fptrunc float [[E0]] to half
+; GFX7-NEXT:    [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX7-NEXT:    [[H1:%.*]] = fptrunc float [[E1]] to half
+; GFX7-NEXT:    [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX7-NEXT:    [[H2:%.*]] = fptrunc float [[E2]] to half
+; GFX7-NEXT:    [[MUL:%.*]] = fmul half [[H0]], [[H1]]
+; GFX7-NEXT:    [[RES:%.*]] = fadd half [[MUL]], [[H2]]
+; GFX7-NEXT:    store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX7-NEXT:    ret void
+;
+; GFX81PLUS-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX81PLUS-NEXT:  main_body:
+; GFX81PLUS-NEXT:    [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX81PLUS-NEXT:    [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX81PLUS-NEXT:    [[H0:%.*]] = fptrunc float [[E0]] to half
+; GFX81PLUS-NEXT:    [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX81PLUS-NEXT:    [[H1:%.*]] = fptrunc float [[E1]] to half
+; GFX81PLUS-NEXT:    [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX81PLUS-NEXT:    [[H2:%.*]] = fptrunc float [[E2]] to half
+; GFX81PLUS-NEXT:    [[MUL:%.*]] = fmul half [[H0]], [[H1]]
+; GFX81PLUS-NEXT:    [[RES:%.*]] = fadd half [[MUL]], [[H2]]
+; GFX81PLUS-NEXT:    store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX81PLUS-NEXT:    ret void
+;
+; GFX9-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX9-NEXT:  main_body:
+; GFX9-NEXT:    [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT:    [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX9-NEXT:    [[HALF_EXT2:%.*]] = fptrunc float [[E0]] to half
+; GFX9-NEXT:    [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX9-NEXT:    [[HALF_EXT1:%.*]] = fptrunc float [[E1]] to half
+; GFX9-NEXT:    [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX9-NEXT:    [[HALF_EXT:%.*]] = fptrunc float [[E2]] to half
+; GFX9-NEXT:    [[MUL:%.*]] = fmul half [[HALF_EXT2]], [[HALF_EXT1]]
+; GFX9-NEXT:    [[RES:%.*]] = fadd half [[MUL]], [[HALF_EXT]]
+; GFX9-NEXT:    store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX9-NEXT:    ret void
+;
+; GFX10PLUS-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX10PLUS-NEXT:  main_body:
+; GFX10PLUS-NEXT:    [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT:    [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX10PLUS-NEXT:    [[HALF_EXT2:%.*]] = fptrunc float [[E0]] to half
+; GFX10PLUS-NEXT:    [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX10PLUS-NEXT:    [[HALF_EXT1:%.*]] = fptrunc float [[E1]] to half
+; GFX10PLUS-NEXT:    [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX10PLUS-NEXT:    [[HALF_EXT:%.*]] = fptrunc float [[E2]] to half
+; GFX10PLUS-NEXT:    [[MUL:%.*]] = fmul half [[HALF_EXT2]], [[HALF_EXT1]]
+; GFX10PLUS-NEXT:    [[RES:%.*]] = fadd half [[MUL]], [[HALF_EXT]]
+; GFX10PLUS-NEXT:    store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX10PLUS-NEXT:    ret void
+;
+; GFX11-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX11-NEXT:  main_body:
+; GFX11-NEXT:    [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT:    [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX11-NEXT:    [[HALF_EXT2:%.*]] = fptrunc float [[E0]] to half
+; GFX11-NEXT:    [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX11-NEXT:    [[HALF_EXT1:%.*]] = fptrunc float [[E1]] to half
+; GFX11-NEXT:    [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX11-NEXT:    [[HALF_EXT:%.*]] = fptrunc float [[E2]] to half
+; GFX11-NEXT:    [[MUL:%.*]] = fmul half [[HALF_EXT2]], [[HALF_EXT1]]
+; GFX11-NEXT:    [[RES:%.*]] = fadd half [[MUL]], [[HALF_EXT]]
+; GFX11-NEXT:    store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX11-NEXT:    ret void
+;
+main_body:
+  %sample = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float %u, float %v, <8 x i32> %surf_desc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  %e0 = extractelement <4 x float> %sample, i32 0
+  %h0 = fptrunc float %e0 to half
+  %e1 = extractelement <4 x float> %sample, i32 1
+  %h1 = fptrunc float %e1 to half
+  %e2 = extractelement <4 x float> %sample, i32 2
+  %h2 = fptrunc float %e2 to half
+  %mul = fmul half %h0, %h1
+  %res = fadd half %mul, %h2
+  store half %res, ptr addrspace(7) %out, align 2
+  ret void
+}
+
 define amdgpu_ps half @image_gather4_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
 ; GFX7-LABEL: @image_gather4_2d_v4f32(
 ; GFX7-NEXT:  main_body:
@@ -147,6 +338,42 @@ define amdgpu_ps half @image_gather4_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> i
 ; GFX81PLUS-NEXT:    [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
 ; GFX81PLUS-NEXT:    ret half [[ADDF_SUM_2]]
 ;
+; GFX9-LABEL: @image_gather4_2d_v4f32(
+; GFX9-NEXT:  main_body:
+; GFX9-NEXT:    [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.gather4.2d.v4f16.f16.v8i32.v4i32(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX9-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX9-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX9-NEXT:    [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX9-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX9-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX9-NEXT:    [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX9-NEXT:    ret half [[ADDF_SUM_2]]
+;
+; GFX10PLUS-LABEL: @image_gather4_2d_v4f32(
+; GFX10PLUS-NEXT:  main_body:
+; GFX10PLUS-NEXT:    [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.gather4.2d.v4f16.f16.v8i32.v4i32(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX10PLUS-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX10PLUS-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX10PLUS-NEXT:    [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX10PLUS-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX10PLUS-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX10PLUS-NEXT:    [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX10PLUS-NEXT:    ret half [[ADDF_SUM_2]]
+;
+; GFX11-LABEL: @image_gather4_2d_v4f32(
+; GFX11-NEXT:  main_body:
+; GFX11-NEXT:    [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.gather4.2d.v4f16.f16.v8i32.v4i32(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX11-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX11-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX11-NEXT:    [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX11-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX11-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX11-NEXT:    [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX11-NEXT:    ret half [[ADDF_SUM_2]]
+;
 main_body:
   %tex = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16.v8i32.v4i32(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   %tex_4_half = fptrunc <4 x float> %tex to <4 x half>
@@ -169,6 +396,18 @@ define amdgpu_ps half @load_1d(i16 %s, <8 x i32> inreg %rsrc) {
 ; GFX81PLUS-LABEL: @load_1d(
 ; GFX81PLUS-NEXT:    [[S_FLOAT:%.*]] = call half @llvm.amdgcn.image.load.1d.f16.i16.v8i32(i32 1, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
 ; GFX81PLUS-NEXT:    ret half [[S_FLOAT]]
+;
+; GFX9-LABEL: @load_1d(
+; GFX9-NEXT:    [[S_FLOAT:%.*]] = call half @llvm.amdgcn.image.load.1d.f16.i16.v8i32(i32 1, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; GFX9-NEXT:    ret half [[S_FLOAT]]
+;
+; GFX10PLUS-LABEL: @load_1d(
+; GFX10PLUS-NEXT:    [[...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented May 28, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Harrison Hao (harrisonGPU)

Changes

Add a test for image sample with multiple fptrunc to half in support of #141758


Patch is 35.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141771.diff

1 Files Affected:

  • (modified) llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll (+427-2)
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll b/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll
index 30431ad724843..a3624986efbd9 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll
@@ -1,8 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx700 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX7 %s
 ; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx810 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
-; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx900 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
-; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx1010 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
+; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx900 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX9 %s
+; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx1010 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX10PLUS %s
+; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx1100 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX11 %s
 
 define amdgpu_ps half @image_sample_2d_fptrunc_to_d16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
 ; GFX7-LABEL: @image_sample_2d_fptrunc_to_d16(
@@ -16,6 +17,21 @@ define amdgpu_ps half @image_sample_2d_fptrunc_to_d16(<8 x i32> inreg %rsrc, <4
 ; GFX81PLUS-NEXT:    [[TEX:%.*]] = call half @llvm.amdgcn.image.sample.lz.2d.f16.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; GFX81PLUS-NEXT:    ret half [[TEX]]
 ;
+; GFX9-LABEL: @image_sample_2d_fptrunc_to_d16(
+; GFX9-NEXT:  main_body:
+; GFX9-NEXT:    [[TEX:%.*]] = call half @llvm.amdgcn.image.sample.lz.2d.f16.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT:    ret half [[TEX]]
+;
+; GFX10PLUS-LABEL: @image_sample_2d_fptrunc_to_d16(
+; GFX10PLUS-NEXT:  main_body:
+; GFX10PLUS-NEXT:    [[TEX:%.*]] = call half @llvm.amdgcn.image.sample.lz.2d.f16.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT:    ret half [[TEX]]
+;
+; GFX11-LABEL: @image_sample_2d_fptrunc_to_d16(
+; GFX11-NEXT:  main_body:
+; GFX11-NEXT:    [[TEX:%.*]] = call half @llvm.amdgcn.image.sample.lz.2d.f16.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT:    ret half [[TEX]]
+;
 main_body:
   %tex = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   %tex_half = fptrunc float %tex to half
@@ -40,6 +56,30 @@ define amdgpu_ps half @image_sample_2d_v2f32(<8 x i32> inreg %rsrc, <4 x i32> in
 ; GFX81PLUS-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
 ; GFX81PLUS-NEXT:    ret half [[ADDF_SUM_0]]
 ;
+; GFX9-LABEL: @image_sample_2d_v2f32(
+; GFX9-NEXT:  main_body:
+; GFX9-NEXT:    [[TEX:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.lz.2d.v2f16.f32.v8i32.v4i32(i32 3, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <2 x half> [[TEX]], i64 0
+; GFX9-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <2 x half> [[TEX]], i64 1
+; GFX9-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX9-NEXT:    ret half [[ADDF_SUM_0]]
+;
+; GFX10PLUS-LABEL: @image_sample_2d_v2f32(
+; GFX10PLUS-NEXT:  main_body:
+; GFX10PLUS-NEXT:    [[TEX:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.lz.2d.v2f16.f32.v8i32.v4i32(i32 3, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <2 x half> [[TEX]], i64 0
+; GFX10PLUS-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <2 x half> [[TEX]], i64 1
+; GFX10PLUS-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX10PLUS-NEXT:    ret half [[ADDF_SUM_0]]
+;
+; GFX11-LABEL: @image_sample_2d_v2f32(
+; GFX11-NEXT:  main_body:
+; GFX11-NEXT:    [[TEX:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.lz.2d.v2f16.f32.v8i32.v4i32(i32 3, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <2 x half> [[TEX]], i64 0
+; GFX11-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <2 x half> [[TEX]], i64 1
+; GFX11-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX11-NEXT:    ret half [[ADDF_SUM_0]]
+;
 main_body:
   %tex = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32.v8i32.v4i32(i32 3, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   %tex_2_half = fptrunc <2 x float> %tex to <2 x half>
@@ -71,6 +111,36 @@ define amdgpu_ps half @image_sample_2d_v3f32(<8 x i32> inreg %rsrc, <4 x i32> in
 ; GFX81PLUS-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[ADDF_SUM_0]], [[TEX_HALF_2]]
 ; GFX81PLUS-NEXT:    ret half [[ADDF_SUM_1]]
 ;
+; GFX9-LABEL: @image_sample_2d_v3f32(
+; GFX9-NEXT:  main_body:
+; GFX9-NEXT:    [[TEX:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.lz.2d.v3f16.f32.v8i32.v4i32(i32 7, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <3 x half> [[TEX]], i64 0
+; GFX9-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <3 x half> [[TEX]], i64 1
+; GFX9-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <3 x half> [[TEX]], i64 2
+; GFX9-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX9-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[ADDF_SUM_0]], [[TEX_HALF_2]]
+; GFX9-NEXT:    ret half [[ADDF_SUM_1]]
+;
+; GFX10PLUS-LABEL: @image_sample_2d_v3f32(
+; GFX10PLUS-NEXT:  main_body:
+; GFX10PLUS-NEXT:    [[TEX:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.lz.2d.v3f16.f32.v8i32.v4i32(i32 7, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <3 x half> [[TEX]], i64 0
+; GFX10PLUS-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <3 x half> [[TEX]], i64 1
+; GFX10PLUS-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <3 x half> [[TEX]], i64 2
+; GFX10PLUS-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX10PLUS-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[ADDF_SUM_0]], [[TEX_HALF_2]]
+; GFX10PLUS-NEXT:    ret half [[ADDF_SUM_1]]
+;
+; GFX11-LABEL: @image_sample_2d_v3f32(
+; GFX11-NEXT:  main_body:
+; GFX11-NEXT:    [[TEX:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.lz.2d.v3f16.f32.v8i32.v4i32(i32 7, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <3 x half> [[TEX]], i64 0
+; GFX11-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <3 x half> [[TEX]], i64 1
+; GFX11-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <3 x half> [[TEX]], i64 2
+; GFX11-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX11-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[ADDF_SUM_0]], [[TEX_HALF_2]]
+; GFX11-NEXT:    ret half [[ADDF_SUM_1]]
+;
 main_body:
   %tex = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   %tex_3_half = fptrunc <3 x float> %tex to <3 x half>
@@ -108,6 +178,42 @@ define amdgpu_ps half @image_sample_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> in
 ; GFX81PLUS-NEXT:    [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
 ; GFX81PLUS-NEXT:    ret half [[ADDF_SUM_2]]
 ;
+; GFX9-LABEL: @image_sample_2d_v4f32(
+; GFX9-NEXT:  main_body:
+; GFX9-NEXT:    [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX9-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX9-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX9-NEXT:    [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX9-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX9-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX9-NEXT:    [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX9-NEXT:    ret half [[ADDF_SUM_2]]
+;
+; GFX10PLUS-LABEL: @image_sample_2d_v4f32(
+; GFX10PLUS-NEXT:  main_body:
+; GFX10PLUS-NEXT:    [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX10PLUS-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX10PLUS-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX10PLUS-NEXT:    [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX10PLUS-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX10PLUS-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX10PLUS-NEXT:    [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX10PLUS-NEXT:    ret half [[ADDF_SUM_2]]
+;
+; GFX11-LABEL: @image_sample_2d_v4f32(
+; GFX11-NEXT:  main_body:
+; GFX11-NEXT:    [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX11-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX11-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX11-NEXT:    [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX11-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX11-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX11-NEXT:    [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX11-NEXT:    ret half [[ADDF_SUM_2]]
+;
 main_body:
   %tex = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   %tex_4_half = fptrunc <4 x float> %tex to <4 x half>
@@ -121,6 +227,91 @@ main_body:
   ret half %addf_sum.2
 }
 
+define void @image_sample_2d_multi_fptrunc_to_d16(<8 x i32> %surf_desc, <4 x i32> %samp, float %u, float %v, ptr addrspace(7) %out) {
+; GFX7-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX7-NEXT:  main_body:
+; GFX7-NEXT:    [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMPLER_DESC:%.*]], i1 false, i32 0, i32 0)
+; GFX7-NEXT:    [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX7-NEXT:    [[H0:%.*]] = fptrunc float [[E0]] to half
+; GFX7-NEXT:    [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX7-NEXT:    [[H1:%.*]] = fptrunc float [[E1]] to half
+; GFX7-NEXT:    [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX7-NEXT:    [[H2:%.*]] = fptrunc float [[E2]] to half
+; GFX7-NEXT:    [[MUL:%.*]] = fmul half [[H0]], [[H1]]
+; GFX7-NEXT:    [[RES:%.*]] = fadd half [[MUL]], [[H2]]
+; GFX7-NEXT:    store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX7-NEXT:    ret void
+;
+; GFX81PLUS-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX81PLUS-NEXT:  main_body:
+; GFX81PLUS-NEXT:    [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX81PLUS-NEXT:    [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX81PLUS-NEXT:    [[H0:%.*]] = fptrunc float [[E0]] to half
+; GFX81PLUS-NEXT:    [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX81PLUS-NEXT:    [[H1:%.*]] = fptrunc float [[E1]] to half
+; GFX81PLUS-NEXT:    [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX81PLUS-NEXT:    [[H2:%.*]] = fptrunc float [[E2]] to half
+; GFX81PLUS-NEXT:    [[MUL:%.*]] = fmul half [[H0]], [[H1]]
+; GFX81PLUS-NEXT:    [[RES:%.*]] = fadd half [[MUL]], [[H2]]
+; GFX81PLUS-NEXT:    store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX81PLUS-NEXT:    ret void
+;
+; GFX9-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX9-NEXT:  main_body:
+; GFX9-NEXT:    [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT:    [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX9-NEXT:    [[HALF_EXT2:%.*]] = fptrunc float [[E0]] to half
+; GFX9-NEXT:    [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX9-NEXT:    [[HALF_EXT1:%.*]] = fptrunc float [[E1]] to half
+; GFX9-NEXT:    [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX9-NEXT:    [[HALF_EXT:%.*]] = fptrunc float [[E2]] to half
+; GFX9-NEXT:    [[MUL:%.*]] = fmul half [[HALF_EXT2]], [[HALF_EXT1]]
+; GFX9-NEXT:    [[RES:%.*]] = fadd half [[MUL]], [[HALF_EXT]]
+; GFX9-NEXT:    store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX9-NEXT:    ret void
+;
+; GFX10PLUS-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX10PLUS-NEXT:  main_body:
+; GFX10PLUS-NEXT:    [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT:    [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX10PLUS-NEXT:    [[HALF_EXT2:%.*]] = fptrunc float [[E0]] to half
+; GFX10PLUS-NEXT:    [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX10PLUS-NEXT:    [[HALF_EXT1:%.*]] = fptrunc float [[E1]] to half
+; GFX10PLUS-NEXT:    [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX10PLUS-NEXT:    [[HALF_EXT:%.*]] = fptrunc float [[E2]] to half
+; GFX10PLUS-NEXT:    [[MUL:%.*]] = fmul half [[HALF_EXT2]], [[HALF_EXT1]]
+; GFX10PLUS-NEXT:    [[RES:%.*]] = fadd half [[MUL]], [[HALF_EXT]]
+; GFX10PLUS-NEXT:    store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX10PLUS-NEXT:    ret void
+;
+; GFX11-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX11-NEXT:  main_body:
+; GFX11-NEXT:    [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT:    [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX11-NEXT:    [[HALF_EXT2:%.*]] = fptrunc float [[E0]] to half
+; GFX11-NEXT:    [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX11-NEXT:    [[HALF_EXT1:%.*]] = fptrunc float [[E1]] to half
+; GFX11-NEXT:    [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX11-NEXT:    [[HALF_EXT:%.*]] = fptrunc float [[E2]] to half
+; GFX11-NEXT:    [[MUL:%.*]] = fmul half [[HALF_EXT2]], [[HALF_EXT1]]
+; GFX11-NEXT:    [[RES:%.*]] = fadd half [[MUL]], [[HALF_EXT]]
+; GFX11-NEXT:    store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX11-NEXT:    ret void
+;
+main_body:
+  %sample = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float %u, float %v, <8 x i32> %surf_desc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  %e0 = extractelement <4 x float> %sample, i32 0
+  %h0 = fptrunc float %e0 to half
+  %e1 = extractelement <4 x float> %sample, i32 1
+  %h1 = fptrunc float %e1 to half
+  %e2 = extractelement <4 x float> %sample, i32 2
+  %h2 = fptrunc float %e2 to half
+  %mul = fmul half %h0, %h1
+  %res = fadd half %mul, %h2
+  store half %res, ptr addrspace(7) %out, align 2
+  ret void
+}
+
 define amdgpu_ps half @image_gather4_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
 ; GFX7-LABEL: @image_gather4_2d_v4f32(
 ; GFX7-NEXT:  main_body:
@@ -147,6 +338,42 @@ define amdgpu_ps half @image_gather4_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> i
 ; GFX81PLUS-NEXT:    [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
 ; GFX81PLUS-NEXT:    ret half [[ADDF_SUM_2]]
 ;
+; GFX9-LABEL: @image_gather4_2d_v4f32(
+; GFX9-NEXT:  main_body:
+; GFX9-NEXT:    [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.gather4.2d.v4f16.f16.v8i32.v4i32(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX9-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX9-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX9-NEXT:    [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX9-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX9-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX9-NEXT:    [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX9-NEXT:    ret half [[ADDF_SUM_2]]
+;
+; GFX10PLUS-LABEL: @image_gather4_2d_v4f32(
+; GFX10PLUS-NEXT:  main_body:
+; GFX10PLUS-NEXT:    [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.gather4.2d.v4f16.f16.v8i32.v4i32(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX10PLUS-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX10PLUS-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX10PLUS-NEXT:    [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX10PLUS-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX10PLUS-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX10PLUS-NEXT:    [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX10PLUS-NEXT:    ret half [[ADDF_SUM_2]]
+;
+; GFX11-LABEL: @image_gather4_2d_v4f32(
+; GFX11-NEXT:  main_body:
+; GFX11-NEXT:    [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.gather4.2d.v4f16.f16.v8i32.v4i32(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX11-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX11-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX11-NEXT:    [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX11-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX11-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX11-NEXT:    [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX11-NEXT:    ret half [[ADDF_SUM_2]]
+;
 main_body:
   %tex = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16.v8i32.v4i32(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   %tex_4_half = fptrunc <4 x float> %tex to <4 x half>
@@ -169,6 +396,18 @@ define amdgpu_ps half @load_1d(i16 %s, <8 x i32> inreg %rsrc) {
 ; GFX81PLUS-LABEL: @load_1d(
 ; GFX81PLUS-NEXT:    [[S_FLOAT:%.*]] = call half @llvm.amdgcn.image.load.1d.f16.i16.v8i32(i32 1, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
 ; GFX81PLUS-NEXT:    ret half [[S_FLOAT]]
+;
+; GFX9-LABEL: @load_1d(
+; GFX9-NEXT:    [[S_FLOAT:%.*]] = call half @llvm.amdgcn.image.load.1d.f16.i16.v8i32(i32 1, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; GFX9-NEXT:    ret half [[S_FLOAT]]
+;
+; GFX10PLUS-LABEL: @load_1d(
+; GFX10PLUS-NEXT:    [[...
[truncated]

Comment on lines 5 to 6
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All 4 of these support d16, so what's the point of testing all of them? These at least need to use a common check prefix since the output is identical to the 81PLUS case

Copy link
Contributor Author

@harrisonGPU harrisonGPU May 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed that using update_test_checks.py produces a warning about using same check prefixes. I understand your point!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have updated it. Thanks!

@harrisonGPU harrisonGPU force-pushed the amdgpu/imageLitTest branch from 4a7fe89 to 6deaf23 Compare May 28, 2025 16:15
@harrisonGPU harrisonGPU requested a review from arsenm May 28, 2025 16:17
@harrisonGPU harrisonGPU force-pushed the amdgpu/imageLitTest branch from 6deaf23 to 62a2e1c Compare June 8, 2025 08:06
Copy link
Contributor

@shiltian shiltian left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does "D16" mean?

@harrisonGPU
Copy link
Contributor Author

What does "D16" mean?

D16 stands for "Destination 16-bit". When enabled, image sample instructions return 16-bit values directly, which reduces VGPR usage and skips the need for v_cvt_f16_f32_e32.

@harrisonGPU harrisonGPU deleted the amdgpu/imageLitTest branch June 18, 2025 01:02
@arsenm
Copy link
Contributor

arsenm commented Jun 18, 2025

What does "D16" mean?

It's the name of a bit on MIMG instructions

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:AMDGPU llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants