-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[AMDGPU] Update pal metadata for v3.6 and fix v3.0 #135196
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Update entry_point for all pal versions below 3.6 3.6 and above removes entry_point
|
@llvm/pr-subscribers-backend-amdgpu Author: David Stuttard (dstutt) ChangesUpdate entry_point for all pal versions below 3.6 Full diff: https://github.com/llvm/llvm-project/pull/135196.diff 12 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
index cdd6cb8fdd6fc..247371f27d5ef 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -259,13 +259,17 @@ void AMDGPUPALMetadata::setEntryPoint(unsigned CC, StringRef Name) {
getHwStage(CC)[".entry_point_symbol"] =
MsgPackDoc.getNode(Name, /*Copy=*/true);
- // Set .entry_point which is defined
- // to be _amdgpu_<stage> and _amdgpu_cs for non-shader functions
- SmallString<16> EPName("_amdgpu_");
- raw_svector_ostream EPNameOS(EPName);
- EPNameOS << getStageName(CC) + 1;
- getHwStage(CC)[".entry_point"] =
- MsgPackDoc.getNode(EPNameOS.str(), /*Copy=*/true);
+ // For pal version 3.6 and above, entry_point is no longer required
+ if (getPALMajorVersion() < 3 ||
+ (getPALMajorVersion() == 3 && getPALMinorVersion() < 6)) {
+ // Set .entry_point which is defined
+ // to be _amdgpu_<stage>_main and _amdgpu_cs_main for non-shader functions
+ SmallString<16> EPName("_amdgpu_");
+ raw_svector_ostream EPNameOS(EPName);
+ EPNameOS << getStageName(CC) + 1 << "_main";
+ getHwStage(CC)[".entry_point"] =
+ MsgPackDoc.getNode(EPNameOS.str(), /*Copy=*/true);
+ }
}
// Set the number of used vgprs in the metadata. This is an optional
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-cs.ll b/llvm/test/CodeGen/AMDGPU/amdpal-cs.ll
index 96775f4763e34..d06f397c00d77 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-cs.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-cs.ll
@@ -8,7 +8,7 @@
; GCN-NEXT: amdpal.pipelines:
; GCN-NEXT: - .hardware_stages:
; GCN-NEXT: .cs:
-; GCN-NEXT: .entry_point: _amdgpu_cs
+; GCN-NEXT: .entry_point: _amdgpu_cs_main
; GCN-NEXT: .entry_point_symbol: cs_amdpal
; GCN-NEXT: .scratch_memory_size: 0
; GCN: .registers:
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-es.ll b/llvm/test/CodeGen/AMDGPU/amdpal-es.ll
index 1379246c32573..fce918cb3e49a 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-es.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-es.ll
@@ -7,7 +7,7 @@
; GCN-NEXT: amdpal.pipelines:
; GCN-NEXT: - .hardware_stages:
; GCN-NEXT: .es:
-; GCN-NEXT: .entry_point: _amdgpu_es
+; GCN-NEXT: .entry_point: _amdgpu_es_main
; GCN-NEXT: .entry_point_symbol: es_amdpal
; GCN-NEXT: .scratch_memory_size: 0
; GCN: .registers:
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-gs.ll b/llvm/test/CodeGen/AMDGPU/amdpal-gs.ll
index 1fba34a50094f..02a235345cee1 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-gs.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-gs.ll
@@ -8,7 +8,7 @@
; GCN-NEXT: amdpal.pipelines:
; GCN-NEXT: - .hardware_stages:
; GCN-NEXT: .gs:
-; GCN-NEXT: .entry_point: _amdgpu_gs
+; GCN-NEXT: .entry_point: _amdgpu_gs_main
; GCN-NEXT: .entry_point_symbol: gs_amdpal
; GCN-NEXT: .scratch_memory_size: 0
; GCN: .registers:
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-ls.ll b/llvm/test/CodeGen/AMDGPU/amdpal-ls.ll
index ebe753134a42d..0897489231a53 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-ls.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-ls.ll
@@ -7,7 +7,7 @@
; GCN-NEXT: amdpal.pipelines:
; GCN-NEXT: - .hardware_stages:
; GCN-NEXT: .ls:
-; GCN-NEXT: .entry_point: _amdgpu_ls
+; GCN-NEXT: .entry_point: _amdgpu_ls_main
; GCN-NEXT: .entry_point_symbol: ls_amdpal
; GCN-NEXT: .scratch_memory_size: 0
; GCN: .registers:
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-psenable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-psenable.ll
index 32f19e2af32ec..a289e04ebcb0c 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-psenable.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-psenable.ll
@@ -11,7 +11,7 @@
; GCN-NEXT: amdpal.pipelines:
; GCN-NEXT: - .hardware_stages:
; GCN-NEXT: .ps:
-; GCN-NEXT: .entry_point: _amdgpu_ps
+; GCN-NEXT: .entry_point: _amdgpu_ps_main
; GCN-NEXT: .entry_point_symbol: amdpal_psenable
; GCN-NEXT: .scratch_memory_size: 0
; GCN: .registers:
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-vs.ll b/llvm/test/CodeGen/AMDGPU/amdpal-vs.ll
index 853d221ee3aab..77456962a4ea0 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-vs.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-vs.ll
@@ -8,7 +8,7 @@
; GCN-NEXT: amdpal.pipelines:
; GCN-NEXT: - .hardware_stages:
; GCN-NEXT: .vs:
-; GCN-NEXT: .entry_point: _amdgpu_vs
+; GCN-NEXT: .entry_point: _amdgpu_vs_main
; GCN-NEXT: .entry_point_symbol: vs_amdpal
; GCN-NEXT: .scratch_memory_size: 0
; GCN: .registers:
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal.ll b/llvm/test/CodeGen/AMDGPU/amdpal.ll
index fd9227d2f4319..2e47b0163aa8c 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal.ll
@@ -86,7 +86,7 @@ declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32,
; PAL-NEXT: amdpal.pipelines:
; PAL-NEXT: - .hardware_stages:
; PAL-NEXT: .cs:
-; PAL-NEXT: .entry_point: _amdgpu_cs
+; PAL-NEXT: .entry_point: _amdgpu_cs_main
; PAL-NEXT: .entry_point_symbol: scratch2_cs
; PAL-NEXT: .scratch_memory_size: 0x10
; PAL-NEXT: .sgpr_count: 0x
diff --git a/llvm/test/CodeGen/AMDGPU/elf-notes.ll b/llvm/test/CodeGen/AMDGPU/elf-notes.ll
index b205678bd9085..22d699a8f4809 100644
--- a/llvm/test/CodeGen/AMDGPU/elf-notes.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf-notes.ll
@@ -66,7 +66,7 @@
; OSABI-PAL-ELF: amdpal.pipelines:
; OSABI-PAL-ELF: - .hardware_stages:
; OSABI-PAL-ELF: .cs:
-; OSABI-PAL-ELF: .entry_point: _amdgpu_cs
+; OSABI-PAL-ELF: .entry_point: _amdgpu_cs_main
; OSABI-PAL-ELF: .entry_point_symbol: elf_notes
; OSABI-PAL-ELF: .scratch_memory_size: 0
; OSABI-PAL-ELF: .sgpr_count: 96
diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
index 5748f6b188acf..01d0ca6767d5e 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
@@ -58,7 +58,7 @@
; CHECK-NEXT: .checksum_value: 0x9444d7d0
; CHECK-NEXT: .debug_mode: false
; DVGPR-NEXT: .dynamic_vgpr_saved_count: 0x70
-; CHECK-NEXT: .entry_point: _amdgpu_cs
+; CHECK-NEXT: .entry_point: _amdgpu_cs_main
; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main
; CHECK-NEXT: .excp_en: 0
; CHECK-NEXT: .float_mode: 0xc0
@@ -116,7 +116,7 @@
; CHECK-NEXT: .wgp_mode: false
; CHECK-NEXT: .gs:
; CHECK-NEXT: .debug_mode: false
-; CHECK-NEXT: .entry_point: _amdgpu_gs
+; CHECK-NEXT: .entry_point: _amdgpu_gs_main
; CHECK-NEXT: .entry_point_symbol: gs_shader
; GFX11-NEXT: .ieee_mode: false
; CHECK-NEXT: .lds_size: 0x200
@@ -128,7 +128,7 @@
; CHECK-NEXT: .wgp_mode: true
; CHECK-NEXT: .hs:
; CHECK-NEXT: .debug_mode: false
-; CHECK-NEXT: .entry_point: _amdgpu_hs
+; CHECK-NEXT: .entry_point: _amdgpu_hs_main
; CHECK-NEXT: .entry_point_symbol: hs_shader
; GFX11-NEXT: .ieee_mode: false
; CHECK-NEXT: .lds_size: 0x1000
@@ -140,7 +140,7 @@
; CHECK-NEXT: .wgp_mode: true
; CHECK-NEXT: .ps:
; CHECK-NEXT: .debug_mode: false
-; CHECK-NEXT: .entry_point: _amdgpu_ps
+; CHECK-NEXT: .entry_point: _amdgpu_ps_main
; CHECK-NEXT: .entry_point_symbol: ps_shader
; GFX11-NEXT: .ieee_mode: false
; CHECK-NEXT: .lds_size: 0
diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.6.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.6.ll
new file mode 100644
index 0000000000000..baff6660f42cd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.6.ll
@@ -0,0 +1,213 @@
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s --check-prefixes=CHECK,GFX11,NODVGPR
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 <%s | FileCheck %s --check-prefixes=CHECK,NODVGPR
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+dynamic-vgpr <%s | FileCheck %s --check-prefixes=CHECK,DVGPR
+
+; CHECK-LABEL: {{^}}_amdgpu_cs_main:
+; NODVGPR: ; TotalNumSgprs: 4
+; DVGPR: ; TotalNumSgprs: 34
+; CHECK: ; NumVgprs: 2
+; CHECK: .amdgpu_pal_metadata
+; CHECK-NEXT: ---
+; CHECK-NEXT: amdpal.pipelines:
+; CHECK-NEXT: - .api: Vulkan
+; CHECK-NEXT: .compute_registers:
+; DVGPR-NEXT: .dynamic_vgpr_en: true
+; CHECK-NEXT: .tg_size_en: true
+; CHECK-NEXT: .tgid_x_en: false
+; CHECK-NEXT: .tgid_y_en: false
+; CHECK-NEXT: .tgid_z_en: false
+; CHECK-NEXT: .tidig_comp_cnt: 0x1
+; CHECK-NEXT: .graphics_registers:
+; CHECK-NEXT: .ps_extra_lds_size: 0
+; CHECK-NEXT: .spi_ps_input_addr:
+; CHECK-NEXT: .ancillary_ena: false
+; CHECK-NEXT: .front_face_ena: true
+; CHECK-NEXT: .line_stipple_tex_ena: false
+; CHECK-NEXT: .linear_center_ena: true
+; CHECK-NEXT: .linear_centroid_ena: true
+; CHECK-NEXT: .linear_sample_ena: true
+; CHECK-NEXT: .persp_center_ena: true
+; CHECK-NEXT: .persp_centroid_ena: true
+; CHECK-NEXT: .persp_pull_model_ena: false
+; CHECK-NEXT: .persp_sample_ena: true
+; CHECK-NEXT: .pos_fixed_pt_ena: true
+; CHECK-NEXT: .pos_w_float_ena: false
+; CHECK-NEXT: .pos_x_float_ena: false
+; CHECK-NEXT: .pos_y_float_ena: false
+; CHECK-NEXT: .pos_z_float_ena: false
+; CHECK-NEXT: .sample_coverage_ena: false
+; CHECK-NEXT: .spi_ps_input_ena:
+; CHECK-NEXT: .ancillary_ena: false
+; CHECK-NEXT: .front_face_ena: false
+; CHECK-NEXT: .line_stipple_tex_ena: false
+; CHECK-NEXT: .linear_center_ena: false
+; CHECK-NEXT: .linear_centroid_ena: false
+; CHECK-NEXT: .linear_sample_ena: false
+; CHECK-NEXT: .persp_center_ena: false
+; CHECK-NEXT: .persp_centroid_ena: false
+; CHECK-NEXT: .persp_pull_model_ena: false
+; CHECK-NEXT: .persp_sample_ena: true
+; CHECK-NEXT: .pos_fixed_pt_ena: false
+; CHECK-NEXT: .pos_w_float_ena: false
+; CHECK-NEXT: .pos_x_float_ena: false
+; CHECK-NEXT: .pos_y_float_ena: false
+; CHECK-NEXT: .pos_z_float_ena: false
+; CHECK-NEXT: .sample_coverage_ena: false
+; CHECK-NEXT: .hardware_stages:
+; CHECK-NEXT: .cs:
+; CHECK-NEXT: .checksum_value: 0x9444d7d0
+; CHECK-NEXT: .debug_mode: false
+; DVGPR-NEXT: .dynamic_vgpr_saved_count: 0x70
+; CHECK-NOT: .entry_point: _amdgpu_cs_main
+; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main
+; CHECK-NEXT: .excp_en: 0
+; CHECK-NEXT: .float_mode: 0xc0
+; GFX11-NEXT: .ieee_mode: false
+; CHECK-NEXT: .image_op: false
+; CHECK-NEXT: .lds_size: 0
+; CHECK-NEXT: .mem_ordered: true
+; CHECK-NEXT: .scratch_en: false
+; CHECK-NEXT: .scratch_memory_size: 0
+; NODVGPR-NEXT: .sgpr_count: 0x4
+; DVGPR-NEXT: .sgpr_count: 0x22
+; CHECK-NEXT: .sgpr_limit: 0x6a
+; CHECK-NEXT: .threadgroup_dimensions:
+; CHECK-NEXT: - 0x1
+; CHECK-NEXT: - 0x400
+; CHECK-NEXT: - 0x1
+; CHECK-NEXT: .trap_present: false
+; CHECK-NEXT: .user_data_reg_map:
+; CHECK-NEXT: - 0x10000000
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: - 0xffffffff
+; CHECK-NEXT: .user_sgprs: 0x3
+; CHECK-NEXT: .vgpr_count: 0x2
+; CHECK-NEXT: .vgpr_limit: 0x100
+; CHECK-NEXT: .wavefront_size: 0x40
+; CHECK-NEXT: .wgp_mode: false
+; CHECK-NEXT: .gs:
+; CHECK-NEXT: .debug_mode: false
+; CHECK-NOT: .entry_point: _amdgpu_gs_main
+; CHECK-NEXT: .entry_point_symbol: gs_shader
+; GFX11-NEXT: .ieee_mode: false
+; CHECK-NEXT: .lds_size: 0x200
+; CHECK-NEXT: .mem_ordered: true
+; CHECK-NEXT: .scratch_en: false
+; CHECK-NEXT: .scratch_memory_size: 0
+; CHECK-NEXT: .sgpr_count: 0x1
+; CHECK-NEXT: .vgpr_count: 0x1
+; CHECK-NEXT: .wgp_mode: true
+; CHECK-NEXT: .hs:
+; CHECK-NEXT: .debug_mode: false
+; CHECK-NOT: .entry_point: _amdgpu_hs_main
+; CHECK-NEXT: .entry_point_symbol: hs_shader
+; GFX11-NEXT: .ieee_mode: false
+; CHECK-NEXT: .lds_size: 0x1000
+; CHECK-NEXT: .mem_ordered: true
+; CHECK-NEXT: .scratch_en: false
+; CHECK-NEXT: .scratch_memory_size: 0
+; CHECK-NEXT: .sgpr_count: 0x1
+; CHECK-NEXT: .vgpr_count: 0x1
+; CHECK-NEXT: .wgp_mode: true
+; CHECK-NEXT: .ps:
+; CHECK-NEXT: .debug_mode: false
+; CHECK-NOT: .entry_point: _amdgpu_ps_main
+; CHECK-NEXT: .entry_point_symbol: ps_shader
+; GFX11-NEXT: .ieee_mode: false
+; CHECK-NEXT: .lds_size: 0
+; CHECK-NEXT: .mem_ordered: true
+; CHECK-NEXT: .scratch_en: false
+; CHECK-NEXT: .scratch_memory_size: 0
+; CHECK-NEXT: .sgpr_count: 0x1
+; CHECK-NEXT: .vgpr_count: 0x1
+; CHECK-NEXT: .wgp_mode: true
+; CHECK: .registers: {}
+; CHECK:amdpal.version:
+; CHECK-NEXT: - 0x3
+; CHECK-NEXT: - 0x6
+; CHECK-NEXT:...
+; CHECK-NEXT: .end_amdgpu_pal_metadata
+
+define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg1, i32 %arg2) #0 !lgc.shaderstage !1 {
+.entry:
+ %i = call i64 @llvm.amdgcn.s.getpc()
+ %i1 = and i64 %i, -4294967296
+ %i2 = zext i32 %arg1 to i64
+ %i3 = or i64 %i1, %i2
+ %i4 = inttoptr i64 %i3 to ptr addrspace(4)
+ %i5 = and i32 %arg2, 1023
+ %i6 = lshr i32 %arg2, 10
+ %i7 = and i32 %i6, 1023
+ %i8 = add nuw nsw i32 %i7, %i5
+ %i9 = load <4 x i32>, ptr addrspace(4) %i4, align 16
+ %.idx = shl nuw nsw i32 %i8, 2
+ call void @llvm.amdgcn.raw.buffer.store.i32(i32 1, <4 x i32> %i9, i32 %.idx, i32 0, i32 0)
+ ret void
+}
+
+define dllexport amdgpu_ps void @ps_shader() #1 {
+ ret void
+}
+
+@LDS.GS = external addrspace(3) global [1 x i32], align 4
+
+define dllexport amdgpu_gs void @gs_shader() #2 {
+ %ptr = getelementptr i32, ptr addrspace(3) @LDS.GS, i32 0
+ store i32 0, ptr addrspace(3) %ptr, align 4
+ ret void
+}
+
+@LDS.HS = external addrspace(3) global [1024 x i32], align 4
+
+define dllexport amdgpu_hs void @hs_shader() #2 {
+ %ptr = getelementptr i32, ptr addrspace(3) @LDS.HS, i32 0
+ store i32 0, ptr addrspace(3) %ptr, align 4
+ ret void
+}
+
+!amdgpu.pal.metadata.msgpack = !{!0}
+
+; Function Attrs: nounwind willreturn memory(none)
+declare ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32>) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i64 @llvm.amdgcn.s.getpc() #2
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write)
+declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32 immarg) #3
+
+attributes #0 = { nounwind memory(readwrite) "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-memory-bound"="false" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "amdgpu-work-group-info-arg-no"="4" "denormal-fp-math-f32"="preserve-sign" "target-features"=",+wavefrontsize64,+cumode" }
+
+attributes #1 = { nounwind memory(readwrite) "InitialPSInputAddr"="36983" }
+
+!0 = !{!"\82\B0amdpal.pipelines\91\8A\A4.api\A6Vulkan\B2.compute_registers\85\AB.tg_size_en\C3\AA.tgid_x_en\C2\AA.tgid_y_en\C2\AA.tgid_z_en\C2\AF.tidig_comp_cnt\01\B0.hardware_stages\81\A3.cs\8C\AF.checksum_value\CE\94D\D7\D0\AB.debug_mode\00\AB.float_mode\CC\C0\A9.image_op\C2\AC.mem_ordered\C3\AB.sgpr_limitj\B7.threadgroup_dimensions\93\01\CD\04\00\01\AD.trap_present\00\B2.user_data_reg_map\DC\00 \CE\10\00\00\00\CE\FF\FF\FF\FF\00\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\AB.user_sgprs\03\AB.vgpr_limit\CD\01\00\AF.wavefront_size@\B7.internal_pipeline_hash\92\CF\E7\10k\A6:\A6%\F7\CF\B2\1F\1A\D4{\DA\E1T\AA.registers\80\A8.shaders\81\A8.compute\82\B0.api_shader_hash\92\CF\E9Zn7}\1E\B9\E7\00\B1.hardware_mapping\91\A3.cs\B0.spill_threshold\CE\FF\FF\FF\FF\A5.type\A2Cs\B0.user_data_limit\01\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\B4X\B8\11[\A4\88P\CF\A0;\B0\AF\FF\B4\BE\C0\AD.llpc_version\A461.1\AEamdpal.version\92\03\06"}
+!1 = !{i32 7}
diff --git a/llvm/test/CodeGen/AMDGPU/wave_dispatch_regs.ll b/llvm/test/CodeGen/AMDGPU/wave_dispatch_regs.ll
index fdc1e6abb051f..a798dc158a8f4 100644
--- a/llvm/test/CodeGen/AMDGPU/wave_dispatch_regs.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave_dispatch_regs.ll
@@ -12,7 +12,7 @@
; GCN-NEXT: amdpal.pipelines:
; GCN-NEXT: - .hardware_stages:
; GCN-NEXT: .cs:
-; GCN-NEXT: .entry_point: _amdgpu_cs
+; GCN-NEXT: .entry_point: _amdgpu_cs_main
; GCN-NEXT: .entry_point_symbol: _amdgpu_cs_main
; GCN-NEXT: .scratch_memory_size: 0
; SI-NEXT: .sgpr_count: 0x11
|
|
ping |
jayfoad
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So this is a bug fix for versions < 3.6, as well as a removal for version 3.6? The subject line makes it sound like you only changed 3.6.
I'll make that change (and the full stops) in the final commit. |
|
|
||
| attributes #1 = { nounwind memory(readwrite) "InitialPSInputAddr"="36983" } | ||
|
|
||
| !0 = !{!"\82\B0amdpal.pipelines\91\8A\A4.api\A6Vulkan\B2.compute_registers\85\AB.tg_size_en\C3\AA.tgid_x_en\C2\AA.tgid_y_en\C2\AA.tgid_z_en\C2\AF.tidig_comp_cnt\01\B0.hardware_stages\81\A3.cs\8C\AF.checksum_value\CE\94D\D7\D0\AB.debug_mode\00\AB.float_mode\CC\C0\A9.image_op\C2\AC.mem_ordered\C3\AB.sgpr_limitj\B7.threadgroup_dimensions\93\01\CD\04\00\01\AD.trap_present\00\B2.user_data_reg_map\DC\00 \CE\10\00\00\00\CE\FF\FF\FF\FF\00\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\AB.user_sgprs\03\AB.vgpr_limit\CD\01\00\AF.wavefront_size@\B7.internal_pipeline_hash\92\CF\E7\10k\A6:\A6%\F7\CF\B2\1F\1A\D4{\DA\E1T\AA.registers\80\A8.shaders\81\A8.compute\82\B0.api_shader_hash\92\CF\E9Zn7}\1E\B9\E7\00\B1.hardware_mapping\91\A3.cs\B0.spill_threshold\CE\FF\FF\FF\FF\A5.type\A2Cs\B0.user_data_limit\01\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\B4X\B8\11[\A4\88P\CF\A0;\B0\AF\FF\B4\BE\C0\AD.llpc_version\A461.1\AEamdpal.version\92\03\06"} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can be reduced to: !0 = !{!"\81\AEamdpal.version\92\03\06"}
In future I wonder if we should have a command line argument as a more readable way of specifying what PAL metadata version we want the compiler to generate? Also I wonder if it should always default to generating the latest supported version?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd prefer the leave the metadata as-is for this test. It's more representative of what the front end would pass in.
Update entry_point for all pal versions below 3.6. 3.6 and above removes entry_point.
Update entry_point for all pal versions below 3.6
3.6 and above removes entry_point