Skip to content

Commit b6a35a6

Browse files
Merge branch 'main' into HotFixDWARF
2 parents b47cd7a + b3b36d3 commit b6a35a6

File tree

15 files changed

+332
-3
lines changed

15 files changed

+332
-3
lines changed

compiler-rt/lib/memprof/memprof_interface_internal.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,15 @@ SANITIZER_INTERFACE_ATTRIBUTE void __memprof_version_mismatch_check_v1();
3535
SANITIZER_INTERFACE_ATTRIBUTE
3636
void __memprof_record_access(void const volatile *addr);
3737

38+
SANITIZER_INTERFACE_ATTRIBUTE
39+
void __memprof_record_access_hist(void const volatile *addr);
40+
3841
SANITIZER_INTERFACE_ATTRIBUTE
3942
void __memprof_record_access_range(void const volatile *addr, uptr size);
4043

44+
SANITIZER_INTERFACE_ATTRIBUTE
45+
void __memprof_record_access_range_hist(void const volatile *addr, uptr size);
46+
4147
SANITIZER_INTERFACE_ATTRIBUTE void __memprof_print_accumulated_stats();
4248

4349
SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE extern char
@@ -51,6 +57,10 @@ extern uptr __memprof_shadow_memory_dynamic_address;
5157

5258
SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE extern char
5359
__memprof_profile_filename[1];
60+
61+
SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE extern bool
62+
__memprof_histogram;
63+
5464
SANITIZER_INTERFACE_ATTRIBUTE int __memprof_profile_dump();
5565
SANITIZER_INTERFACE_ATTRIBUTE void __memprof_profile_reset();
5666

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// Test the histogram support in memprof using the text format output.
2+
// Shadow memory counters per object are limited to 8 bits. In-memory counters
3+
// aggregating counts across multiple objects are 64 bits.
4+
5+
// RUN: %clangxx_memprof -O0 -mllvm -memprof-histogram -mllvm -memprof-use-callbacks=true %s -o %t
6+
// RUN: %env_memprof_opts=print_text=1:histogram=1:log_path=stdout %run %t 2>&1 | FileCheck %s
7+
8+
#include <stdio.h>
9+
#include <stdlib.h>
10+
11+
int main() {
12+
// Allocate memory that will create a histogram
13+
char *buffer = (char *)malloc(1024);
14+
if (!buffer)
15+
return 1;
16+
17+
for (int i = 0; i < 10; ++i) {
18+
// Access every 8th byte (since shadow granularity is 8 bytes).
19+
buffer[i * 8] = 'A';
20+
}
21+
22+
for (int j = 0; j < 200; ++j) {
23+
buffer[8] = 'B'; // Count = previous count + 200
24+
}
25+
26+
for (int j = 0; j < 400; ++j) {
27+
buffer[16] = 'B'; // Count is saturated at 255
28+
}
29+
30+
// Free the memory to trigger MIB creation with histogram
31+
free(buffer);
32+
33+
printf("Test completed successfully\n");
34+
return 0;
35+
}
36+
37+
// CHECK: AccessCountHistogram[128]: 1 201 255 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
38+
// CHECK: Test completed successfully

lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1807,7 +1807,7 @@ RISCVSingleStepBreakpointLocationsPredictor::GetBreakpointLocations(
18071807
Log *log = GetLog(LLDBLog::Unwind);
18081808
LLDB_LOGF(log,
18091809
"RISCVSingleStepBreakpointLocationsPredictor::%s: can't find "
1810-
"corresponding load reserve insturuction",
1810+
"corresponding load reserve instruction",
18111811
__FUNCTION__);
18121812
return {*pc + (inst->is_rvc ? 2u : 4u)};
18131813
}
@@ -1839,7 +1839,7 @@ RISCVSingleStepBreakpointLocationsPredictor::HandleAtomicSequence(
18391839
EmulateInstructionRISCV *riscv_emulator =
18401840
static_cast<EmulateInstructionRISCV *>(m_emulator_up.get());
18411841

1842-
// Handle instructions between LR and SC. According to unprivilleged
1842+
// Handle instructions between LR and SC. According to unprivileged
18431843
// RISC-V ISA there can be at most 16 instructions in the sequence.
18441844

18451845
lldb::addr_t entry_pc = pc; // LR instruction address
@@ -1872,7 +1872,7 @@ RISCVSingleStepBreakpointLocationsPredictor::HandleAtomicSequence(
18721872
Log *log = GetLog(LLDBLog::Unwind);
18731873
LLDB_LOGF(log,
18741874
"RISCVSingleStepBreakpointLocationsPredictor::%s: can't find "
1875-
"corresponding store conditional insturuction",
1875+
"corresponding store conditional instruction",
18761876
__FUNCTION__);
18771877
return {entry_pc + (lr_inst->is_rvc ? 2u : 4u)};
18781878
}

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2015,6 +2015,8 @@ let AssemblerPredicate = isGFX11Plus in {
20152015

20162016
// These instructions differ from GFX12 variant by supporting DPP:
20172017
defm V_LSHL_ADD_U64 : VOP3Only_Realtriple_gfx1250<0x252>;
2018+
defm V_ASHR_PK_I8_I32 : VOP3Only_Realtriple_gfx1250<0x290>;
2019+
defm V_ASHR_PK_U8_I32 : VOP3Only_Realtriple_gfx1250<0x291>;
20182020
defm V_CVT_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36d>;
20192021
defm V_CVT_SR_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36e>;
20202022

llvm/test/CodeGen/AMDGPU/v_ashr_pk.ll

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
22
; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950 %s
3+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250 %s
34
define amdgpu_kernel void @v_ashr_pk_i8_i32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
45
; GFX950-LABEL: v_ashr_pk_i8_i32:
56
; GFX950: ; %bb.0:
@@ -13,6 +14,20 @@ define amdgpu_kernel void @v_ashr_pk_i8_i32(ptr addrspace(1) %out, i32 %src0, i3
1314
; GFX950-NEXT: v_ashr_pk_i8_i32 v1, s0, v1, v2
1415
; GFX950-NEXT: global_store_short v0, v1, s[6:7]
1516
; GFX950-NEXT: s_endpgm
17+
;
18+
; GFX1250-LABEL: v_ashr_pk_i8_i32:
19+
; GFX1250: ; %bb.0:
20+
; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x2c
21+
; GFX1250-NEXT: s_wait_xcnt 0x0
22+
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
23+
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
24+
; GFX1250-NEXT: s_wait_kmcnt 0x0
25+
; GFX1250-NEXT: s_and_b32 s2, s2, 31
26+
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
27+
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
28+
; GFX1250-NEXT: v_ashr_pk_i8_i32 v0, s0, s1, v0
29+
; GFX1250-NEXT: global_store_b16 v1, v0, s[4:5]
30+
; GFX1250-NEXT: s_endpgm
1631
%insert.0 = insertelement <2 x i32> poison, i32 %src0, i64 0
1732
%build_vector = insertelement <2 x i32> %insert.0, i32 %src1, i64 1
1833
%src2.clamp = and i32 %src2, 31
@@ -40,6 +55,20 @@ define amdgpu_kernel void @v_ashr_pk_u8_i32(ptr addrspace(1) %out, i32 %src0, i3
4055
; GFX950-NEXT: v_ashr_pk_u8_i32 v1, s0, v1, v2
4156
; GFX950-NEXT: global_store_short v0, v1, s[6:7]
4257
; GFX950-NEXT: s_endpgm
58+
;
59+
; GFX1250-LABEL: v_ashr_pk_u8_i32:
60+
; GFX1250: ; %bb.0:
61+
; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x2c
62+
; GFX1250-NEXT: s_wait_xcnt 0x0
63+
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
64+
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
65+
; GFX1250-NEXT: s_wait_kmcnt 0x0
66+
; GFX1250-NEXT: s_and_b32 s2, s2, 31
67+
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
68+
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
69+
; GFX1250-NEXT: v_ashr_pk_u8_i32 v0, s0, s1, v0
70+
; GFX1250-NEXT: global_store_b16 v1, v0, s[4:5]
71+
; GFX1250-NEXT: s_endpgm
4372
%insert.0 = insertelement <2 x i32> poison, i32 %src0, i64 0
4473
%build_vector = insertelement <2 x i32> %insert.0, i32 %src1, i64 1
4574
%src2.clamp = and i32 %src2, 31

llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,3 +366,39 @@ v_cvt_sr_pk_bf16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
366366

367367
v_cvt_sr_pk_bf16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
368368
// GFX1250: v_cvt_sr_pk_bf16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x6e,0xd7,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
369+
370+
v_ashr_pk_i8_i32 v2, s4, v7, v8
371+
// GFX1250: v_ashr_pk_i8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0e,0x22,0x04]
372+
373+
v_ashr_pk_i8_i32 v2, v4, 0, 1
374+
// GFX1250: v_ashr_pk_i8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x01,0x05,0x02]
375+
376+
v_ashr_pk_i8_i32 v2, v4, 3, s2
377+
// GFX1250: v_ashr_pk_i8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x07,0x09,0x00]
378+
379+
v_ashr_pk_i8_i32 v2, s4, 4, v2
380+
// GFX1250: v_ashr_pk_i8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x08,0x09,0x04]
381+
382+
v_ashr_pk_i8_i32 v2, v4, v7, 12345
383+
// GFX1250: v_ashr_pk_i8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
384+
385+
v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1]
386+
// GFX1250: v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x90,0xd6,0x02,0x07,0x12,0x04]
387+
388+
v_ashr_pk_u8_i32 v2, s4, v7, v8
389+
// GFX1250: v_ashr_pk_u8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0e,0x22,0x04]
390+
391+
v_ashr_pk_u8_i32 v2, v4, 0, 1
392+
// GFX1250: v_ashr_pk_u8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x01,0x05,0x02]
393+
394+
v_ashr_pk_u8_i32 v2, v4, 3, s2
395+
// GFX1250: v_ashr_pk_u8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x07,0x09,0x00]
396+
397+
v_ashr_pk_u8_i32 v2, s4, 4, v2
398+
// GFX1250: v_ashr_pk_u8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x08,0x09,0x04]
399+
400+
v_ashr_pk_u8_i32 v2, v4, v7, 12345
401+
// GFX1250: v_ashr_pk_u8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
402+
403+
v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1]
404+
// GFX1250: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04]

llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,3 +366,39 @@ v_cvt_sr_pk_bf16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
366366

367367
v_cvt_sr_pk_bf16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
368368
// GFX1250: v_cvt_sr_pk_bf16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x6e,0xd7,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
369+
370+
v_ashr_pk_i8_i32 v2, s4, v7, v8
371+
// GFX1250: v_ashr_pk_i8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0e,0x22,0x04]
372+
373+
v_ashr_pk_i8_i32 v2, v4, 0, 1
374+
// GFX1250: v_ashr_pk_i8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x01,0x05,0x02]
375+
376+
v_ashr_pk_i8_i32 v2, v4, 3, s2
377+
// GFX1250: v_ashr_pk_i8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x07,0x09,0x00]
378+
379+
v_ashr_pk_i8_i32 v2, s4, 4, v2
380+
// GFX1250: v_ashr_pk_i8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x08,0x09,0x04]
381+
382+
v_ashr_pk_i8_i32 v2, v4, v7, 12345
383+
// GFX1250: v_ashr_pk_i8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
384+
385+
v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1]
386+
// GFX1250: v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x90,0xd6,0x02,0x07,0x12,0x04]
387+
388+
v_ashr_pk_u8_i32 v2, s4, v7, v8
389+
// GFX1250: v_ashr_pk_u8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0e,0x22,0x04]
390+
391+
v_ashr_pk_u8_i32 v2, v4, 0, 1
392+
// GFX1250: v_ashr_pk_u8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x01,0x05,0x02]
393+
394+
v_ashr_pk_u8_i32 v2, v4, 3, s2
395+
// GFX1250: v_ashr_pk_u8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x07,0x09,0x00]
396+
397+
v_ashr_pk_u8_i32 v2, s4, 4, v2
398+
// GFX1250: v_ashr_pk_u8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x08,0x09,0x04]
399+
400+
v_ashr_pk_u8_i32 v2, v4, v7, 12345
401+
// GFX1250: v_ashr_pk_u8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
402+
403+
v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1]
404+
// GFX1250: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04]

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,3 +297,35 @@ v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 row_xmask:0 row_mask:0x1 ban
297297
v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
298298
// GFX1250: v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x6e,0xd7,0xfa,0xfe,0xf7,0x7b,0xff,0x6f,0x05,0x30]
299299
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
300+
301+
v_ashr_pk_i8_i32 v2, v4, v7, v8 quad_perm:[1,2,3,1]
302+
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
303+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
304+
305+
v_ashr_pk_i8_i32 v2, v4, v7, v8 row_share:3 fi:1
306+
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
307+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
308+
309+
v_ashr_pk_i8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
310+
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
311+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
312+
313+
v_ashr_pk_i8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3
314+
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
315+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
316+
317+
v_ashr_pk_u8_i32 v2, v4, v7, v8 quad_perm:[1,2,3,1]
318+
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
319+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
320+
321+
v_ashr_pk_u8_i32 v2, v4, v7, v8 row_share:3 fi:1
322+
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
323+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
324+
325+
v_ashr_pk_u8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
326+
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
327+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
328+
329+
v_ashr_pk_u8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3
330+
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
331+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,3 +297,35 @@ v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 row_xmask:0 row_mask:0x1 ban
297297
v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
298298
// GFX1250: v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x6e,0xd7,0xfa,0xfe,0xf7,0x7b,0xff,0x6f,0x05,0x30]
299299
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
300+
301+
v_ashr_pk_i8_i32 v2, v4, v7, v8 quad_perm:[1,2,3,1]
302+
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
303+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
304+
305+
v_ashr_pk_i8_i32 v2, v4, v7, v8 row_share:3 fi:1
306+
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
307+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
308+
309+
v_ashr_pk_i8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
310+
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
311+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
312+
313+
v_ashr_pk_i8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3
314+
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
315+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
316+
317+
v_ashr_pk_u8_i32 v2, v4, v7, v8 quad_perm:[1,2,3,1]
318+
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
319+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
320+
321+
v_ashr_pk_u8_i32 v2, v4, v7, v8 row_share:3 fi:1
322+
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
323+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
324+
325+
v_ashr_pk_u8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
326+
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
327+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
328+
329+
v_ashr_pk_u8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3
330+
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
331+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,3 +209,19 @@ v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
209209
v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
210210
// GFX1250: v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x6e,0xd7,0xe9,0xfe,0xf7,0x7b,0xff,0x00,0x00,0x00]
211211
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
212+
213+
v_ashr_pk_i8_i32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
214+
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x90,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
215+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
216+
217+
v_ashr_pk_i8_i32 v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1
218+
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x40,0x90,0xd6,0xea,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
219+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
220+
221+
v_ashr_pk_u8_i32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
222+
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x91,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
223+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
224+
225+
v_ashr_pk_u8_i32 v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1
226+
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x40,0x91,0xd6,0xea,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
227+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

0 commit comments

Comments
 (0)