-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[AArch64] Use dupq (SVE2.1) for segmented lane splats #144482
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
e848506
5dd61ba
5ffdce6
1f05588
1fc5190
31ee725
6b380b0
ddcf5d5
c512f9b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,115 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s | ||
|
|
||
| ; Splat i8 lane 11 within each 128-bit (16 x i8) segment of a <32 x i8> | ||
| ; vector: the mask repeats index 11 sixteen times, then 27 (= 16 + 11). | ||
| ; The checks expect one dupq on .b lanes between the load and the store. | ||
| define void @dupq_i8_256b(ptr %addr) #0 { | ||
| ; CHECK-LABEL: dupq_i8_256b: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: ldr z0, [x0] | ||
| ; CHECK-NEXT: dupq z0.b, z0.b[11] | ||
| ; CHECK-NEXT: str z0, [x0] | ||
| ; CHECK-NEXT: ret | ||
| %load = load <32 x i8>, ptr %addr | ||
| %splat.lanes = shufflevector <32 x i8> %load, <32 x i8> poison, <32 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, | ||
| i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27> | ||
|
||
| store <32 x i8> %splat.lanes, ptr %addr | ||
| ret void | ||
| } | ||
|
|
||
| ; Splat i16 lane 2 within each 128-bit (8 x i16) segment of a <16 x i16> | ||
| ; vector: the mask uses index 2 for the first half and 10 (= 8 + 2) for the | ||
| ; second. The checks expect one dupq on .h lanes. | ||
| define void @dupq_i16_256b(ptr %addr) #0 { | ||
| ; CHECK-LABEL: dupq_i16_256b: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: ldr z0, [x0] | ||
| ; CHECK-NEXT: dupq z0.h, z0.h[2] | ||
| ; CHECK-NEXT: str z0, [x0] | ||
| ; CHECK-NEXT: ret | ||
| %load = load <16 x i16>, ptr %addr | ||
| %splat.lanes = shufflevector <16 x i16> %load, <16 x i16> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, | ||
| i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10> | ||
| store <16 x i16> %splat.lanes, ptr %addr | ||
| ret void | ||
| } | ||
|
|
||
| ; Splat i32 lane 3 within each 128-bit (4 x i32) segment of an <8 x i32> | ||
| ; vector: the mask uses index 3 for the first half and 7 (= 4 + 3) for the | ||
| ; second. The checks expect one dupq on .s lanes. | ||
| define void @dupq_i32_256b(ptr %addr) #0 { | ||
| ; CHECK-LABEL: dupq_i32_256b: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: ldr z0, [x0] | ||
| ; CHECK-NEXT: dupq z0.s, z0.s[3] | ||
| ; CHECK-NEXT: str z0, [x0] | ||
| ; CHECK-NEXT: ret | ||
| %load = load <8 x i32>, ptr %addr | ||
| %splat.lanes = shufflevector <8 x i32> %load, <8 x i32> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, | ||
| i32 7, i32 7, i32 7, i32 7> | ||
| store <8 x i32> %splat.lanes, ptr %addr | ||
| ret void | ||
| } | ||
|
|
||
| ; Splat i64 lane 0 within each 128-bit (2 x i64) segment of a <4 x i64> | ||
| ; vector (mask <0, 0, 2, 2>). Unlike the narrower element types, the checks | ||
| ; here expect trn1 with both source operands set to z0 rather than dupq. | ||
| ; NOTE(review): presumably trn1 of a register with itself yields the same | ||
| ; even-lane duplication -- confirm against the Arm ISA reference. | ||
| define void @dupq_i64_256b(ptr %addr) #0 { | ||
| ; CHECK-LABEL: dupq_i64_256b: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: ldr z0, [x0] | ||
| ; CHECK-NEXT: trn1 z0.d, z0.d, z0.d | ||
| ; CHECK-NEXT: str z0, [x0] | ||
| ; CHECK-NEXT: ret | ||
| %load = load <4 x i64>, ptr %addr | ||
| %splat.lanes = shufflevector <4 x i64> %load, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2> | ||
| store <4 x i64> %splat.lanes, ptr %addr | ||
| ret void | ||
| } | ||
|
|
||
| ; Floating-point counterpart of the i16 case: splat half lane 2 within each | ||
| ; 128-bit (8 x half) segment of a <16 x half> vector (mask indices 2 and | ||
| ; 10 = 8 + 2). The checks expect one dupq on .h lanes. | ||
| define void @dupq_f16_256b(ptr %addr) #0 { | ||
| ; CHECK-LABEL: dupq_f16_256b: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: ldr z0, [x0] | ||
| ; CHECK-NEXT: dupq z0.h, z0.h[2] | ||
| ; CHECK-NEXT: str z0, [x0] | ||
| ; CHECK-NEXT: ret | ||
| %load = load <16 x half>, ptr %addr | ||
| %splat.lanes = shufflevector <16 x half> %load, <16 x half> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, | ||
| i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10> | ||
| store <16 x half> %splat.lanes, ptr %addr | ||
| ret void | ||
| } | ||
|
|
||
| ; Same segmented splat mask as the f16 case (indices 2 and 10 = 8 + 2), but | ||
| ; on <16 x bfloat>. The checks here do not expect dupq: they expect the | ||
| ; vector to be handled as two q registers (ldp/stp) with a separate | ||
| ; dup v.8h, v.h[2] for each half. | ||
| ; NOTE(review): looks like bfloat is not covered by the dupq lowering at this | ||
| ; revision even though +bf16 is enabled in attributes #0 -- confirm intent. | ||
| define void @dupq_bf16_256b(ptr %addr) #0 { | ||
| ; CHECK-LABEL: dupq_bf16_256b: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: ldp q0, q1, [x0] | ||
| ; CHECK-NEXT: dup v0.8h, v0.h[2] | ||
| ; CHECK-NEXT: dup v1.8h, v1.h[2] | ||
paulwalker-arm marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| ; CHECK-NEXT: stp q0, q1, [x0] | ||
| ; CHECK-NEXT: ret | ||
| %load = load <16 x bfloat>, ptr %addr | ||
| %splat.lanes = shufflevector <16 x bfloat> %load, <16 x bfloat> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, | ||
| i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10> | ||
| store <16 x bfloat> %splat.lanes, ptr %addr | ||
| ret void | ||
| } | ||
|
|
||
| ; Floating-point counterpart of the i32 case: splat float lane 3 within each | ||
| ; 128-bit (4 x float) segment of an <8 x float> vector (mask indices 3 and | ||
| ; 7 = 4 + 3). The checks expect one dupq on .s lanes. | ||
| define void @dupq_f32_256b(ptr %addr) #0 { | ||
| ; CHECK-LABEL: dupq_f32_256b: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: ldr z0, [x0] | ||
| ; CHECK-NEXT: dupq z0.s, z0.s[3] | ||
| ; CHECK-NEXT: str z0, [x0] | ||
| ; CHECK-NEXT: ret | ||
| %load = load <8 x float>, ptr %addr | ||
| %splat.lanes = shufflevector <8 x float> %load, <8 x float> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, | ||
| i32 7, i32 7, i32 7, i32 7> | ||
| store <8 x float> %splat.lanes, ptr %addr | ||
| ret void | ||
| } | ||
|
|
||
| ; Floating-point counterpart of the i64 case: splat double lane 0 within | ||
| ; each 128-bit (2 x double) segment of a <4 x double> vector (mask | ||
| ; <0, 0, 2, 2>). As with i64, the checks expect trn1 with both source | ||
| ; operands set to z0 rather than dupq. | ||
| define void @dupq_f64_256b(ptr %addr) #0 { | ||
| ; CHECK-LABEL: dupq_f64_256b: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: ldr z0, [x0] | ||
| ; CHECK-NEXT: trn1 z0.d, z0.d, z0.d | ||
| ; CHECK-NEXT: str z0, [x0] | ||
| ; CHECK-NEXT: ret | ||
| %load = load <4 x double>, ptr %addr | ||
| %splat.lanes = shufflevector <4 x double> %load, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2> | ||
| store <4 x double> %splat.lanes, ptr %addr | ||
| ret void | ||
| } | ||
|
|
||
| ; Attribute group referenced as #0 by every function in this file: marks the | ||
| ; function noinline, pins vscale to exactly 2 via vscale_range(2,2), and | ||
| ; enables the sve2p1 and bf16 target features. | ||
| ; NOTE(review): vscale = 2 presumably corresponds to the 256-bit vectors the | ||
| ; "_256b" test names refer to -- confirm. | ||
| attributes #0 = { noinline vscale_range(2,2) "target-features"="+sve2p1,+bf16" } | ||
Uh oh!
There was an error while loading. Please reload this page.