-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[AArch64] recognise trn1/trn2 with flipped operands #169858
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
428d463
06a4039
e55e58c
83ec81c
51437fc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6699,33 +6699,52 @@ inline bool isUZPMask(ArrayRef<int> M, unsigned NumElts, | |
| } | ||
|
|
||
| /// Return true for trn1 or trn2 masks of the form: | ||
| /// <0, 8, 2, 10, 4, 12, 6, 14> or | ||
| /// <1, 9, 3, 11, 5, 13, 7, 15> | ||
| /// <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0, OperandOrderOut = 0) or | ||
| /// <1, 9, 3, 11, 5, 13, 7, 15> (WhichResultOut = 1, OperandOrderOut = 0) or | ||
| /// <8, 0, 10, 2, 12, 4, 14, 6> (WhichResultOut = 0, OperandOrderOut = 1) or | ||
| /// <9, 1, 11, 3, 13, 5, 15, 7> (WhichResultOut = 1, OperandOrderOut = 1) | ||
| inline bool isTRNMask(ArrayRef<int> M, unsigned NumElts, | ||
| unsigned &WhichResultOut) { | ||
| unsigned &WhichResultOut, unsigned &OperandOrderOut) { | ||
| if (NumElts % 2 != 0) | ||
| return false; | ||
| // Check the first non-undef element for trn1 vs trn2. | ||
| unsigned WhichResult = 2; | ||
|
|
||
| // "Variant" refers to the distinction between trn1 and trn2, while | ||
| // "Order" refers to the sequence of input registers (matching vs flipped). | ||
|
||
| bool Variant0Order0 = true; // WhichResultOut = 0, OperandOrderOut = 0 | ||
| bool Variant1Order0 = true; // WhichResultOut = 1, OperandOrderOut = 0 | ||
| bool Variant0Order1 = true; // WhichResultOut = 0, OperandOrderOut = 1 | ||
| bool Variant1Order1 = true; // WhichResultOut = 1, OperandOrderOut = 1 | ||
|
||
| // Check all elements match. | ||
| for (unsigned i = 0; i != NumElts; i += 2) { | ||
| if (M[i] >= 0) { | ||
| WhichResult = ((unsigned)M[i] == i ? 0 : 1); | ||
| break; | ||
| unsigned EvenElt = (unsigned)M[i]; | ||
| if (EvenElt != i) | ||
| Variant0Order0 = false; | ||
| if (EvenElt != i + 1) | ||
| Variant1Order0 = false; | ||
| if (EvenElt != NumElts + i) | ||
| Variant0Order1 = false; | ||
| if (EvenElt != NumElts + i + 1) | ||
| Variant1Order1 = false; | ||
| } | ||
| if (M[i + 1] >= 0) { | ||
| WhichResult = ((unsigned)M[i + 1] == i + NumElts ? 0 : 1); | ||
| break; | ||
| unsigned OddElt = (unsigned)M[i + 1]; | ||
| if (OddElt != NumElts + i) | ||
| Variant0Order0 = false; | ||
| if (OddElt != NumElts + i + 1) | ||
| Variant1Order0 = false; | ||
| if (OddElt != i) | ||
| Variant0Order1 = false; | ||
| if (OddElt != i + 1) | ||
| Variant1Order1 = false; | ||
| } | ||
| } | ||
| if (WhichResult == 2) | ||
|
|
||
| if (Variant0Order0 + Variant1Order0 + Variant0Order1 + Variant1Order1 != 1) | ||
| return false; | ||
|
|
||
| for (unsigned i = 0; i < NumElts; i += 2) { | ||
| if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) || | ||
| (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult)) | ||
| return false; | ||
| } | ||
| WhichResultOut = WhichResult; | ||
| WhichResultOut = (Variant0Order0 || Variant0Order1) ? 0 : 1; | ||
| OperandOrderOut = (Variant0Order0 || Variant1Order0) ? 0 : 1; | ||
| return true; | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -246,6 +246,87 @@ define <4 x float> @vtrnQf(ptr %A, ptr %B) nounwind { | |
| ret <4 x float> %tmp5 | ||
| } | ||
|
|
||
| define <8 x i8> @vtrni8_8first(ptr %A, ptr %B) nounwind { | ||
|
||
| ; CHECKLE-LABEL: vtrni8_8first: | ||
| ; CHECKLE: // %bb.0: | ||
| ; CHECKLE-NEXT: ldr d0, [x0] | ||
| ; CHECKLE-NEXT: ldr d1, [x1] | ||
| ; CHECKLE-NEXT: trn1 v2.8b, v1.8b, v0.8b | ||
| ; CHECKLE-NEXT: trn2 v0.8b, v0.8b, v1.8b | ||
| ; CHECKLE-NEXT: add v0.8b, v2.8b, v0.8b | ||
| ; CHECKLE-NEXT: ret | ||
| ; | ||
| ; CHECKBE-LABEL: vtrni8_8first: | ||
| ; CHECKBE: // %bb.0: | ||
| ; CHECKBE-NEXT: ld1 { v0.8b }, [x0] | ||
| ; CHECKBE-NEXT: ld1 { v1.8b }, [x1] | ||
| ; CHECKBE-NEXT: trn1 v2.8b, v1.8b, v0.8b | ||
| ; CHECKBE-NEXT: trn2 v0.8b, v0.8b, v1.8b | ||
| ; CHECKBE-NEXT: add v0.8b, v2.8b, v0.8b | ||
| ; CHECKBE-NEXT: rev64 v0.8b, v0.8b | ||
| ; CHECKBE-NEXT: ret | ||
| %tmp1 = load <8 x i8>, ptr %A | ||
| %tmp2 = load <8 x i8>, ptr %B | ||
|
||
| %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6> | ||
| %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> | ||
| %tmp5 = add <8 x i8> %tmp3, %tmp4 | ||
| ret <8 x i8> %tmp5 | ||
| } | ||
|
|
||
| define <8 x i8> @vtrni8_9first(ptr %A, ptr %B) nounwind { | ||
| ; CHECKLE-LABEL: vtrni8_9first: | ||
| ; CHECKLE: // %bb.0: | ||
| ; CHECKLE-NEXT: ldr d0, [x0] | ||
| ; CHECKLE-NEXT: ldr d1, [x1] | ||
| ; CHECKLE-NEXT: trn1 v2.8b, v1.8b, v0.8b | ||
| ; CHECKLE-NEXT: trn2 v0.8b, v1.8b, v0.8b | ||
| ; CHECKLE-NEXT: add v0.8b, v2.8b, v0.8b | ||
| ; CHECKLE-NEXT: ret | ||
| ; | ||
| ; CHECKBE-LABEL: vtrni8_9first: | ||
| ; CHECKBE: // %bb.0: | ||
| ; CHECKBE-NEXT: ld1 { v0.8b }, [x0] | ||
| ; CHECKBE-NEXT: ld1 { v1.8b }, [x1] | ||
| ; CHECKBE-NEXT: trn1 v2.8b, v1.8b, v0.8b | ||
| ; CHECKBE-NEXT: trn2 v0.8b, v1.8b, v0.8b | ||
| ; CHECKBE-NEXT: add v0.8b, v2.8b, v0.8b | ||
| ; CHECKBE-NEXT: rev64 v0.8b, v0.8b | ||
| ; CHECKBE-NEXT: ret | ||
| %tmp1 = load <8 x i8>, ptr %A | ||
| %tmp2 = load <8 x i8>, ptr %B | ||
| %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6> | ||
| %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 9, i32 1, i32 11, i32 3, i32 13, i32 5, i32 15, i32 7> | ||
| %tmp5 = add <8 x i8> %tmp3, %tmp4 | ||
| ret <8 x i8> %tmp5 | ||
| } | ||
|
|
||
| define <8 x i8> @vtrni8_89first_undef(ptr %A, ptr %B) nounwind { | ||
| ; CHECKLE-LABEL: vtrni8_89first_undef: | ||
| ; CHECKLE: // %bb.0: | ||
| ; CHECKLE-NEXT: ldr d0, [x0] | ||
| ; CHECKLE-NEXT: ldr d1, [x1] | ||
| ; CHECKLE-NEXT: trn1 v2.8b, v1.8b, v0.8b | ||
| ; CHECKLE-NEXT: trn2 v0.8b, v1.8b, v0.8b | ||
| ; CHECKLE-NEXT: add v0.8b, v2.8b, v0.8b | ||
| ; CHECKLE-NEXT: ret | ||
| ; | ||
| ; CHECKBE-LABEL: vtrni8_89first_undef: | ||
| ; CHECKBE: // %bb.0: | ||
| ; CHECKBE-NEXT: ld1 { v0.8b }, [x0] | ||
| ; CHECKBE-NEXT: ld1 { v1.8b }, [x1] | ||
| ; CHECKBE-NEXT: trn1 v2.8b, v1.8b, v0.8b | ||
| ; CHECKBE-NEXT: trn2 v0.8b, v1.8b, v0.8b | ||
| ; CHECKBE-NEXT: add v0.8b, v2.8b, v0.8b | ||
| ; CHECKBE-NEXT: rev64 v0.8b, v0.8b | ||
| ; CHECKBE-NEXT: ret | ||
| %tmp1 = load <8 x i8>, ptr %A | ||
| %tmp2 = load <8 x i8>, ptr %B | ||
| %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 8, i32 0, i32 poison, i32 2, i32 poison, i32 4, i32 14, i32 6> | ||
| %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 9, i32 1, i32 poison, i32 3, i32 13, i32 5, i32 15, i32 poison> | ||
| %tmp5 = add <8 x i8> %tmp3, %tmp4 | ||
| ret <8 x i8> %tmp5 | ||
| } | ||
|
|
||
| ; Undef shuffle indices (even at the start of the shuffle mask) should not prevent matching to VTRN: | ||
|
|
||
| define <8 x i8> @vtrni8_undef(ptr %A, ptr %B) nounwind { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: Maybe add `SDValue TRN = Dag.getNode(...)` since this `convertFromScalableVector` call has got a little long.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done in commit 3.