Skip to content

Commit 654ac92

Browse files
committed
Optimize once more
1 parent 4a24b92 commit 654ac92

File tree

3 files changed

+73
-76
lines changed

3 files changed

+73
-76
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 10 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -9901,27 +9901,26 @@ SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
99019901
case MVT::i32:
99029902
// This is meant for ARM specifically, which has ROTR but no ROTL.
99039903
if (isOperationLegalOrCustom(ISD::ROTR, VT)) {
9904-
// ror rtmp, r0, #16
9904+
// eor r3, r0, r0, ror #16
99059905
SDValue Ror16 =
99069906
DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(16, dl, SHVT));
9907-
// eor r1, r0, rtmp ; r1 = r0 ^ (r0 ror 16)
99089907
SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, Op, Ror16);
99099908

9910-
// bic r1, r1, #0xff0000 (clear bits 16-23)
9911-
// So we need the negated value: ~0x00FF0000 = 0xFF00FFFF
9912-
SDValue Mask = DAG.getConstant(0xFF00FFFFu, dl, VT);
9913-
SDValue BicResult = DAG.getNode(ISD::AND, dl, VT, Xor1, Mask);
9909+
// lsr r3, r3, #8
9910+
SDValue Lsr8 =
9911+
DAG.getNode(ISD::SRL, dl, VT, Xor1, DAG.getConstant(8, dl, SHVT));
99149912

9915-
// mov r1, r1, lsr #8
9916-
SDValue Lsr8 = DAG.getNode(ISD::SRL, dl, VT, BicResult,
9917-
DAG.getConstant(8, dl, SHVT));
9913+
// bic r3, r3, #65280 (0xFF00)
9914+
// So we need the negated value: ~0x0000FF00 = 0xFFFF00FF
9915+
SDValue Mask = DAG.getConstant(0xFFFF00FFu, dl, VT);
9916+
SDValue BicResult = DAG.getNode(ISD::AND, dl, VT, Lsr8, Mask);
99189917

99199918
// ror r0, r0, #8
99209919
SDValue Ror8 =
99219920
DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
99229921

9923-
// eor r0, Lsr8, Ror8
9924-
return DAG.getNode(ISD::XOR, dl, VT, Lsr8, Ror8);
9922+
// eor r0, r3, r0, ror #8
9923+
return DAG.getNode(ISD::XOR, dl, VT, BicResult, Ror8);
99259924
}
99269925
Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
99279926
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,

llvm/test/CodeGen/ARM/load-combine-big-endian.ll

Lines changed: 15 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -53,9 +53,9 @@ define i32 @load_i32_by_i8_bswap(ptr %arg) {
5353
; CHECK-LABEL: load_i32_by_i8_bswap:
5454
; CHECK: @ %bb.0:
5555
; CHECK-NEXT: ldr r0, [r0]
56+
; CHECK-NEXT: mvn r2, #65280
5657
; CHECK-NEXT: eor r1, r0, r0, ror #16
57-
; CHECK-NEXT: bic r1, r1, #16711680
58-
; CHECK-NEXT: lsr r1, r1, #8
58+
; CHECK-NEXT: and r1, r2, r1, lsr #8
5959
; CHECK-NEXT: eor r0, r1, r0, ror #8
6060
; CHECK-NEXT: mov pc, lr
6161
;
@@ -220,14 +220,13 @@ define i64 @load_i64_by_i8_bswap(ptr %arg) {
220220
; CHECK-LABEL: load_i64_by_i8_bswap:
221221
; CHECK: @ %bb.0:
222222
; CHECK-NEXT: ldr r1, [r0]
223+
; CHECK-NEXT: mvn r3, #65280
223224
; CHECK-NEXT: ldr r0, [r0, #4]
224225
; CHECK-NEXT: eor r2, r0, r0, ror #16
225-
; CHECK-NEXT: bic r2, r2, #16711680
226-
; CHECK-NEXT: lsr r2, r2, #8
226+
; CHECK-NEXT: and r2, r3, r2, lsr #8
227227
; CHECK-NEXT: eor r0, r2, r0, ror #8
228228
; CHECK-NEXT: eor r2, r1, r1, ror #16
229-
; CHECK-NEXT: bic r2, r2, #16711680
230-
; CHECK-NEXT: lsr r2, r2, #8
229+
; CHECK-NEXT: and r2, r3, r2, lsr #8
231230
; CHECK-NEXT: eor r1, r2, r1, ror #8
232231
; CHECK-NEXT: mov pc, lr
233232
;
@@ -369,9 +368,9 @@ define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
369368
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
370369
; CHECK: @ %bb.0:
371370
; CHECK-NEXT: ldr r0, [r0, #1]
371+
; CHECK-NEXT: mvn r2, #65280
372372
; CHECK-NEXT: eor r1, r0, r0, ror #16
373-
; CHECK-NEXT: bic r1, r1, #16711680
374-
; CHECK-NEXT: lsr r1, r1, #8
373+
; CHECK-NEXT: and r1, r2, r1, lsr #8
375374
; CHECK-NEXT: eor r0, r1, r0, ror #8
376375
; CHECK-NEXT: mov pc, lr
377376
;
@@ -423,9 +422,9 @@ define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
423422
; CHECK-LABEL: load_i32_by_i8_neg_offset:
424423
; CHECK: @ %bb.0:
425424
; CHECK-NEXT: ldr r0, [r0, #-4]
425+
; CHECK-NEXT: mvn r2, #65280
426426
; CHECK-NEXT: eor r1, r0, r0, ror #16
427-
; CHECK-NEXT: bic r1, r1, #16711680
428-
; CHECK-NEXT: lsr r1, r1, #8
427+
; CHECK-NEXT: and r1, r2, r1, lsr #8
429428
; CHECK-NEXT: eor r0, r1, r0, ror #8
430429
; CHECK-NEXT: mov pc, lr
431430
;
@@ -573,9 +572,9 @@ define i32 @load_i32_by_bswap_i16(ptr %arg) {
573572
; CHECK-LABEL: load_i32_by_bswap_i16:
574573
; CHECK: @ %bb.0:
575574
; CHECK-NEXT: ldr r0, [r0]
575+
; CHECK-NEXT: mvn r2, #65280
576576
; CHECK-NEXT: eor r1, r0, r0, ror #16
577-
; CHECK-NEXT: bic r1, r1, #16711680
578-
; CHECK-NEXT: lsr r1, r1, #8
577+
; CHECK-NEXT: and r1, r2, r1, lsr #8
579578
; CHECK-NEXT: eor r0, r1, r0, ror #8
580579
; CHECK-NEXT: mov pc, lr
581580
;
@@ -649,10 +648,10 @@ define i32 @load_i32_by_i8_base_offset_index(ptr %arg, i32 %i) {
649648
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
650649
; CHECK: @ %bb.0:
651650
; CHECK-NEXT: add r0, r0, r1
651+
; CHECK-NEXT: mvn r2, #65280
652652
; CHECK-NEXT: ldr r0, [r0, #12]
653653
; CHECK-NEXT: eor r1, r0, r0, ror #16
654-
; CHECK-NEXT: bic r1, r1, #16711680
655-
; CHECK-NEXT: lsr r1, r1, #8
654+
; CHECK-NEXT: and r1, r2, r1, lsr #8
656655
; CHECK-NEXT: eor r0, r1, r0, ror #8
657656
; CHECK-NEXT: mov pc, lr
658657
;
@@ -712,10 +711,10 @@ define i32 @load_i32_by_i8_base_offset_index_2(ptr %arg, i32 %i) {
712711
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
713712
; CHECK: @ %bb.0:
714713
; CHECK-NEXT: add r0, r1, r0
714+
; CHECK-NEXT: mvn r2, #65280
715715
; CHECK-NEXT: ldr r0, [r0, #13]
716716
; CHECK-NEXT: eor r1, r0, r0, ror #16
717-
; CHECK-NEXT: bic r1, r1, #16711680
718-
; CHECK-NEXT: lsr r1, r1, #8
717+
; CHECK-NEXT: and r1, r2, r1, lsr #8
719718
; CHECK-NEXT: eor r0, r1, r0, ror #8
720719
; CHECK-NEXT: mov pc, lr
721720
;

llvm/test/CodeGen/ARM/load-combine.ll

Lines changed: 48 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -137,9 +137,9 @@ define i32 @load_i32_by_i8_bswap(ptr %arg) {
137137
; CHECK-LABEL: load_i32_by_i8_bswap:
138138
; CHECK: @ %bb.0:
139139
; CHECK-NEXT: ldr r0, [r0]
140+
; CHECK-NEXT: mvn r2, #65280
140141
; CHECK-NEXT: eor r1, r0, r0, ror #16
141-
; CHECK-NEXT: bic r1, r1, #16711680
142-
; CHECK-NEXT: lsr r1, r1, #8
142+
; CHECK-NEXT: and r1, r2, r1, lsr #8
143143
; CHECK-NEXT: eor r0, r1, r0, ror #8
144144
; CHECK-NEXT: mov pc, lr
145145
;
@@ -153,15 +153,15 @@ define i32 @load_i32_by_i8_bswap(ptr %arg) {
153153
; CHECK-THUMBv5-NEXT: movs r3, r0
154154
; CHECK-THUMBv5-NEXT: rors r3, r1
155155
; CHECK-THUMBv5-NEXT: eors r3, r0
156+
; CHECK-THUMBv5-NEXT: lsrs r1, r3, #8
156157
; CHECK-THUMBv5-NEXT: ldr r0, .LCPI2_0
157-
; CHECK-THUMBv5-NEXT: ands r0, r3
158-
; CHECK-THUMBv5-NEXT: lsrs r0, r0, #8
158+
; CHECK-THUMBv5-NEXT: ands r0, r1
159159
; CHECK-THUMBv5-NEXT: eors r0, r2
160160
; CHECK-THUMBv5-NEXT: bx lr
161161
; CHECK-THUMBv5-NEXT: .p2align 2
162162
; CHECK-THUMBv5-NEXT: @ %bb.1:
163163
; CHECK-THUMBv5-NEXT: .LCPI2_0:
164-
; CHECK-THUMBv5-NEXT: .long 4278255360 @ 0xff00ff00
164+
; CHECK-THUMBv5-NEXT: .long 16711935 @ 0xff00ff
165165
;
166166
; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
167167
; CHECK-ARMv6: @ %bb.0:
@@ -281,47 +281,46 @@ define i64 @load_i64_by_i8_bswap(ptr %arg) {
281281
; CHECK-LABEL: load_i64_by_i8_bswap:
282282
; CHECK: @ %bb.0:
283283
; CHECK-NEXT: ldr r1, [r0]
284+
; CHECK-NEXT: mvn r3, #65280
284285
; CHECK-NEXT: ldr r0, [r0, #4]
285286
; CHECK-NEXT: eor r2, r0, r0, ror #16
286-
; CHECK-NEXT: bic r2, r2, #16711680
287-
; CHECK-NEXT: lsr r2, r2, #8
287+
; CHECK-NEXT: and r2, r3, r2, lsr #8
288288
; CHECK-NEXT: eor r0, r2, r0, ror #8
289289
; CHECK-NEXT: eor r2, r1, r1, ror #16
290-
; CHECK-NEXT: bic r2, r2, #16711680
291-
; CHECK-NEXT: lsr r2, r2, #8
290+
; CHECK-NEXT: and r2, r3, r2, lsr #8
292291
; CHECK-NEXT: eor r1, r2, r1, ror #8
293292
; CHECK-NEXT: mov pc, lr
294293
;
295294
; CHECK-THUMBv5-LABEL: load_i64_by_i8_bswap:
296295
; CHECK-THUMBv5: @ %bb.0:
297296
; CHECK-THUMBv5-NEXT: push {r4, r5, r7, lr}
298-
; CHECK-THUMBv5-NEXT: ldr r1, [r0, #4]
299-
; CHECK-THUMBv5-NEXT: movs r3, #8
300-
; CHECK-THUMBv5-NEXT: movs r4, r1
301-
; CHECK-THUMBv5-NEXT: rors r4, r3
302-
; CHECK-THUMBv5-NEXT: movs r5, #16
297+
; CHECK-THUMBv5-NEXT: movs r1, r0
298+
; CHECK-THUMBv5-NEXT: ldr r0, [r0, #4]
299+
; CHECK-THUMBv5-NEXT: movs r2, #8
300+
; CHECK-THUMBv5-NEXT: movs r3, r0
301+
; CHECK-THUMBv5-NEXT: rors r3, r2
302+
; CHECK-THUMBv5-NEXT: movs r4, #16
303+
; CHECK-THUMBv5-NEXT: movs r5, r0
304+
; CHECK-THUMBv5-NEXT: rors r5, r4
305+
; CHECK-THUMBv5-NEXT: eors r5, r0
306+
; CHECK-THUMBv5-NEXT: lsrs r0, r5, #8
307+
; CHECK-THUMBv5-NEXT: ldr r5, .LCPI4_0
308+
; CHECK-THUMBv5-NEXT: ands r0, r5
309+
; CHECK-THUMBv5-NEXT: eors r0, r3
310+
; CHECK-THUMBv5-NEXT: ldr r1, [r1]
311+
; CHECK-THUMBv5-NEXT: movs r3, r1
312+
; CHECK-THUMBv5-NEXT: rors r3, r2
303313
; CHECK-THUMBv5-NEXT: movs r2, r1
304-
; CHECK-THUMBv5-NEXT: rors r2, r5
314+
; CHECK-THUMBv5-NEXT: rors r2, r4
305315
; CHECK-THUMBv5-NEXT: eors r2, r1
306-
; CHECK-THUMBv5-NEXT: ldr r1, .LCPI4_0
307-
; CHECK-THUMBv5-NEXT: ands r2, r1
308-
; CHECK-THUMBv5-NEXT: lsrs r2, r2, #8
309-
; CHECK-THUMBv5-NEXT: eors r2, r4
310-
; CHECK-THUMBv5-NEXT: ldr r0, [r0]
311-
; CHECK-THUMBv5-NEXT: movs r4, r0
312-
; CHECK-THUMBv5-NEXT: rors r4, r3
313-
; CHECK-THUMBv5-NEXT: movs r3, r0
314-
; CHECK-THUMBv5-NEXT: rors r3, r5
315-
; CHECK-THUMBv5-NEXT: eors r3, r0
316-
; CHECK-THUMBv5-NEXT: ands r3, r1
317-
; CHECK-THUMBv5-NEXT: lsrs r1, r3, #8
318-
; CHECK-THUMBv5-NEXT: eors r1, r4
319-
; CHECK-THUMBv5-NEXT: movs r0, r2
316+
; CHECK-THUMBv5-NEXT: lsrs r1, r2, #8
317+
; CHECK-THUMBv5-NEXT: ands r1, r5
318+
; CHECK-THUMBv5-NEXT: eors r1, r3
320319
; CHECK-THUMBv5-NEXT: pop {r4, r5, r7, pc}
321320
; CHECK-THUMBv5-NEXT: .p2align 2
322321
; CHECK-THUMBv5-NEXT: @ %bb.1:
323322
; CHECK-THUMBv5-NEXT: .LCPI4_0:
324-
; CHECK-THUMBv5-NEXT: .long 4278255360 @ 0xff00ff00
323+
; CHECK-THUMBv5-NEXT: .long 16711935 @ 0xff00ff
325324
;
326325
; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
327326
; CHECK-ARMv6: @ %bb.0:
@@ -495,9 +494,9 @@ define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
495494
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
496495
; CHECK: @ %bb.0:
497496
; CHECK-NEXT: ldr r0, [r0, #1]
497+
; CHECK-NEXT: mvn r2, #65280
498498
; CHECK-NEXT: eor r1, r0, r0, ror #16
499-
; CHECK-NEXT: bic r1, r1, #16711680
500-
; CHECK-NEXT: lsr r1, r1, #8
499+
; CHECK-NEXT: and r1, r2, r1, lsr #8
501500
; CHECK-NEXT: eor r0, r1, r0, ror #8
502501
; CHECK-NEXT: mov pc, lr
503502
;
@@ -509,17 +508,17 @@ define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
509508
; CHECK-THUMBv5-NEXT: movs r2, r0
510509
; CHECK-THUMBv5-NEXT: rors r2, r1
511510
; CHECK-THUMBv5-NEXT: eors r2, r0
512-
; CHECK-THUMBv5-NEXT: ldr r1, .LCPI7_0
513-
; CHECK-THUMBv5-NEXT: ands r1, r2
514-
; CHECK-THUMBv5-NEXT: lsrs r1, r1, #8
515-
; CHECK-THUMBv5-NEXT: movs r2, #8
516-
; CHECK-THUMBv5-NEXT: rors r0, r2
517-
; CHECK-THUMBv5-NEXT: eors r0, r1
511+
; CHECK-THUMBv5-NEXT: lsrs r1, r2, #8
512+
; CHECK-THUMBv5-NEXT: ldr r2, .LCPI7_0
513+
; CHECK-THUMBv5-NEXT: ands r2, r1
514+
; CHECK-THUMBv5-NEXT: movs r1, #8
515+
; CHECK-THUMBv5-NEXT: rors r0, r1
516+
; CHECK-THUMBv5-NEXT: eors r0, r2
518517
; CHECK-THUMBv5-NEXT: bx lr
519518
; CHECK-THUMBv5-NEXT: .p2align 2
520519
; CHECK-THUMBv5-NEXT: @ %bb.1:
521520
; CHECK-THUMBv5-NEXT: .LCPI7_0:
522-
; CHECK-THUMBv5-NEXT: .long 4278255360 @ 0xff00ff00
521+
; CHECK-THUMBv5-NEXT: .long 16711935 @ 0xff00ff
523522
;
524523
; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
525524
; CHECK-ARMv6: @ %bb.0:
@@ -568,9 +567,9 @@ define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) {
568567
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
569568
; CHECK: @ %bb.0:
570569
; CHECK-NEXT: ldr r0, [r0, #-4]
570+
; CHECK-NEXT: mvn r2, #65280
571571
; CHECK-NEXT: eor r1, r0, r0, ror #16
572-
; CHECK-NEXT: bic r1, r1, #16711680
573-
; CHECK-NEXT: lsr r1, r1, #8
572+
; CHECK-NEXT: and r1, r2, r1, lsr #8
574573
; CHECK-NEXT: eor r0, r1, r0, ror #8
575574
; CHECK-NEXT: mov pc, lr
576575
;
@@ -585,15 +584,15 @@ define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) {
585584
; CHECK-THUMBv5-NEXT: movs r3, r0
586585
; CHECK-THUMBv5-NEXT: rors r3, r1
587586
; CHECK-THUMBv5-NEXT: eors r3, r0
587+
; CHECK-THUMBv5-NEXT: lsrs r1, r3, #8
588588
; CHECK-THUMBv5-NEXT: ldr r0, .LCPI8_0
589-
; CHECK-THUMBv5-NEXT: ands r0, r3
590-
; CHECK-THUMBv5-NEXT: lsrs r0, r0, #8
589+
; CHECK-THUMBv5-NEXT: ands r0, r1
591590
; CHECK-THUMBv5-NEXT: eors r0, r2
592591
; CHECK-THUMBv5-NEXT: bx lr
593592
; CHECK-THUMBv5-NEXT: .p2align 2
594593
; CHECK-THUMBv5-NEXT: @ %bb.1:
595594
; CHECK-THUMBv5-NEXT: .LCPI8_0:
596-
; CHECK-THUMBv5-NEXT: .long 4278255360 @ 0xff00ff00
595+
; CHECK-THUMBv5-NEXT: .long 16711935 @ 0xff00ff
597596
;
598597
; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
599598
; CHECK-ARMv6: @ %bb.0:
@@ -644,9 +643,9 @@ define i32 @load_i32_by_bswap_i16(ptr %arg) {
644643
; CHECK-LABEL: load_i32_by_bswap_i16:
645644
; CHECK: @ %bb.0:
646645
; CHECK-NEXT: ldr r0, [r0]
646+
; CHECK-NEXT: mvn r2, #65280
647647
; CHECK-NEXT: eor r1, r0, r0, ror #16
648-
; CHECK-NEXT: bic r1, r1, #16711680
649-
; CHECK-NEXT: lsr r1, r1, #8
648+
; CHECK-NEXT: and r1, r2, r1, lsr #8
650649
; CHECK-NEXT: eor r0, r1, r0, ror #8
651650
; CHECK-NEXT: mov pc, lr
652651
;
@@ -660,15 +659,15 @@ define i32 @load_i32_by_bswap_i16(ptr %arg) {
660659
; CHECK-THUMBv5-NEXT: movs r3, r0
661660
; CHECK-THUMBv5-NEXT: rors r3, r1
662661
; CHECK-THUMBv5-NEXT: eors r3, r0
662+
; CHECK-THUMBv5-NEXT: lsrs r1, r3, #8
663663
; CHECK-THUMBv5-NEXT: ldr r0, .LCPI9_0
664-
; CHECK-THUMBv5-NEXT: ands r0, r3
665-
; CHECK-THUMBv5-NEXT: lsrs r0, r0, #8
664+
; CHECK-THUMBv5-NEXT: ands r0, r1
666665
; CHECK-THUMBv5-NEXT: eors r0, r2
667666
; CHECK-THUMBv5-NEXT: bx lr
668667
; CHECK-THUMBv5-NEXT: .p2align 2
669668
; CHECK-THUMBv5-NEXT: @ %bb.1:
670669
; CHECK-THUMBv5-NEXT: .LCPI9_0:
671-
; CHECK-THUMBv5-NEXT: .long 4278255360 @ 0xff00ff00
670+
; CHECK-THUMBv5-NEXT: .long 16711935 @ 0xff00ff
672671
;
673672
; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
674673
; CHECK-ARMv6: @ %bb.0:

0 commit comments

Comments
 (0)