Skip to content

Commit c17f912

Browse files
committed
test
1 parent c53787f commit c17f912

File tree

2 files changed

+91
-72
lines changed

2 files changed

+91
-72
lines changed

llvm/test/CodeGen/NVPTX/lower-byval-args.ll

Lines changed: 91 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -455,42 +455,64 @@ define dso_local ptx_kernel void @memcpy_to_param(ptr nocapture noundef readonly
455455
; PTX-NEXT: .local .align 8 .b8 __local_depot9[8];
456456
; PTX-NEXT: .reg .b64 %SP;
457457
; PTX-NEXT: .reg .b64 %SPL;
458-
; PTX-NEXT: .reg .b64 %rd<30>;
458+
; PTX-NEXT: .reg .b32 %r<3>;
459+
; PTX-NEXT: .reg .b64 %rd<47>;
459460
; PTX-EMPTY:
460461
; PTX-NEXT: // %bb.0: // %entry
461462
; PTX-NEXT: mov.b64 %SPL, __local_depot9;
462463
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
463464
; PTX-NEXT: ld.param.b64 %rd1, [memcpy_to_param_param_0];
464-
; PTX-NEXT: cvta.to.global.u64 %rd2, %rd1;
465-
; PTX-NEXT: ld.param.b32 %rd3, [memcpy_to_param_param_1+4];
466-
; PTX-NEXT: shl.b64 %rd4, %rd3, 32;
467-
; PTX-NEXT: ld.param.b32 %rd5, [memcpy_to_param_param_1];
468-
; PTX-NEXT: or.b64 %rd6, %rd4, %rd5;
469-
; PTX-NEXT: st.b64 [%SP], %rd6;
470-
; PTX-NEXT: ld.volatile.global.b8 %rd7, [%rd2];
471-
; PTX-NEXT: ld.volatile.global.b8 %rd8, [%rd2+1];
472-
; PTX-NEXT: shl.b64 %rd9, %rd8, 8;
473-
; PTX-NEXT: or.b64 %rd10, %rd9, %rd7;
474-
; PTX-NEXT: ld.volatile.global.b8 %rd11, [%rd2+2];
475-
; PTX-NEXT: shl.b64 %rd12, %rd11, 16;
476-
; PTX-NEXT: ld.volatile.global.b8 %rd13, [%rd2+3];
477-
; PTX-NEXT: shl.b64 %rd14, %rd13, 24;
478-
; PTX-NEXT: or.b64 %rd15, %rd14, %rd12;
479-
; PTX-NEXT: or.b64 %rd16, %rd15, %rd10;
480-
; PTX-NEXT: ld.volatile.global.b8 %rd17, [%rd2+4];
481-
; PTX-NEXT: ld.volatile.global.b8 %rd18, [%rd2+5];
482-
; PTX-NEXT: shl.b64 %rd19, %rd18, 8;
483-
; PTX-NEXT: or.b64 %rd20, %rd19, %rd17;
484-
; PTX-NEXT: ld.volatile.global.b8 %rd21, [%rd2+6];
485-
; PTX-NEXT: shl.b64 %rd22, %rd21, 16;
486-
; PTX-NEXT: ld.volatile.global.b8 %rd23, [%rd2+7];
487-
; PTX-NEXT: shl.b64 %rd24, %rd23, 24;
488-
; PTX-NEXT: or.b64 %rd25, %rd24, %rd22;
489-
; PTX-NEXT: or.b64 %rd26, %rd25, %rd20;
490-
; PTX-NEXT: shl.b64 %rd27, %rd26, 32;
491-
; PTX-NEXT: or.b64 %rd28, %rd27, %rd16;
492-
; PTX-NEXT: add.u64 %rd29, %SPL, 0;
493-
; PTX-NEXT: st.local.b64 [%rd29], %rd28;
465+
; PTX-NEXT: add.u64 %rd2, %SPL, 0;
466+
; PTX-NEXT: ld.param.b32 %r1, [memcpy_to_param_param_1+4];
467+
; PTX-NEXT: st.local.b32 [%rd2+4], %r1;
468+
; PTX-NEXT: ld.param.b32 %r2, [memcpy_to_param_param_1];
469+
; PTX-NEXT: st.local.b32 [%rd2], %r2;
470+
; PTX-NEXT: ld.volatile.b8 %rd3, [%rd1];
471+
; PTX-NEXT: ld.volatile.b8 %rd4, [%rd1+1];
472+
; PTX-NEXT: shl.b64 %rd5, %rd4, 8;
473+
; PTX-NEXT: or.b64 %rd6, %rd5, %rd3;
474+
; PTX-NEXT: ld.volatile.b8 %rd7, [%rd1+2];
475+
; PTX-NEXT: shl.b64 %rd8, %rd7, 16;
476+
; PTX-NEXT: ld.volatile.b8 %rd9, [%rd1+3];
477+
; PTX-NEXT: shl.b64 %rd10, %rd9, 24;
478+
; PTX-NEXT: or.b64 %rd11, %rd10, %rd8;
479+
; PTX-NEXT: or.b64 %rd12, %rd11, %rd6;
480+
; PTX-NEXT: ld.volatile.b8 %rd13, [%rd1+4];
481+
; PTX-NEXT: ld.volatile.b8 %rd14, [%rd1+5];
482+
; PTX-NEXT: shl.b64 %rd15, %rd14, 8;
483+
; PTX-NEXT: or.b64 %rd16, %rd15, %rd13;
484+
; PTX-NEXT: ld.volatile.b8 %rd17, [%rd1+6];
485+
; PTX-NEXT: shl.b64 %rd18, %rd17, 16;
486+
; PTX-NEXT: ld.volatile.b8 %rd19, [%rd1+7];
487+
; PTX-NEXT: shl.b64 %rd20, %rd19, 24;
488+
; PTX-NEXT: or.b64 %rd21, %rd20, %rd18;
489+
; PTX-NEXT: or.b64 %rd22, %rd21, %rd16;
490+
; PTX-NEXT: shl.b64 %rd23, %rd22, 32;
491+
; PTX-NEXT: or.b64 %rd24, %rd23, %rd12;
492+
; PTX-NEXT: st.volatile.b64 [%SP], %rd24;
493+
; PTX-NEXT: ld.volatile.b8 %rd25, [%rd1+8];
494+
; PTX-NEXT: ld.volatile.b8 %rd26, [%rd1+9];
495+
; PTX-NEXT: shl.b64 %rd27, %rd26, 8;
496+
; PTX-NEXT: or.b64 %rd28, %rd27, %rd25;
497+
; PTX-NEXT: ld.volatile.b8 %rd29, [%rd1+10];
498+
; PTX-NEXT: shl.b64 %rd30, %rd29, 16;
499+
; PTX-NEXT: ld.volatile.b8 %rd31, [%rd1+11];
500+
; PTX-NEXT: shl.b64 %rd32, %rd31, 24;
501+
; PTX-NEXT: or.b64 %rd33, %rd32, %rd30;
502+
; PTX-NEXT: or.b64 %rd34, %rd33, %rd28;
503+
; PTX-NEXT: ld.volatile.b8 %rd35, [%rd1+12];
504+
; PTX-NEXT: ld.volatile.b8 %rd36, [%rd1+13];
505+
; PTX-NEXT: shl.b64 %rd37, %rd36, 8;
506+
; PTX-NEXT: or.b64 %rd38, %rd37, %rd35;
507+
; PTX-NEXT: ld.volatile.b8 %rd39, [%rd1+14];
508+
; PTX-NEXT: shl.b64 %rd40, %rd39, 16;
509+
; PTX-NEXT: ld.volatile.b8 %rd41, [%rd1+15];
510+
; PTX-NEXT: shl.b64 %rd42, %rd41, 24;
511+
; PTX-NEXT: or.b64 %rd43, %rd42, %rd40;
512+
; PTX-NEXT: or.b64 %rd44, %rd43, %rd38;
513+
; PTX-NEXT: shl.b64 %rd45, %rd44, 32;
514+
; PTX-NEXT: or.b64 %rd46, %rd45, %rd34;
515+
; PTX-NEXT: st.volatile.b64 [%SP+8], %rd46;
494516
; PTX-NEXT: ret;
495517
entry:
496518
tail call void @llvm.memcpy.p0.p0.i64(ptr %s, ptr %in, i64 16, i1 true)
@@ -562,25 +584,44 @@ define ptx_kernel void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i3
562584
; COPY-NEXT: store i32 [[VALLOADED]], ptr [[OUT]], align 4
563585
; COPY-NEXT: ret void
564586
;
565-
; PTX-LABEL: test_select(
566-
; PTX: {
567-
; PTX-NEXT: .reg .pred %p<2>;
568-
; PTX-NEXT: .reg .b16 %rs<3>;
569-
; PTX-NEXT: .reg .b32 %r<2>;
570-
; PTX-NEXT: .reg .b64 %rd<6>;
571-
; PTX-EMPTY:
572-
; PTX-NEXT: // %bb.0: // %bb
573-
; PTX-NEXT: ld.param.b8 %rs1, [test_select_param_3];
574-
; PTX-NEXT: and.b16 %rs2, %rs1, 1;
575-
; PTX-NEXT: setp.ne.b16 %p1, %rs2, 0;
576-
; PTX-NEXT: mov.b64 %rd1, test_select_param_0;
577-
; PTX-NEXT: ld.param.b64 %rd2, [test_select_param_2];
578-
; PTX-NEXT: cvta.to.global.u64 %rd3, %rd2;
579-
; PTX-NEXT: mov.b64 %rd4, test_select_param_1;
580-
; PTX-NEXT: selp.b64 %rd5, %rd1, %rd4, %p1;
581-
; PTX-NEXT: ld.param.b32 %r1, [%rd5];
582-
; PTX-NEXT: st.global.b32 [%rd3], %r1;
583-
; PTX-NEXT: ret;
587+
; PTX_60-LABEL: test_select(
588+
; PTX_60: {
589+
; PTX_60-NEXT: .reg .pred %p<2>;
590+
; PTX_60-NEXT: .reg .b16 %rs<3>;
591+
; PTX_60-NEXT: .reg .b32 %r<4>;
592+
; PTX_60-NEXT: .reg .b64 %rd<3>;
593+
; PTX_60-EMPTY:
594+
; PTX_60-NEXT: // %bb.0: // %bb
595+
; PTX_60-NEXT: ld.param.b8 %rs1, [test_select_param_3];
596+
; PTX_60-NEXT: and.b16 %rs2, %rs1, 1;
597+
; PTX_60-NEXT: setp.ne.b16 %p1, %rs2, 0;
598+
; PTX_60-NEXT: ld.param.b64 %rd1, [test_select_param_2];
599+
; PTX_60-NEXT: cvta.to.global.u64 %rd2, %rd1;
600+
; PTX_60-NEXT: ld.param.b32 %r1, [test_select_param_1];
601+
; PTX_60-NEXT: ld.param.b32 %r2, [test_select_param_0];
602+
; PTX_60-NEXT: selp.b32 %r3, %r2, %r1, %p1;
603+
; PTX_60-NEXT: st.global.b32 [%rd2], %r3;
604+
; PTX_60-NEXT: ret;
605+
;
606+
; PTX_70-LABEL: test_select(
607+
; PTX_70: {
608+
; PTX_70-NEXT: .reg .pred %p<2>;
609+
; PTX_70-NEXT: .reg .b16 %rs<3>;
610+
; PTX_70-NEXT: .reg .b32 %r<2>;
611+
; PTX_70-NEXT: .reg .b64 %rd<6>;
612+
; PTX_70-EMPTY:
613+
; PTX_70-NEXT: // %bb.0: // %bb
614+
; PTX_70-NEXT: ld.param.b8 %rs1, [test_select_param_3];
615+
; PTX_70-NEXT: and.b16 %rs2, %rs1, 1;
616+
; PTX_70-NEXT: setp.ne.b16 %p1, %rs2, 0;
617+
; PTX_70-NEXT: mov.b64 %rd1, test_select_param_0;
618+
; PTX_70-NEXT: ld.param.b64 %rd2, [test_select_param_2];
619+
; PTX_70-NEXT: cvta.to.global.u64 %rd3, %rd2;
620+
; PTX_70-NEXT: mov.b64 %rd4, test_select_param_1;
621+
; PTX_70-NEXT: selp.b64 %rd5, %rd1, %rd4, %p1;
622+
; PTX_70-NEXT: ld.param.b32 %r1, [%rd5];
623+
; PTX_70-NEXT: st.global.b32 [%rd3], %r1;
624+
; PTX_70-NEXT: ret;
584625
bb:
585626
%ptrnew = select i1 %cond, ptr %input1, ptr %input2
586627
%valloaded = load i32, ptr %ptrnew, align 4

llvm/test/Transforms/SROA/prefer-integer-partition.ll

Lines changed: 0 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -62,28 +62,6 @@ _ZNK4pbrt3SOAINS_10RaySamplesEEixEi.exit: ; preds = %0, %6
6262
ret <2 x float> %.sroa.01.0.copyload
6363
}
6464

65-
define void @test_float_array_only_intrinsics() {
66-
; CHECK-LABEL: @test_float_array_only_intrinsics(
67-
; CHECK-NEXT: entry:
68-
; CHECK-NEXT: ret void
69-
;
70-
entry:
71-
%src = alloca [2 x float], align 4
72-
%dst = alloca [2 x float], align 4
73-
74-
call void @llvm.lifetime.start.p0(i64 8, ptr %src)
75-
call void @llvm.lifetime.start.p0(i64 8, ptr %dst)
76-
77-
; Only intrinsic uses - no scalar loads/stores to establish common type
78-
call void @llvm.memset.p0.i64(ptr %src, i8 42, i64 8, i1 false)
79-
call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 8, i1 false)
80-
call void @llvm.memcpy.p0.p0.i64(ptr %src, ptr %dst, i64 8, i1 false)
81-
82-
call void @llvm.lifetime.end.p0(i64 8, ptr %dst)
83-
call void @llvm.lifetime.end.p0(i64 8, ptr %src)
84-
ret void
85-
}
86-
8765
define void @test_mixed_types() {
8866
; CHECK-LABEL: @test_mixed_types(
8967
; CHECK-NEXT: entry:

0 commit comments

Comments
 (0)