Skip to content

Commit 7269b0b

Browse files
committed
ptx test update
1 parent 8804ace commit 7269b0b

File tree

1 file changed

+31
-53
lines changed

1 file changed

+31
-53
lines changed

llvm/test/CodeGen/NVPTX/lower-byval-args.ll

Lines changed: 31 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -455,64 +455,42 @@ define dso_local ptx_kernel void @memcpy_to_param(ptr nocapture noundef readonly
455455
; PTX-NEXT: .local .align 8 .b8 __local_depot9[8];
456456
; PTX-NEXT: .reg .b64 %SP;
457457
; PTX-NEXT: .reg .b64 %SPL;
458-
; PTX-NEXT: .reg .b32 %r<3>;
459-
; PTX-NEXT: .reg .b64 %rd<47>;
458+
; PTX-NEXT: .reg .b64 %rd<30>;
460459
; PTX-EMPTY:
461460
; PTX-NEXT: // %bb.0: // %entry
462461
; PTX-NEXT: mov.b64 %SPL, __local_depot9;
463462
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
464463
; PTX-NEXT: ld.param.b64 %rd1, [memcpy_to_param_param_0];
465-
; PTX-NEXT: add.u64 %rd2, %SPL, 0;
466-
; PTX-NEXT: ld.param.b32 %r1, [memcpy_to_param_param_1+4];
467-
; PTX-NEXT: st.local.b32 [%rd2+4], %r1;
468-
; PTX-NEXT: ld.param.b32 %r2, [memcpy_to_param_param_1];
469-
; PTX-NEXT: st.local.b32 [%rd2], %r2;
470-
; PTX-NEXT: ld.volatile.b8 %rd3, [%rd1];
471-
; PTX-NEXT: ld.volatile.b8 %rd4, [%rd1+1];
472-
; PTX-NEXT: shl.b64 %rd5, %rd4, 8;
473-
; PTX-NEXT: or.b64 %rd6, %rd5, %rd3;
474-
; PTX-NEXT: ld.volatile.b8 %rd7, [%rd1+2];
475-
; PTX-NEXT: shl.b64 %rd8, %rd7, 16;
476-
; PTX-NEXT: ld.volatile.b8 %rd9, [%rd1+3];
477-
; PTX-NEXT: shl.b64 %rd10, %rd9, 24;
478-
; PTX-NEXT: or.b64 %rd11, %rd10, %rd8;
479-
; PTX-NEXT: or.b64 %rd12, %rd11, %rd6;
480-
; PTX-NEXT: ld.volatile.b8 %rd13, [%rd1+4];
481-
; PTX-NEXT: ld.volatile.b8 %rd14, [%rd1+5];
482-
; PTX-NEXT: shl.b64 %rd15, %rd14, 8;
483-
; PTX-NEXT: or.b64 %rd16, %rd15, %rd13;
484-
; PTX-NEXT: ld.volatile.b8 %rd17, [%rd1+6];
485-
; PTX-NEXT: shl.b64 %rd18, %rd17, 16;
486-
; PTX-NEXT: ld.volatile.b8 %rd19, [%rd1+7];
487-
; PTX-NEXT: shl.b64 %rd20, %rd19, 24;
488-
; PTX-NEXT: or.b64 %rd21, %rd20, %rd18;
489-
; PTX-NEXT: or.b64 %rd22, %rd21, %rd16;
490-
; PTX-NEXT: shl.b64 %rd23, %rd22, 32;
491-
; PTX-NEXT: or.b64 %rd24, %rd23, %rd12;
492-
; PTX-NEXT: st.volatile.b64 [%SP], %rd24;
493-
; PTX-NEXT: ld.volatile.b8 %rd25, [%rd1+8];
494-
; PTX-NEXT: ld.volatile.b8 %rd26, [%rd1+9];
495-
; PTX-NEXT: shl.b64 %rd27, %rd26, 8;
496-
; PTX-NEXT: or.b64 %rd28, %rd27, %rd25;
497-
; PTX-NEXT: ld.volatile.b8 %rd29, [%rd1+10];
498-
; PTX-NEXT: shl.b64 %rd30, %rd29, 16;
499-
; PTX-NEXT: ld.volatile.b8 %rd31, [%rd1+11];
500-
; PTX-NEXT: shl.b64 %rd32, %rd31, 24;
501-
; PTX-NEXT: or.b64 %rd33, %rd32, %rd30;
502-
; PTX-NEXT: or.b64 %rd34, %rd33, %rd28;
503-
; PTX-NEXT: ld.volatile.b8 %rd35, [%rd1+12];
504-
; PTX-NEXT: ld.volatile.b8 %rd36, [%rd1+13];
505-
; PTX-NEXT: shl.b64 %rd37, %rd36, 8;
506-
; PTX-NEXT: or.b64 %rd38, %rd37, %rd35;
507-
; PTX-NEXT: ld.volatile.b8 %rd39, [%rd1+14];
508-
; PTX-NEXT: shl.b64 %rd40, %rd39, 16;
509-
; PTX-NEXT: ld.volatile.b8 %rd41, [%rd1+15];
510-
; PTX-NEXT: shl.b64 %rd42, %rd41, 24;
511-
; PTX-NEXT: or.b64 %rd43, %rd42, %rd40;
512-
; PTX-NEXT: or.b64 %rd44, %rd43, %rd38;
513-
; PTX-NEXT: shl.b64 %rd45, %rd44, 32;
514-
; PTX-NEXT: or.b64 %rd46, %rd45, %rd34;
515-
; PTX-NEXT: st.volatile.b64 [%SP+8], %rd46;
464+
; PTX-NEXT: cvta.to.global.u64 %rd2, %rd1;
465+
; PTX-NEXT: ld.param.b32 %rd3, [memcpy_to_param_param_1+4];
466+
; PTX-NEXT: shl.b64 %rd4, %rd3, 32;
467+
; PTX-NEXT: ld.param.b32 %rd5, [memcpy_to_param_param_1];
468+
; PTX-NEXT: or.b64 %rd6, %rd4, %rd5;
469+
; PTX-NEXT: st.b64 [%SP], %rd6;
470+
; PTX-NEXT: ld.volatile.global.b8 %rd7, [%rd2];
471+
; PTX-NEXT: ld.volatile.global.b8 %rd8, [%rd2+1];
472+
; PTX-NEXT: shl.b64 %rd9, %rd8, 8;
473+
; PTX-NEXT: or.b64 %rd10, %rd9, %rd7;
474+
; PTX-NEXT: ld.volatile.global.b8 %rd11, [%rd2+2];
475+
; PTX-NEXT: shl.b64 %rd12, %rd11, 16;
476+
; PTX-NEXT: ld.volatile.global.b8 %rd13, [%rd2+3];
477+
; PTX-NEXT: shl.b64 %rd14, %rd13, 24;
478+
; PTX-NEXT: or.b64 %rd15, %rd14, %rd12;
479+
; PTX-NEXT: or.b64 %rd16, %rd15, %rd10;
480+
; PTX-NEXT: ld.volatile.global.b8 %rd17, [%rd2+4];
481+
; PTX-NEXT: ld.volatile.global.b8 %rd18, [%rd2+5];
482+
; PTX-NEXT: shl.b64 %rd19, %rd18, 8;
483+
; PTX-NEXT: or.b64 %rd20, %rd19, %rd17;
484+
; PTX-NEXT: ld.volatile.global.b8 %rd21, [%rd2+6];
485+
; PTX-NEXT: shl.b64 %rd22, %rd21, 16;
486+
; PTX-NEXT: ld.volatile.global.b8 %rd23, [%rd2+7];
487+
; PTX-NEXT: shl.b64 %rd24, %rd23, 24;
488+
; PTX-NEXT: or.b64 %rd25, %rd24, %rd22;
489+
; PTX-NEXT: or.b64 %rd26, %rd25, %rd20;
490+
; PTX-NEXT: shl.b64 %rd27, %rd26, 32;
491+
; PTX-NEXT: or.b64 %rd28, %rd27, %rd16;
492+
; PTX-NEXT: add.u64 %rd29, %SPL, 0;
493+
; PTX-NEXT: st.local.b64 [%rd29], %rd28;
516494
; PTX-NEXT: ret;
517495
entry:
518496
tail call void @llvm.memcpy.p0.p0.i64(ptr %s, ptr %in, i64 16, i1 true)

0 commit comments

Comments
 (0)