@@ -343,61 +343,77 @@ define <4 x i8> @test_smax(<4 x i8> %a, <4 x i8> %b) #0 {
343343; O0-LABEL: test_smax(
344344; O0: {
345345; O0-NEXT: .reg .pred %p<5>;
346- ; O0-NEXT: .reg .b32 %r<18 >;
346+ ; O0-NEXT: .reg .b32 %r<26 >;
347347; O0-EMPTY:
348348; O0-NEXT: // %bb.0:
349349; O0-NEXT: ld.param.b32 %r2, [test_smax_param_1];
350350; O0-NEXT: ld.param.b32 %r1, [test_smax_param_0];
351- ; O0-NEXT: prmt.b32 %r3, %r2, 0, 0x7770U ;
352- ; O0-NEXT: prmt.b32 %r4, %r1, 0, 0x7770U ;
351+ ; O0-NEXT: prmt.b32 %r3, %r2, 0, 0x8880U ;
352+ ; O0-NEXT: prmt.b32 %r4, %r1, 0, 0x8880U ;
353353; O0-NEXT: setp.gt.s32 %p1, %r4, %r3;
354- ; O0-NEXT: prmt.b32 %r5, %r2, 0, 0x7771U ;
355- ; O0-NEXT: prmt.b32 %r6, %r1, 0, 0x7771U ;
354+ ; O0-NEXT: prmt.b32 %r5, %r2, 0, 0x9991U ;
355+ ; O0-NEXT: prmt.b32 %r6, %r1, 0, 0x9991U ;
356356; O0-NEXT: setp.gt.s32 %p2, %r6, %r5;
357- ; O0-NEXT: prmt.b32 %r7, %r2, 0, 0x7772U ;
358- ; O0-NEXT: prmt.b32 %r8, %r1, 0, 0x7772U ;
357+ ; O0-NEXT: prmt.b32 %r7, %r2, 0, 0xaaa2U ;
358+ ; O0-NEXT: prmt.b32 %r8, %r1, 0, 0xaaa2U ;
359359; O0-NEXT: setp.gt.s32 %p3, %r8, %r7;
360- ; O0-NEXT: prmt.b32 %r9, %r2, 0, 0x7773U ;
361- ; O0-NEXT: prmt.b32 %r10, %r1, 0, 0x7773U ;
360+ ; O0-NEXT: prmt.b32 %r9, %r2, 0, 0xbbb3U ;
361+ ; O0-NEXT: prmt.b32 %r10, %r1, 0, 0xbbb3U ;
362362; O0-NEXT: setp.gt.s32 %p4, %r10, %r9;
363- ; O0-NEXT: selp.b32 %r11, %r10, %r9, %p4;
364- ; O0-NEXT: selp.b32 %r12, %r8, %r7, %p3;
365- ; O0-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U;
366- ; O0-NEXT: selp.b32 %r14, %r6, %r5, %p2;
367- ; O0-NEXT: selp.b32 %r15, %r4, %r3, %p1;
368- ; O0-NEXT: prmt.b32 %r16, %r15, %r14, 0x3340U;
369- ; O0-NEXT: prmt.b32 %r17, %r16, %r13, 0x5410U;
370- ; O0-NEXT: st.param.b32 [func_retval0], %r17;
363+ ; O0-NEXT: prmt.b32 %r11, %r2, 0, 0x7770U;
364+ ; O0-NEXT: prmt.b32 %r12, %r2, 0, 0x7771U;
365+ ; O0-NEXT: prmt.b32 %r13, %r2, 0, 0x7772U;
366+ ; O0-NEXT: prmt.b32 %r14, %r2, 0, 0x7773U;
367+ ; O0-NEXT: prmt.b32 %r15, %r1, 0, 0x7773U;
368+ ; O0-NEXT: selp.b32 %r16, %r15, %r14, %p4;
369+ ; O0-NEXT: prmt.b32 %r17, %r1, 0, 0x7772U;
370+ ; O0-NEXT: selp.b32 %r18, %r17, %r13, %p3;
371+ ; O0-NEXT: prmt.b32 %r19, %r18, %r16, 0x3340U;
372+ ; O0-NEXT: prmt.b32 %r20, %r1, 0, 0x7771U;
373+ ; O0-NEXT: selp.b32 %r21, %r20, %r12, %p2;
374+ ; O0-NEXT: prmt.b32 %r22, %r1, 0, 0x7770U;
375+ ; O0-NEXT: selp.b32 %r23, %r22, %r11, %p1;
376+ ; O0-NEXT: prmt.b32 %r24, %r23, %r21, 0x3340U;
377+ ; O0-NEXT: prmt.b32 %r25, %r24, %r19, 0x5410U;
378+ ; O0-NEXT: st.param.b32 [func_retval0], %r25;
371379; O0-NEXT: ret;
372380;
373381; O3-LABEL: test_smax(
374382; O3: {
375383; O3-NEXT: .reg .pred %p<5>;
376- ; O3-NEXT: .reg .b32 %r<18 >;
384+ ; O3-NEXT: .reg .b32 %r<26 >;
377385; O3-EMPTY:
378386; O3-NEXT: // %bb.0:
379387; O3-NEXT: ld.param.b32 %r1, [test_smax_param_0];
380388; O3-NEXT: ld.param.b32 %r2, [test_smax_param_1];
381- ; O3-NEXT: prmt.b32 %r3, %r2, 0, 0x7770U ;
382- ; O3-NEXT: prmt.b32 %r4, %r1, 0, 0x7770U ;
389+ ; O3-NEXT: prmt.b32 %r3, %r2, 0, 0x8880U ;
390+ ; O3-NEXT: prmt.b32 %r4, %r1, 0, 0x8880U ;
383391; O3-NEXT: setp.gt.s32 %p1, %r4, %r3;
384- ; O3-NEXT: prmt.b32 %r5, %r2, 0, 0x7771U ;
385- ; O3-NEXT: prmt.b32 %r6, %r1, 0, 0x7771U ;
392+ ; O3-NEXT: prmt.b32 %r5, %r2, 0, 0x9991U ;
393+ ; O3-NEXT: prmt.b32 %r6, %r1, 0, 0x9991U ;
386394; O3-NEXT: setp.gt.s32 %p2, %r6, %r5;
387- ; O3-NEXT: prmt.b32 %r7, %r2, 0, 0x7772U ;
388- ; O3-NEXT: prmt.b32 %r8, %r1, 0, 0x7772U ;
395+ ; O3-NEXT: prmt.b32 %r7, %r2, 0, 0xaaa2U ;
396+ ; O3-NEXT: prmt.b32 %r8, %r1, 0, 0xaaa2U ;
389397; O3-NEXT: setp.gt.s32 %p3, %r8, %r7;
390- ; O3-NEXT: prmt.b32 %r9, %r2, 0, 0x7773U ;
391- ; O3-NEXT: prmt.b32 %r10, %r1, 0, 0x7773U ;
398+ ; O3-NEXT: prmt.b32 %r9, %r2, 0, 0xbbb3U ;
399+ ; O3-NEXT: prmt.b32 %r10, %r1, 0, 0xbbb3U ;
392400; O3-NEXT: setp.gt.s32 %p4, %r10, %r9;
393- ; O3-NEXT: selp.b32 %r11, %r10, %r9, %p4;
394- ; O3-NEXT: selp.b32 %r12, %r8, %r7, %p3;
395- ; O3-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U;
396- ; O3-NEXT: selp.b32 %r14, %r6, %r5, %p2;
397- ; O3-NEXT: selp.b32 %r15, %r4, %r3, %p1;
398- ; O3-NEXT: prmt.b32 %r16, %r15, %r14, 0x3340U;
399- ; O3-NEXT: prmt.b32 %r17, %r16, %r13, 0x5410U;
400- ; O3-NEXT: st.param.b32 [func_retval0], %r17;
401+ ; O3-NEXT: prmt.b32 %r11, %r2, 0, 0x7770U;
402+ ; O3-NEXT: prmt.b32 %r12, %r2, 0, 0x7771U;
403+ ; O3-NEXT: prmt.b32 %r13, %r2, 0, 0x7772U;
404+ ; O3-NEXT: prmt.b32 %r14, %r2, 0, 0x7773U;
405+ ; O3-NEXT: prmt.b32 %r15, %r1, 0, 0x7773U;
406+ ; O3-NEXT: selp.b32 %r16, %r15, %r14, %p4;
407+ ; O3-NEXT: prmt.b32 %r17, %r1, 0, 0x7772U;
408+ ; O3-NEXT: selp.b32 %r18, %r17, %r13, %p3;
409+ ; O3-NEXT: prmt.b32 %r19, %r18, %r16, 0x3340U;
410+ ; O3-NEXT: prmt.b32 %r20, %r1, 0, 0x7771U;
411+ ; O3-NEXT: selp.b32 %r21, %r20, %r12, %p2;
412+ ; O3-NEXT: prmt.b32 %r22, %r1, 0, 0x7770U;
413+ ; O3-NEXT: selp.b32 %r23, %r22, %r11, %p1;
414+ ; O3-NEXT: prmt.b32 %r24, %r23, %r21, 0x3340U;
415+ ; O3-NEXT: prmt.b32 %r25, %r24, %r19, 0x5410U;
416+ ; O3-NEXT: st.param.b32 [func_retval0], %r25;
401417; O3-NEXT: ret;
402418 %cmp = icmp sgt <4 x i8 > %a , %b
403419 %r = select <4 x i1 > %cmp , <4 x i8 > %a , <4 x i8 > %b
@@ -473,61 +489,77 @@ define <4 x i8> @test_smin(<4 x i8> %a, <4 x i8> %b) #0 {
473489; O0-LABEL: test_smin(
474490; O0: {
475491; O0-NEXT: .reg .pred %p<5>;
476- ; O0-NEXT: .reg .b32 %r<18 >;
492+ ; O0-NEXT: .reg .b32 %r<26 >;
477493; O0-EMPTY:
478494; O0-NEXT: // %bb.0:
479495; O0-NEXT: ld.param.b32 %r2, [test_smin_param_1];
480496; O0-NEXT: ld.param.b32 %r1, [test_smin_param_0];
481- ; O0-NEXT: prmt.b32 %r3, %r2, 0, 0x7770U ;
482- ; O0-NEXT: prmt.b32 %r4, %r1, 0, 0x7770U ;
497+ ; O0-NEXT: prmt.b32 %r3, %r2, 0, 0x8880U ;
498+ ; O0-NEXT: prmt.b32 %r4, %r1, 0, 0x8880U ;
483499; O0-NEXT: setp.le.s32 %p1, %r4, %r3;
484- ; O0-NEXT: prmt.b32 %r5, %r2, 0, 0x7771U ;
485- ; O0-NEXT: prmt.b32 %r6, %r1, 0, 0x7771U ;
500+ ; O0-NEXT: prmt.b32 %r5, %r2, 0, 0x9991U ;
501+ ; O0-NEXT: prmt.b32 %r6, %r1, 0, 0x9991U ;
486502; O0-NEXT: setp.le.s32 %p2, %r6, %r5;
487- ; O0-NEXT: prmt.b32 %r7, %r2, 0, 0x7772U ;
488- ; O0-NEXT: prmt.b32 %r8, %r1, 0, 0x7772U ;
503+ ; O0-NEXT: prmt.b32 %r7, %r2, 0, 0xaaa2U ;
504+ ; O0-NEXT: prmt.b32 %r8, %r1, 0, 0xaaa2U ;
489505; O0-NEXT: setp.le.s32 %p3, %r8, %r7;
490- ; O0-NEXT: prmt.b32 %r9, %r2, 0, 0x7773U ;
491- ; O0-NEXT: prmt.b32 %r10, %r1, 0, 0x7773U ;
506+ ; O0-NEXT: prmt.b32 %r9, %r2, 0, 0xbbb3U ;
507+ ; O0-NEXT: prmt.b32 %r10, %r1, 0, 0xbbb3U ;
492508; O0-NEXT: setp.le.s32 %p4, %r10, %r9;
493- ; O0-NEXT: selp.b32 %r11, %r10, %r9, %p4;
494- ; O0-NEXT: selp.b32 %r12, %r8, %r7, %p3;
495- ; O0-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U;
496- ; O0-NEXT: selp.b32 %r14, %r6, %r5, %p2;
497- ; O0-NEXT: selp.b32 %r15, %r4, %r3, %p1;
498- ; O0-NEXT: prmt.b32 %r16, %r15, %r14, 0x3340U;
499- ; O0-NEXT: prmt.b32 %r17, %r16, %r13, 0x5410U;
500- ; O0-NEXT: st.param.b32 [func_retval0], %r17;
509+ ; O0-NEXT: prmt.b32 %r11, %r2, 0, 0x7770U;
510+ ; O0-NEXT: prmt.b32 %r12, %r2, 0, 0x7771U;
511+ ; O0-NEXT: prmt.b32 %r13, %r2, 0, 0x7772U;
512+ ; O0-NEXT: prmt.b32 %r14, %r2, 0, 0x7773U;
513+ ; O0-NEXT: prmt.b32 %r15, %r1, 0, 0x7773U;
514+ ; O0-NEXT: selp.b32 %r16, %r15, %r14, %p4;
515+ ; O0-NEXT: prmt.b32 %r17, %r1, 0, 0x7772U;
516+ ; O0-NEXT: selp.b32 %r18, %r17, %r13, %p3;
517+ ; O0-NEXT: prmt.b32 %r19, %r18, %r16, 0x3340U;
518+ ; O0-NEXT: prmt.b32 %r20, %r1, 0, 0x7771U;
519+ ; O0-NEXT: selp.b32 %r21, %r20, %r12, %p2;
520+ ; O0-NEXT: prmt.b32 %r22, %r1, 0, 0x7770U;
521+ ; O0-NEXT: selp.b32 %r23, %r22, %r11, %p1;
522+ ; O0-NEXT: prmt.b32 %r24, %r23, %r21, 0x3340U;
523+ ; O0-NEXT: prmt.b32 %r25, %r24, %r19, 0x5410U;
524+ ; O0-NEXT: st.param.b32 [func_retval0], %r25;
501525; O0-NEXT: ret;
502526;
503527; O3-LABEL: test_smin(
504528; O3: {
505529; O3-NEXT: .reg .pred %p<5>;
506- ; O3-NEXT: .reg .b32 %r<18 >;
530+ ; O3-NEXT: .reg .b32 %r<26 >;
507531; O3-EMPTY:
508532; O3-NEXT: // %bb.0:
509533; O3-NEXT: ld.param.b32 %r1, [test_smin_param_0];
510534; O3-NEXT: ld.param.b32 %r2, [test_smin_param_1];
511- ; O3-NEXT: prmt.b32 %r3, %r2, 0, 0x7770U ;
512- ; O3-NEXT: prmt.b32 %r4, %r1, 0, 0x7770U ;
535+ ; O3-NEXT: prmt.b32 %r3, %r2, 0, 0x8880U ;
536+ ; O3-NEXT: prmt.b32 %r4, %r1, 0, 0x8880U ;
513537; O3-NEXT: setp.le.s32 %p1, %r4, %r3;
514- ; O3-NEXT: prmt.b32 %r5, %r2, 0, 0x7771U ;
515- ; O3-NEXT: prmt.b32 %r6, %r1, 0, 0x7771U ;
538+ ; O3-NEXT: prmt.b32 %r5, %r2, 0, 0x9991U ;
539+ ; O3-NEXT: prmt.b32 %r6, %r1, 0, 0x9991U ;
516540; O3-NEXT: setp.le.s32 %p2, %r6, %r5;
517- ; O3-NEXT: prmt.b32 %r7, %r2, 0, 0x7772U ;
518- ; O3-NEXT: prmt.b32 %r8, %r1, 0, 0x7772U ;
541+ ; O3-NEXT: prmt.b32 %r7, %r2, 0, 0xaaa2U ;
542+ ; O3-NEXT: prmt.b32 %r8, %r1, 0, 0xaaa2U ;
519543; O3-NEXT: setp.le.s32 %p3, %r8, %r7;
520- ; O3-NEXT: prmt.b32 %r9, %r2, 0, 0x7773U ;
521- ; O3-NEXT: prmt.b32 %r10, %r1, 0, 0x7773U ;
544+ ; O3-NEXT: prmt.b32 %r9, %r2, 0, 0xbbb3U ;
545+ ; O3-NEXT: prmt.b32 %r10, %r1, 0, 0xbbb3U ;
522546; O3-NEXT: setp.le.s32 %p4, %r10, %r9;
523- ; O3-NEXT: selp.b32 %r11, %r10, %r9, %p4;
524- ; O3-NEXT: selp.b32 %r12, %r8, %r7, %p3;
525- ; O3-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U;
526- ; O3-NEXT: selp.b32 %r14, %r6, %r5, %p2;
527- ; O3-NEXT: selp.b32 %r15, %r4, %r3, %p1;
528- ; O3-NEXT: prmt.b32 %r16, %r15, %r14, 0x3340U;
529- ; O3-NEXT: prmt.b32 %r17, %r16, %r13, 0x5410U;
530- ; O3-NEXT: st.param.b32 [func_retval0], %r17;
547+ ; O3-NEXT: prmt.b32 %r11, %r2, 0, 0x7770U;
548+ ; O3-NEXT: prmt.b32 %r12, %r2, 0, 0x7771U;
549+ ; O3-NEXT: prmt.b32 %r13, %r2, 0, 0x7772U;
550+ ; O3-NEXT: prmt.b32 %r14, %r2, 0, 0x7773U;
551+ ; O3-NEXT: prmt.b32 %r15, %r1, 0, 0x7773U;
552+ ; O3-NEXT: selp.b32 %r16, %r15, %r14, %p4;
553+ ; O3-NEXT: prmt.b32 %r17, %r1, 0, 0x7772U;
554+ ; O3-NEXT: selp.b32 %r18, %r17, %r13, %p3;
555+ ; O3-NEXT: prmt.b32 %r19, %r18, %r16, 0x3340U;
556+ ; O3-NEXT: prmt.b32 %r20, %r1, 0, 0x7771U;
557+ ; O3-NEXT: selp.b32 %r21, %r20, %r12, %p2;
558+ ; O3-NEXT: prmt.b32 %r22, %r1, 0, 0x7770U;
559+ ; O3-NEXT: selp.b32 %r23, %r22, %r11, %p1;
560+ ; O3-NEXT: prmt.b32 %r24, %r23, %r21, 0x3340U;
561+ ; O3-NEXT: prmt.b32 %r25, %r24, %r19, 0x5410U;
562+ ; O3-NEXT: st.param.b32 [func_retval0], %r25;
531563; O3-NEXT: ret;
532564 %cmp = icmp sle <4 x i8 > %a , %b
533565 %r = select <4 x i1 > %cmp , <4 x i8 > %a , <4 x i8 > %b
0 commit comments