@@ -343,61 +343,77 @@ define <4 x i8> @test_smax(<4 x i8> %a, <4 x i8> %b) #0 {
343
343
; O0-LABEL: test_smax(
344
344
; O0: {
345
345
; O0-NEXT: .reg .pred %p<5>;
346
- ; O0-NEXT: .reg .b32 %r<18 >;
346
+ ; O0-NEXT: .reg .b32 %r<26 >;
347
347
; O0-EMPTY:
348
348
; O0-NEXT: // %bb.0:
349
349
; O0-NEXT: ld.param.b32 %r2, [test_smax_param_1];
350
350
; O0-NEXT: ld.param.b32 %r1, [test_smax_param_0];
351
- ; O0-NEXT: prmt.b32 %r3, %r2, 0, 0x7770U ;
352
- ; O0-NEXT: prmt.b32 %r4, %r1, 0, 0x7770U ;
351
+ ; O0-NEXT: prmt.b32 %r3, %r2, 0, 0x8880U ;
352
+ ; O0-NEXT: prmt.b32 %r4, %r1, 0, 0x8880U ;
353
353
; O0-NEXT: setp.gt.s32 %p1, %r4, %r3;
354
- ; O0-NEXT: prmt.b32 %r5, %r2, 0, 0x7771U ;
355
- ; O0-NEXT: prmt.b32 %r6, %r1, 0, 0x7771U ;
354
+ ; O0-NEXT: prmt.b32 %r5, %r2, 0, 0x9991U ;
355
+ ; O0-NEXT: prmt.b32 %r6, %r1, 0, 0x9991U ;
356
356
; O0-NEXT: setp.gt.s32 %p2, %r6, %r5;
357
- ; O0-NEXT: prmt.b32 %r7, %r2, 0, 0x7772U ;
358
- ; O0-NEXT: prmt.b32 %r8, %r1, 0, 0x7772U ;
357
+ ; O0-NEXT: prmt.b32 %r7, %r2, 0, 0xaaa2U ;
358
+ ; O0-NEXT: prmt.b32 %r8, %r1, 0, 0xaaa2U ;
359
359
; O0-NEXT: setp.gt.s32 %p3, %r8, %r7;
360
- ; O0-NEXT: prmt.b32 %r9, %r2, 0, 0x7773U ;
361
- ; O0-NEXT: prmt.b32 %r10, %r1, 0, 0x7773U ;
360
+ ; O0-NEXT: prmt.b32 %r9, %r2, 0, 0xbbb3U ;
361
+ ; O0-NEXT: prmt.b32 %r10, %r1, 0, 0xbbb3U ;
362
362
; O0-NEXT: setp.gt.s32 %p4, %r10, %r9;
363
- ; O0-NEXT: selp.b32 %r11, %r10, %r9, %p4;
364
- ; O0-NEXT: selp.b32 %r12, %r8, %r7, %p3;
365
- ; O0-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U;
366
- ; O0-NEXT: selp.b32 %r14, %r6, %r5, %p2;
367
- ; O0-NEXT: selp.b32 %r15, %r4, %r3, %p1;
368
- ; O0-NEXT: prmt.b32 %r16, %r15, %r14, 0x3340U;
369
- ; O0-NEXT: prmt.b32 %r17, %r16, %r13, 0x5410U;
370
- ; O0-NEXT: st.param.b32 [func_retval0], %r17;
363
+ ; O0-NEXT: prmt.b32 %r11, %r2, 0, 0x7770U;
364
+ ; O0-NEXT: prmt.b32 %r12, %r2, 0, 0x7771U;
365
+ ; O0-NEXT: prmt.b32 %r13, %r2, 0, 0x7772U;
366
+ ; O0-NEXT: prmt.b32 %r14, %r2, 0, 0x7773U;
367
+ ; O0-NEXT: prmt.b32 %r15, %r1, 0, 0x7773U;
368
+ ; O0-NEXT: selp.b32 %r16, %r15, %r14, %p4;
369
+ ; O0-NEXT: prmt.b32 %r17, %r1, 0, 0x7772U;
370
+ ; O0-NEXT: selp.b32 %r18, %r17, %r13, %p3;
371
+ ; O0-NEXT: prmt.b32 %r19, %r18, %r16, 0x3340U;
372
+ ; O0-NEXT: prmt.b32 %r20, %r1, 0, 0x7771U;
373
+ ; O0-NEXT: selp.b32 %r21, %r20, %r12, %p2;
374
+ ; O0-NEXT: prmt.b32 %r22, %r1, 0, 0x7770U;
375
+ ; O0-NEXT: selp.b32 %r23, %r22, %r11, %p1;
376
+ ; O0-NEXT: prmt.b32 %r24, %r23, %r21, 0x3340U;
377
+ ; O0-NEXT: prmt.b32 %r25, %r24, %r19, 0x5410U;
378
+ ; O0-NEXT: st.param.b32 [func_retval0], %r25;
371
379
; O0-NEXT: ret;
372
380
;
373
381
; O3-LABEL: test_smax(
374
382
; O3: {
375
383
; O3-NEXT: .reg .pred %p<5>;
376
- ; O3-NEXT: .reg .b32 %r<18 >;
384
+ ; O3-NEXT: .reg .b32 %r<26 >;
377
385
; O3-EMPTY:
378
386
; O3-NEXT: // %bb.0:
379
387
; O3-NEXT: ld.param.b32 %r1, [test_smax_param_0];
380
388
; O3-NEXT: ld.param.b32 %r2, [test_smax_param_1];
381
- ; O3-NEXT: prmt.b32 %r3, %r2, 0, 0x7770U ;
382
- ; O3-NEXT: prmt.b32 %r4, %r1, 0, 0x7770U ;
389
+ ; O3-NEXT: prmt.b32 %r3, %r2, 0, 0x8880U ;
390
+ ; O3-NEXT: prmt.b32 %r4, %r1, 0, 0x8880U ;
383
391
; O3-NEXT: setp.gt.s32 %p1, %r4, %r3;
384
- ; O3-NEXT: prmt.b32 %r5, %r2, 0, 0x7771U ;
385
- ; O3-NEXT: prmt.b32 %r6, %r1, 0, 0x7771U ;
392
+ ; O3-NEXT: prmt.b32 %r5, %r2, 0, 0x9991U ;
393
+ ; O3-NEXT: prmt.b32 %r6, %r1, 0, 0x9991U ;
386
394
; O3-NEXT: setp.gt.s32 %p2, %r6, %r5;
387
- ; O3-NEXT: prmt.b32 %r7, %r2, 0, 0x7772U ;
388
- ; O3-NEXT: prmt.b32 %r8, %r1, 0, 0x7772U ;
395
+ ; O3-NEXT: prmt.b32 %r7, %r2, 0, 0xaaa2U ;
396
+ ; O3-NEXT: prmt.b32 %r8, %r1, 0, 0xaaa2U ;
389
397
; O3-NEXT: setp.gt.s32 %p3, %r8, %r7;
390
- ; O3-NEXT: prmt.b32 %r9, %r2, 0, 0x7773U ;
391
- ; O3-NEXT: prmt.b32 %r10, %r1, 0, 0x7773U ;
398
+ ; O3-NEXT: prmt.b32 %r9, %r2, 0, 0xbbb3U ;
399
+ ; O3-NEXT: prmt.b32 %r10, %r1, 0, 0xbbb3U ;
392
400
; O3-NEXT: setp.gt.s32 %p4, %r10, %r9;
393
- ; O3-NEXT: selp.b32 %r11, %r10, %r9, %p4;
394
- ; O3-NEXT: selp.b32 %r12, %r8, %r7, %p3;
395
- ; O3-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U;
396
- ; O3-NEXT: selp.b32 %r14, %r6, %r5, %p2;
397
- ; O3-NEXT: selp.b32 %r15, %r4, %r3, %p1;
398
- ; O3-NEXT: prmt.b32 %r16, %r15, %r14, 0x3340U;
399
- ; O3-NEXT: prmt.b32 %r17, %r16, %r13, 0x5410U;
400
- ; O3-NEXT: st.param.b32 [func_retval0], %r17;
401
+ ; O3-NEXT: prmt.b32 %r11, %r2, 0, 0x7770U;
402
+ ; O3-NEXT: prmt.b32 %r12, %r2, 0, 0x7771U;
403
+ ; O3-NEXT: prmt.b32 %r13, %r2, 0, 0x7772U;
404
+ ; O3-NEXT: prmt.b32 %r14, %r2, 0, 0x7773U;
405
+ ; O3-NEXT: prmt.b32 %r15, %r1, 0, 0x7773U;
406
+ ; O3-NEXT: selp.b32 %r16, %r15, %r14, %p4;
407
+ ; O3-NEXT: prmt.b32 %r17, %r1, 0, 0x7772U;
408
+ ; O3-NEXT: selp.b32 %r18, %r17, %r13, %p3;
409
+ ; O3-NEXT: prmt.b32 %r19, %r18, %r16, 0x3340U;
410
+ ; O3-NEXT: prmt.b32 %r20, %r1, 0, 0x7771U;
411
+ ; O3-NEXT: selp.b32 %r21, %r20, %r12, %p2;
412
+ ; O3-NEXT: prmt.b32 %r22, %r1, 0, 0x7770U;
413
+ ; O3-NEXT: selp.b32 %r23, %r22, %r11, %p1;
414
+ ; O3-NEXT: prmt.b32 %r24, %r23, %r21, 0x3340U;
415
+ ; O3-NEXT: prmt.b32 %r25, %r24, %r19, 0x5410U;
416
+ ; O3-NEXT: st.param.b32 [func_retval0], %r25;
401
417
; O3-NEXT: ret;
402
418
%cmp = icmp sgt <4 x i8 > %a , %b
403
419
%r = select <4 x i1 > %cmp , <4 x i8 > %a , <4 x i8 > %b
@@ -473,61 +489,77 @@ define <4 x i8> @test_smin(<4 x i8> %a, <4 x i8> %b) #0 {
473
489
; O0-LABEL: test_smin(
474
490
; O0: {
475
491
; O0-NEXT: .reg .pred %p<5>;
476
- ; O0-NEXT: .reg .b32 %r<18 >;
492
+ ; O0-NEXT: .reg .b32 %r<26 >;
477
493
; O0-EMPTY:
478
494
; O0-NEXT: // %bb.0:
479
495
; O0-NEXT: ld.param.b32 %r2, [test_smin_param_1];
480
496
; O0-NEXT: ld.param.b32 %r1, [test_smin_param_0];
481
- ; O0-NEXT: prmt.b32 %r3, %r2, 0, 0x7770U ;
482
- ; O0-NEXT: prmt.b32 %r4, %r1, 0, 0x7770U ;
497
+ ; O0-NEXT: prmt.b32 %r3, %r2, 0, 0x8880U ;
498
+ ; O0-NEXT: prmt.b32 %r4, %r1, 0, 0x8880U ;
483
499
; O0-NEXT: setp.le.s32 %p1, %r4, %r3;
484
- ; O0-NEXT: prmt.b32 %r5, %r2, 0, 0x7771U ;
485
- ; O0-NEXT: prmt.b32 %r6, %r1, 0, 0x7771U ;
500
+ ; O0-NEXT: prmt.b32 %r5, %r2, 0, 0x9991U ;
501
+ ; O0-NEXT: prmt.b32 %r6, %r1, 0, 0x9991U ;
486
502
; O0-NEXT: setp.le.s32 %p2, %r6, %r5;
487
- ; O0-NEXT: prmt.b32 %r7, %r2, 0, 0x7772U ;
488
- ; O0-NEXT: prmt.b32 %r8, %r1, 0, 0x7772U ;
503
+ ; O0-NEXT: prmt.b32 %r7, %r2, 0, 0xaaa2U ;
504
+ ; O0-NEXT: prmt.b32 %r8, %r1, 0, 0xaaa2U ;
489
505
; O0-NEXT: setp.le.s32 %p3, %r8, %r7;
490
- ; O0-NEXT: prmt.b32 %r9, %r2, 0, 0x7773U ;
491
- ; O0-NEXT: prmt.b32 %r10, %r1, 0, 0x7773U ;
506
+ ; O0-NEXT: prmt.b32 %r9, %r2, 0, 0xbbb3U ;
507
+ ; O0-NEXT: prmt.b32 %r10, %r1, 0, 0xbbb3U ;
492
508
; O0-NEXT: setp.le.s32 %p4, %r10, %r9;
493
- ; O0-NEXT: selp.b32 %r11, %r10, %r9, %p4;
494
- ; O0-NEXT: selp.b32 %r12, %r8, %r7, %p3;
495
- ; O0-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U;
496
- ; O0-NEXT: selp.b32 %r14, %r6, %r5, %p2;
497
- ; O0-NEXT: selp.b32 %r15, %r4, %r3, %p1;
498
- ; O0-NEXT: prmt.b32 %r16, %r15, %r14, 0x3340U;
499
- ; O0-NEXT: prmt.b32 %r17, %r16, %r13, 0x5410U;
500
- ; O0-NEXT: st.param.b32 [func_retval0], %r17;
509
+ ; O0-NEXT: prmt.b32 %r11, %r2, 0, 0x7770U;
510
+ ; O0-NEXT: prmt.b32 %r12, %r2, 0, 0x7771U;
511
+ ; O0-NEXT: prmt.b32 %r13, %r2, 0, 0x7772U;
512
+ ; O0-NEXT: prmt.b32 %r14, %r2, 0, 0x7773U;
513
+ ; O0-NEXT: prmt.b32 %r15, %r1, 0, 0x7773U;
514
+ ; O0-NEXT: selp.b32 %r16, %r15, %r14, %p4;
515
+ ; O0-NEXT: prmt.b32 %r17, %r1, 0, 0x7772U;
516
+ ; O0-NEXT: selp.b32 %r18, %r17, %r13, %p3;
517
+ ; O0-NEXT: prmt.b32 %r19, %r18, %r16, 0x3340U;
518
+ ; O0-NEXT: prmt.b32 %r20, %r1, 0, 0x7771U;
519
+ ; O0-NEXT: selp.b32 %r21, %r20, %r12, %p2;
520
+ ; O0-NEXT: prmt.b32 %r22, %r1, 0, 0x7770U;
521
+ ; O0-NEXT: selp.b32 %r23, %r22, %r11, %p1;
522
+ ; O0-NEXT: prmt.b32 %r24, %r23, %r21, 0x3340U;
523
+ ; O0-NEXT: prmt.b32 %r25, %r24, %r19, 0x5410U;
524
+ ; O0-NEXT: st.param.b32 [func_retval0], %r25;
501
525
; O0-NEXT: ret;
502
526
;
503
527
; O3-LABEL: test_smin(
504
528
; O3: {
505
529
; O3-NEXT: .reg .pred %p<5>;
506
- ; O3-NEXT: .reg .b32 %r<18 >;
530
+ ; O3-NEXT: .reg .b32 %r<26 >;
507
531
; O3-EMPTY:
508
532
; O3-NEXT: // %bb.0:
509
533
; O3-NEXT: ld.param.b32 %r1, [test_smin_param_0];
510
534
; O3-NEXT: ld.param.b32 %r2, [test_smin_param_1];
511
- ; O3-NEXT: prmt.b32 %r3, %r2, 0, 0x7770U ;
512
- ; O3-NEXT: prmt.b32 %r4, %r1, 0, 0x7770U ;
535
+ ; O3-NEXT: prmt.b32 %r3, %r2, 0, 0x8880U ;
536
+ ; O3-NEXT: prmt.b32 %r4, %r1, 0, 0x8880U ;
513
537
; O3-NEXT: setp.le.s32 %p1, %r4, %r3;
514
- ; O3-NEXT: prmt.b32 %r5, %r2, 0, 0x7771U ;
515
- ; O3-NEXT: prmt.b32 %r6, %r1, 0, 0x7771U ;
538
+ ; O3-NEXT: prmt.b32 %r5, %r2, 0, 0x9991U ;
539
+ ; O3-NEXT: prmt.b32 %r6, %r1, 0, 0x9991U ;
516
540
; O3-NEXT: setp.le.s32 %p2, %r6, %r5;
517
- ; O3-NEXT: prmt.b32 %r7, %r2, 0, 0x7772U ;
518
- ; O3-NEXT: prmt.b32 %r8, %r1, 0, 0x7772U ;
541
+ ; O3-NEXT: prmt.b32 %r7, %r2, 0, 0xaaa2U ;
542
+ ; O3-NEXT: prmt.b32 %r8, %r1, 0, 0xaaa2U ;
519
543
; O3-NEXT: setp.le.s32 %p3, %r8, %r7;
520
- ; O3-NEXT: prmt.b32 %r9, %r2, 0, 0x7773U ;
521
- ; O3-NEXT: prmt.b32 %r10, %r1, 0, 0x7773U ;
544
+ ; O3-NEXT: prmt.b32 %r9, %r2, 0, 0xbbb3U ;
545
+ ; O3-NEXT: prmt.b32 %r10, %r1, 0, 0xbbb3U ;
522
546
; O3-NEXT: setp.le.s32 %p4, %r10, %r9;
523
- ; O3-NEXT: selp.b32 %r11, %r10, %r9, %p4;
524
- ; O3-NEXT: selp.b32 %r12, %r8, %r7, %p3;
525
- ; O3-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U;
526
- ; O3-NEXT: selp.b32 %r14, %r6, %r5, %p2;
527
- ; O3-NEXT: selp.b32 %r15, %r4, %r3, %p1;
528
- ; O3-NEXT: prmt.b32 %r16, %r15, %r14, 0x3340U;
529
- ; O3-NEXT: prmt.b32 %r17, %r16, %r13, 0x5410U;
530
- ; O3-NEXT: st.param.b32 [func_retval0], %r17;
547
+ ; O3-NEXT: prmt.b32 %r11, %r2, 0, 0x7770U;
548
+ ; O3-NEXT: prmt.b32 %r12, %r2, 0, 0x7771U;
549
+ ; O3-NEXT: prmt.b32 %r13, %r2, 0, 0x7772U;
550
+ ; O3-NEXT: prmt.b32 %r14, %r2, 0, 0x7773U;
551
+ ; O3-NEXT: prmt.b32 %r15, %r1, 0, 0x7773U;
552
+ ; O3-NEXT: selp.b32 %r16, %r15, %r14, %p4;
553
+ ; O3-NEXT: prmt.b32 %r17, %r1, 0, 0x7772U;
554
+ ; O3-NEXT: selp.b32 %r18, %r17, %r13, %p3;
555
+ ; O3-NEXT: prmt.b32 %r19, %r18, %r16, 0x3340U;
556
+ ; O3-NEXT: prmt.b32 %r20, %r1, 0, 0x7771U;
557
+ ; O3-NEXT: selp.b32 %r21, %r20, %r12, %p2;
558
+ ; O3-NEXT: prmt.b32 %r22, %r1, 0, 0x7770U;
559
+ ; O3-NEXT: selp.b32 %r23, %r22, %r11, %p1;
560
+ ; O3-NEXT: prmt.b32 %r24, %r23, %r21, 0x3340U;
561
+ ; O3-NEXT: prmt.b32 %r25, %r24, %r19, 0x5410U;
562
+ ; O3-NEXT: st.param.b32 [func_retval0], %r25;
531
563
; O3-NEXT: ret;
532
564
%cmp = icmp sle <4 x i8 > %a , %b
533
565
%r = select <4 x i1 > %cmp , <4 x i8 > %a , <4 x i8 > %b
0 commit comments