|
452 | 452 |
|
453 | 453 | MOVDDUP(4 * SIZE, A1, a1)
|
454 | 454 |
|
455 |
| - movsd 0 * SIZE(YY), yy1 |
456 |
| - movhpd 1 * SIZE(YY), yy1 |
457 |
| - movsd 2 * SIZE(YY), yy2 |
458 |
| - movhpd 3 * SIZE(YY), yy2 |
459 |
| - |
460 | 455 | movapd 8 * SIZE(XX), xtemp1
|
461 | 456 | movapd 10 * SIZE(XX), xtemp2
|
462 | 457 | movapd 12 * SIZE(XX), xtemp3
|
|
475 | 470 | MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2)
|
476 | 471 | ALIGN_3
|
477 | 472 |
|
| 473 | +.L12_prep: |
| 474 | + movsd 0 * SIZE(YY), yy1 |
| 475 | + movhpd 1 * SIZE(YY), yy1 |
| 476 | + movsd 2 * SIZE(YY), yy2 |
| 477 | + movhpd 3 * SIZE(YY), yy2 |
| 478 | + |
478 | 479 | .L12:
|
479 | 480 | movapd xtemp1, xt1
|
480 | 481 | mulpd a1, xt1
|
|
608 | 609 |
|
609 | 610 | movlpd yy2, 6 * SIZE(YY)
|
610 | 611 | movhpd yy2, 7 * SIZE(YY)
|
611 |
| - movsd 10 * SIZE(YY), yy2 |
612 |
| - movhpd 11 * SIZE(YY), yy2 |
613 | 612 |
|
614 | 613 | movapd xtemp2, xt1
|
615 | 614 | movapd 18 * SIZE(XX), xtemp2
|
|
621 | 620 |
|
622 | 621 | movlpd yy1, 4 * SIZE(YY)
|
623 | 622 | movhpd yy1, 5 * SIZE(YY)
|
624 |
| - movsd 8 * SIZE(YY), yy1 |
625 |
| - movhpd 9 * SIZE(YY), yy1 |
626 | 623 |
|
627 | 624 | subq $-16 * SIZE, XX
|
628 | 625 | addq $ 8 * SIZE, YY
|
629 | 626 | addq $ 8 * SIZE, A1
|
630 | 627 | addq $ 8 * SIZE, A2
|
631 | 628 |
|
632 | 629 | decq I
|
633 |
| - jg .L12 |
| 630 | + jg .L12_prep |
| 631 | + jmp .L15 |
634 | 632 | ALIGN_3
|
635 | 633 |
|
636 | 634 | .L14:
|
|
641 | 639 | jle .L16
|
642 | 640 |
|
643 | 641 | MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2)
|
644 |
| - jmp .L15_pastcheck |
645 | 642 |
|
646 | 643 | .L15:
|
647 | 644 | movq M, I
|
|
650 | 647 | testq $2, I
|
651 | 648 | jle .L16
|
652 | 649 |
|
| 650 | + movsd 0 * SIZE(YY), yy1 |
| 651 | + movhpd 1 * SIZE(YY), yy1 |
| 652 | + movsd 2 * SIZE(YY), yy2 |
| 653 | + movhpd 3 * SIZE(YY), yy2 |
| 654 | + |
653 | 655 | .L15_pastcheck:
|
654 | 656 | movapd xtemp1, xt1
|
655 | 657 | mulpd a1, xt1
|
|
705 | 707 |
|
706 | 708 | movlpd yy2, 2 * SIZE(YY)
|
707 | 709 | movhpd yy2, 3 * SIZE(YY)
|
708 |
| - movsd 6 * SIZE(YY), yy2 |
709 |
| - movhpd 7 * SIZE(YY), yy2 |
710 | 710 |
|
711 | 711 | movapd xtemp2, xt1
|
712 | 712 | movapd 10 * SIZE(XX), xtemp2
|
|
717 | 717 |
|
718 | 718 | movlpd yy1, 0 * SIZE(YY)
|
719 | 719 | movhpd yy1, 1 * SIZE(YY)
|
720 |
| - movsd 4 * SIZE(YY), yy1 |
721 |
| - movhpd 5 * SIZE(YY), yy1 |
722 | 720 |
|
723 | 721 | addq $4 * SIZE, YY
|
724 | 722 | addq $4 * SIZE, A1
|
|
731 | 729 |
|
732 | 730 | MOVDDUP(1 * SIZE, A1, a2)
|
733 | 731 |
|
| 732 | + movsd 0 * SIZE(YY), yy1 |
| 733 | + movhpd 1 * SIZE(YY), yy1 |
| 734 | + |
734 | 735 | movapd xtemp1, xt1
|
735 | 736 | mulpd a1, xt1
|
736 | 737 | mulpd atemp1, a1
|
|
0 commit comments