@@ -283,6 +283,8 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo
283
283
c .RegSize = 8
284
284
c .lowerBlock = rewriteBlockLOONG64
285
285
c .lowerValue = rewriteValueLOONG64
286
+ c .lateLowerBlock = rewriteBlockLOONG64latelower
287
+ c .lateLowerValue = rewriteValueLOONG64latelower
286
288
c .registers = registersLOONG64 [:]
287
289
c .gpRegMask = gpRegMaskLOONG64
288
290
c .fpRegMask = fpRegMaskLOONG64
@@ -562,6 +564,43 @@ func (c *Config) buildRecipes(arch string) {
562
564
return m .Block .NewValue2I (m .Pos , OpARM64SUBshiftLL , m .Type , int64 (i ), x , y )
563
565
})
564
566
}
567
+ case "loong64" :
568
+ // - multiply is 4 cycles.
569
+ // - add/sub/shift are 1 cycle.
570
+ // On loong64, using a multiply also needs to load the constant into a register.
571
+ // TODO: figure out a happy medium.
572
+ mulCost = 45
573
+
574
+ // add
575
+ r (1 , 1 , 10 ,
576
+ func (m , x , y * Value ) * Value {
577
+ return m .Block .NewValue2 (m .Pos , OpLOONG64ADDV , m .Type , x , y )
578
+ })
579
+ // neg
580
+ r (- 1 , 0 , 10 ,
581
+ func (m , x , y * Value ) * Value {
582
+ return m .Block .NewValue1 (m .Pos , OpLOONG64NEGV , m .Type , x )
583
+ })
584
+ // sub
585
+ r (1 , - 1 , 10 ,
586
+ func (m , x , y * Value ) * Value {
587
+ return m .Block .NewValue2 (m .Pos , OpLOONG64SUBV , m .Type , x , y )
588
+ })
589
+
590
+ // regular shifts
591
+ for i := 1 ; i < 64 ; i ++ {
592
+ c := 10
593
+ if i == 1 {
594
+ // Prefer x<<1 over x+x.
595
+ // Note that we eventually reverse this decision in LOONG64latelower.rules,
596
+ // but this makes shift combining rules in LOONG64.rules simpler.
597
+ c --
598
+ }
599
+ r (1 << i , 0 , c ,
600
+ func (m , x , y * Value ) * Value {
601
+ return m .Block .NewValue1I (m .Pos , OpLOONG64SLLVconst , m .Type , int64 (i ), x )
602
+ })
603
+ }
565
604
}
566
605
567
606
c .mulRecipes = map [int64 ]mulRecipe {}
@@ -628,17 +667,58 @@ func (c *Config) buildRecipes(arch string) {
628
667
}
629
668
}
630
669
670
+ // Currently we only try recipes built from 3 linear combination instructions on loong64.
671
+ if arch == "loong64" {
672
+ // Three-instruction recipes.
673
+ // D: The first and the second are both single-instruction recipes applied to the original value, and both results are the third's inputs.
674
+ // E: The first single-instruction recipe is the second's input, and the second is the third's input.
675
+
676
+ // D
677
+ for _ , first := range linearCombos {
678
+ for _ , second := range linearCombos {
679
+ for _ , third := range linearCombos {
680
+ x := third .a * (first .a + first .b ) + third .b * (second .a + second .b )
681
+ cost := first .cost + second .cost + third .cost
682
+ old := c .mulRecipes [x ]
683
+ if (old .build == nil || cost < old .cost ) && cost < mulCost {
684
+ c .mulRecipes [x ] = mulRecipe {cost : cost , build : func (m , v * Value ) * Value {
685
+ v1 := first .build (m , v , v )
686
+ v2 := second .build (m , v , v )
687
+ return third .build (m , v1 , v2 )
688
+ }}
689
+ }
690
+ }
691
+ }
692
+ }
693
+
694
+ // E
695
+ for _ , first := range linearCombos {
696
+ for _ , second := range linearCombos {
697
+ for _ , third := range linearCombos {
698
+ x := third .a * (second .a * (first .a + first .b )+ second .b ) + third .b
699
+ cost := first .cost + second .cost + third .cost
700
+ old := c .mulRecipes [x ]
701
+ if (old .build == nil || cost < old .cost ) && cost < mulCost {
702
+ c .mulRecipes [x ] = mulRecipe {cost : cost , build : func (m , v * Value ) * Value {
703
+ v1 := first .build (m , v , v )
704
+ v2 := second .build (m , v1 , v )
705
+ return third .build (m , v2 , v )
706
+ }}
707
+ }
708
+ }
709
+ }
710
+ }
711
+ }
712
+
631
713
// These cases should be handled specially by rewrite rules.
632
714
// (Otherwise v * 1 == (neg (neg v)))
633
715
delete (c .mulRecipes , 0 )
634
716
delete (c .mulRecipes , 1 )
635
717
636
- // Currently we assume that it doesn't help to do 3 linear
637
- // combination instructions.
638
-
639
718
// Currently:
640
719
// len(c.mulRecipes) == 5984 on arm64
641
720
// 680 on amd64
721
+ // 5984 on loong64
642
722
// This function takes ~2.5ms on arm64.
643
723
//println(len(c.mulRecipes))
644
724
}
0 commit comments