@@ -1162,41 +1162,119 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
		// BNE	loop
		// There's a past-the-end pointer here, any problem with that?

-	case ssa.OpARM64DUFFCOPY:
-		p := s.Prog(obj.ADUFFCOPY)
-		p.To.Type = obj.TYPE_MEM
-		p.To.Name = obj.NAME_EXTERN
-		p.To.Sym = ir.Syms.Duffcopy
-		p.To.Offset = v.AuxInt
	case ssa.OpARM64LoweredMove:
-		// LDP.P	16(R16), (R25, Rtmp)
-		// STP.P	(R25, Rtmp), 16(R17)
-		// CMP	Rarg2, R16
-		// BLE	-3(PC)
-		// arg2 is the address of the last element of src
-		p := s.Prog(arm64.ALDP)
-		p.Scond = arm64.C_XPOST
-		p.From.Type = obj.TYPE_MEM
-		p.From.Reg = arm64.REG_R16
-		p.From.Offset = 16
-		p.To.Type = obj.TYPE_REGREG
-		p.To.Reg = arm64.REG_R25
-		p.To.Offset = int64(arm64.REGTMP)
-		p2 := s.Prog(arm64.ASTP)
-		p2.Scond = arm64.C_XPOST
-		p2.From.Type = obj.TYPE_REGREG
-		p2.From.Reg = arm64.REG_R25
-		p2.From.Offset = int64(arm64.REGTMP)
-		p2.To.Type = obj.TYPE_MEM
-		p2.To.Reg = arm64.REG_R17
-		p2.To.Offset = 16
-		p3 := s.Prog(arm64.ACMP)
-		p3.From.Type = obj.TYPE_REG
-		p3.From.Reg = v.Args[2].Reg()
-		p3.Reg = arm64.REG_R16
-		p4 := s.Prog(arm64.ABLE)
-		p4.To.Type = obj.TYPE_BRANCH
-		p4.To.SetTarget(p)
+		dstReg := v.Args[0].Reg()
+		srcReg := v.Args[1].Reg()
+		if dstReg == srcReg {
+			break
+		}
+		tmpReg1 := int16(arm64.REG_R24)
+		tmpReg2 := int16(arm64.REG_R25)
+		n := v.AuxInt
+		if n < 16 {
+			v.Fatalf("Move too small %d", n)
+		}
+
+		// Generate copying instructions.
+		var off int64
+		for n >= 16 {
+			// LDP	off(srcReg), (tmpReg1, tmpReg2)
+			// STP	(tmpReg1, tmpReg2), off(dstReg)
+			move16(s, srcReg, dstReg, tmpReg1, tmpReg2, off, false)
+			off += 16
+			n -= 16
+		}
+		if n > 8 {
+			// MOVD	off(srcReg), tmpReg1
+			// MOVD	tmpReg1, off(dstReg)
+			move8(s, srcReg, dstReg, tmpReg1, off)
+			off += 8
+			n -= 8
+		}
+		if n != 0 {
+			// MOVD	off+n-8(srcReg), tmpReg1
+			// MOVD	tmpReg1, off+n-8(dstReg)
+			move8(s, srcReg, dstReg, tmpReg1, off+n-8)
+		}
+	case ssa.OpARM64LoweredMoveLoop:
+		dstReg := v.Args[0].Reg()
+		srcReg := v.Args[1].Reg()
+		if dstReg == srcReg {
+			break
+		}
+		countReg := int16(arm64.REG_R23)
+		tmpReg1 := int16(arm64.REG_R24)
+		tmpReg2 := int16(arm64.REG_R25)
+		n := v.AuxInt
+		loopSize := int64(64)
+		if n < 3*loopSize {
+			// - a loop count of 0 won't work.
+			// - a loop count of 1 is useless.
+			// - a loop count of 2 is a code size ~tie
+			//     3 instructions to implement the loop
+			//     4 instructions in the loop body
+			//   vs
+			//     8 instructions in the straightline code
+			//   Might as well use straightline code.
+			v.Fatalf("ZeroLoop size too small %d", n)
+		}
+
+		// Put iteration count in a register.
+		//   MOVD	$n, countReg
+		p := s.Prog(arm64.AMOVD)
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = n / loopSize
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = countReg
+		cntInit := p
+
+		// Move loopSize bytes starting at srcReg to dstReg.
+		// Increment srcReg and destReg by loopSize as a side effect.
+		for range loopSize / 16 {
+			// LDP.P	16(srcReg), (tmpReg1, tmpReg2)
+			// STP.P	(tmpReg1, tmpReg2), 16(dstReg)
+			move16(s, srcReg, dstReg, tmpReg1, tmpReg2, 0, true)
+		}
+		// Decrement loop count.
+		//   SUB	$1, countReg
+		p = s.Prog(arm64.ASUB)
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = 1
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = countReg
+		// Jump to loop header if we're not done yet.
+		//   CBNZ	head
+		p = s.Prog(arm64.ACBNZ)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = countReg
+		p.To.Type = obj.TYPE_BRANCH
+		p.To.SetTarget(cntInit.Link)
+
+		// Multiples of the loop size are now done.
+		n %= loopSize
+
+		// Copy any fractional portion.
+		var off int64
+		for n >= 16 {
+			// LDP	off(srcReg), (tmpReg1, tmpReg2)
+			// STP	(tmpReg1, tmpReg2), off(dstReg)
+			move16(s, srcReg, dstReg, tmpReg1, tmpReg2, off, false)
+			off += 16
+			n -= 16
+		}
+		if n > 8 {
+			// MOVD	off(srcReg), tmpReg1
+			// MOVD	tmpReg1, off(dstReg)
+			move8(s, srcReg, dstReg, tmpReg1, off)
+			off += 8
+			n -= 8
+		}
+		if n != 0 {
+			// MOVD	off+n-8(srcReg), tmpReg1
+			// MOVD	tmpReg1, off+n-8(dstReg)
+			move8(s, srcReg, dstReg, tmpReg1, off+n-8)
+		}
+
	case ssa.OpARM64CALLstatic, ssa.OpARM64CALLclosure, ssa.OpARM64CALLinter:
		s.Call(v)
	case ssa.OpARM64CALLtail:
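
The straight-line LoweredMove expansion above picks its offsets so the final 8-byte copy ends exactly at byte n, overlapping already-copied bytes whenever n is not a multiple of 8. As a minimal illustrative sketch (the moveSchedule helper and its string output are invented for this note and are not part of the CL), the same offset schedule can be modeled in plain Go:

// Standalone model of the offset schedule used by the straight-line
// LoweredMove case above. Illustrative only, not compiler code.
package main

import "fmt"

func moveSchedule(n int64) []string {
	var ops []string
	var off int64
	for n >= 16 {
		ops = append(ops, fmt.Sprintf("copy 16 bytes at offset %d (LDP/STP)", off))
		off += 16
		n -= 16
	}
	if n > 8 {
		ops = append(ops, fmt.Sprintf("copy 8 bytes at offset %d (MOVD)", off))
		off += 8
		n -= 8
	}
	if n != 0 {
		// The last copy ends exactly at the original size, so it may
		// re-copy a few bytes already moved; for a copy that is harmless.
		ops = append(ops, fmt.Sprintf("copy 8 bytes at offset %d (MOVD, possibly overlapping)", off+n-8))
	}
	return ops
}

func main() {
	// For n = 45: 16-byte copies at offsets 0 and 16, an 8-byte copy
	// at 32, and a final overlapping 8-byte copy at 37 (= 45-8).
	for _, op := range moveSchedule(45) {
		fmt.Println(op)
	}
}
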
@@ -1599,3 +1677,53 @@ func zero8(s *ssagen.State, reg int16, off int64) {
	p.To.Reg = reg
	p.To.Offset = off
}
+
+// move16 copies 16 bytes at src+off to dst+off.
+// Uses registers tmp1 and tmp2.
+// If postInc is true, increment src and dst by 16.
+func move16(s *ssagen.State, src, dst, tmp1, tmp2 int16, off int64, postInc bool) {
+	// LDP	off(src), (tmp1, tmp2)
+	ld := s.Prog(arm64.ALDP)
+	ld.From.Type = obj.TYPE_MEM
+	ld.From.Reg = src
+	ld.From.Offset = off
+	ld.To.Type = obj.TYPE_REGREG
+	ld.To.Reg = tmp1
+	ld.To.Offset = int64(tmp2)
+	// STP	(tmp1, tmp2), off(dst)
+	st := s.Prog(arm64.ASTP)
+	st.From.Type = obj.TYPE_REGREG
+	st.From.Reg = tmp1
+	st.From.Offset = int64(tmp2)
+	st.To.Type = obj.TYPE_MEM
+	st.To.Reg = dst
+	st.To.Offset = off
+	if postInc {
+		if off != 0 {
+			panic("can't postinc with non-zero offset")
+		}
+		ld.Scond = arm64.C_XPOST
+		st.Scond = arm64.C_XPOST
+		ld.From.Offset = 16
+		st.To.Offset = 16
+	}
+}
+
+// move8 copies 8 bytes at src+off to dst+off.
+// Uses register tmp.
+func move8(s *ssagen.State, src, dst, tmp int16, off int64) {
+	// MOVD	off(src), tmp
+	ld := s.Prog(arm64.AMOVD)
+	ld.From.Type = obj.TYPE_MEM
+	ld.From.Reg = src
+	ld.From.Offset = off
+	ld.To.Type = obj.TYPE_REG
+	ld.To.Reg = tmp
+	// MOVD	tmp, off(dst)
+	st := s.Prog(arm64.AMOVD)
+	st.From.Type = obj.TYPE_REG
+	st.From.Reg = tmp
+	st.To.Type = obj.TYPE_MEM
+	st.To.Reg = dst
+	st.To.Offset = off
+}
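
For the LoweredMoveLoop case, the split between the counted loop and the straight-line tail is plain integer division by loopSize. A hedged back-of-the-envelope check, with an example size chosen purely for illustration (not taken from the CL):

package main

import "fmt"

func main() {
	const loopSize int64 = 64
	n := int64(200) // example size; LoweredMoveLoop requires n >= 3*loopSize

	fmt.Println("loop iterations:", n/loopSize) // 3, copying 192 bytes with post-incrementing LDP/STP pairs
	n %= loopSize
	fmt.Println("residual bytes:", n) // 8, finished by a single move8 (one MOVD load and one MOVD store)
}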