@@ -38,7 +38,7 @@ import dmd.backend.ty;
3838import dmd.backend.type;
3939import dmd.backend.x86.xmm;
4040import dmd.backend.arm.cod1 : loadFromEA, storeToEA, getlvalue, CLIB_A , callclib;
41- import dmd.backend.arm.cod3 : conditionCode, genBranch, gentstreg, movregconst, COND , loadFloatRegConst;
41+ import dmd.backend.arm.cod3 : conditionCode, genBranch, genCompBranch, gentstreg, movregconst, COND , loadFloatRegConst;
4242import dmd.backend.arm.instr;
4343
4444nothrow :
@@ -1297,104 +1297,118 @@ private void cdmemsetn(ref CGstate cg, ref CodeBuilder cdb,elem* e,ref regm_t pr
12971297 elem* e2 = e.E2 ;
12981298 assert (e2.Eoper == OPparam);
12991299
1300- elem* evalue = e2.E2 ;
1301- elem* enelems = e2.E1 ;
1300+ /* evalue: value to store
1301+ szv: size in bytes of value
1302+ tyv: type of value
1303+ vregs: mask of registers for evalue
1304+ Rv: register holding evalue
1305+ Rvhi: MSW register holding evalue for 2*REGSIZE
13021306
1303- tym_t tymv = tybasic(evalue.Ety);
1304- const sz = tysize(evalue.Ety);
1305- assert ( cast ( int )sz > 1 );
1307+ enelems: count of evalues to store
1308+ cregs: mask of registers enelems is in
1309+ Rc: register with count
13061310
1307- if (tyxmmreg(tymv) && config.fpxmmregs)
1308- assert (0 ); // fix later
1309- if (tyfloating(tymv) && config.inline8087)
1310- assert (0 ); // fix later
1311+ e.E1: pointer to destination
1312+ dregs: mask of registers for pointer to destination
1313+ Rd: pointer to destination, return value
13111314
1312- const grex = I64 ? (REX_W << 16 ) : 0 ;
1315+ Rp: incrementing pointer
1316+ Rl: limit pointer value
1317+ */
13131318
1314- // get the count of elems into CX
1315- regm_t mregcx = mCX;
1316- codelem(cgstate,cdb,enelems,mregcx,false );
1319+ // Set cregs to count of elems
1320+ elem* enelems = e2.E1 ;
1321+ regm_t cregs = cg.allregs & ~ pretregs;
1322+ if (! cregs)
1323+ cregs = cg.allregs;
1324+ codelem(cgstate,cdb,enelems,cregs,false );
13171325
1318- // Get value into AX
1319- regm_t retregs3 = cgstate.allregs & ~ mregcx;
1320- if (sz == 2 * REGSIZE )
1321- retregs3 &= ~ (mBP | IDXREGS ); // BP cannot be used for register pair,
1322- // IDXREGS could deplete index regs - see sdtor.d test14815()
1323- scodelem(cgstate,cdb,evalue,retregs3,mregcx,false );
1326+ // Set vregs to value
1327+ elem* evalue = e2.E2 ;
1328+ tym_t tyv = tybasic(evalue.Ety);
1329+ const szv = tysize(tyv);
1330+ assert (cast (int )szv > 1 );
1331+ if (tyfloating(tyv))
1332+ assert (0 ); // TODO AArch64
1333+ regm_t vregs = cgstate.allregs & ~ cregs;
1334+ scodelem(cgstate,cdb,evalue,vregs,cregs,false );
13241335
13251336 /* Necessary because if evalue calls a function, and that function never returns,
13261337 * it doesn't affect registers. Which means those registers can be used for enregistering
13271338 * variables, and next pass fails because it can't use those registers, and so cannot
1328- * allocate registers for retregs3 . See ice11596.d
1339+ * allocate registers for vregs . See ice11596.d
13291340 */
1330- useregs(retregs3 );
1341+ useregs(vregs );
13311342
1332- reg_t valreg = findreg(retregs3);
1333- reg_t valreghi;
1334- if (sz == 2 * REGSIZE )
1343+ // Set [Rvhi,Rv] to value
1344+ reg_t Rv = findreg(vregs);
1345+ reg_t Rvhi = NOREG ;
1346+ if (szv == 2 * REGSIZE )
13351347 {
1336- valreg = findreglsw(retregs3 );
1337- valreghi = findregmsw(retregs3 );
1348+ Rv = findreg(vregs & INSTR . LSW );
1349+ Rvhi = findreg(vregs & INSTR . MSW );
13381350 }
13391351
13401352 freenode(e2);
13411353
1342- // Get s into ES:DI
1343- regm_t mregidx = IDXREGS & ~ (mregcx | retregs3);
1344- assert (mregidx);
1345- tym_t ty1 = tybasic(e.E1 .Ety);
1346- if (! tyreg(ty1))
1347- mregidx |= mES;
1348- scodelem(cgstate,cdb,e.E1 ,mregidx,mregcx | retregs3,false );
1349- reg_t idxreg = findreg(mregidx);
1354+ // Set Rd to destination
1355+ regm_t dregs = cg.allregs & ~ (cregs | vregs);
1356+ assert (dregs);
1357+ scodelem(cgstate,cdb,e.E1 ,dregs,cregs | vregs,false );
1358+ reg_t Rd = findreg(dregs);
13501359
1351- regm_t mregbx = 0 ;
1360+ reg_t Rp = Rd ;
13521361 if (pretregs) // if need return value
13531362 {
1354- mregbx = pretregs & ~ (mregidx | mregcx | retregs3 );
1355- if (! mregbx )
1356- mregbx = cgstate.allregs & ~ (mregidx | mregcx | retregs3 );
1357- const regbx = allocreg(cdb, mregbx , TYnptr);
1358- getregs(cdb, mregbx );
1359- genmovreg(cdb,regbx,idxreg ); // MOV BX,DI
1363+ regm_t mRp = pretregs & ~ (dregs | cregs | vregs );
1364+ if (! mRp )
1365+ mRp = cgstate.allregs & ~ (dregs | cregs | vregs );
1366+ Rp = allocreg(cdb, mRp , TYnptr);
1367+ getregs(cdb, mRp );
1368+ genmovreg(cdb,Rp,Rd ); // MOV Rp,Rd
13601369 }
13611370
1362- getregs(cdb,mask(idxreg) | mCX); // modify DI and CX
1371+ // allocate limit register Rl
1372+ regm_t lims = cg.allregs & ~ (dregs | cregs | vregs | mask(Rp));
1373+ const Rl = allocreg(cdb, lims, TYnptr);
1374+
1375+ getregs(cdb,mask(Rp) | lims); // modify Rp,Rl
13631376
13641377 /* Generate:
1365- * JCXZ L1
1366- * L2:
1367- * MOV [idxreg],AX
1368- * ADD idxreg,sz
1369- * LOOP L2
1370- * L1:
1371- * NOP
1378+ cbz Rc, L1
1379+ add Rl, Rd, Rc, uxtw #2
1380+ mov Rp, Rd
1381+ L2: str Rv, [Rp], #4
1382+ cmp Rp, Rl
1383+ b.ne L2
1384+ L1: nop
13721385 */
1373- code* c1 = gennop(null );
1374- genjmp(cdb, JCXZ , FL .code, cast (block* )c1);
1375- code cs;
1376- buildEA(&cs,idxreg,- 1 ,1 ,0 );
1377- cs.Iop = 0x89 ;
1378- if (! I16 && sz == 2 )
1379- cs.Iflags |= CFopsize;
1380- if (I64 && sz == 8 )
1381- cs.Irex |= REX_W ;
1382- code_newreg(&cs, valreg);
1383- cdb.gen(&cs); // MOV [idxreg],AX
1384- code* c2 = cdb.last();
1385- if (sz == REGSIZE * 2 )
1386- {
1387- cs.IEV1 .Vuns = REGSIZE ;
1388- code_newreg(&cs, valreghi);
1389- cdb.gen(&cs); // MOV REGSIZE[idxreg],DX
1390- }
1391- cdb.genc2(0x81 , grex | modregrmx(3 ,0 ,idxreg), sz); // ADD idxreg,sz
1392- genjmp(cdb, LOOP , FL .code, cast (block* )c2); // LOOP L2
1386+ reg_t Rc = findreg(cregs);
1387+ code* c1 = gen1(null , INSTR .nop);
1388+ uint sf = tysize(enelems.Ety) == 8 ;
1389+ genCompBranch(cdb,sf,Rc,0 ,FL .code,cast (block* )c1); // cbz Rc,c1
1390+
1391+ // http://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#addsub_ext
1392+ uint op = 0 ; // add
1393+ uint S = 0 ; // don't set flags
1394+ uint opt = 0 ;
1395+ uint option = tyToExtend(enelems.Ety);
1396+ uint opc;
1397+ uint imm3;
1398+ INSTR .szToSizeOpc(szv,imm3,opc); // shift 0..4
1399+ assert (szv != REGSIZE * 2 ); // TODO AArch64
1400+ cdb.gen1(INSTR .addsub_ext(1 ,op,S,opt,Rc,option,imm3,Rd,Rl));
1401+
1402+ if (Rp != Rd)
1403+ genmovreg(cdb,Rp,Rd);
1404+
1405+ cdb.gen1(INSTR .ldst_immpost(imm3,0 ,0 ,0 ,Rp,Rv)); // L2: STR Rv,[Rp],#0 // *Rp++ = Rv
1406+ code* L2 = cdb.last();
1407+ cdb.gen1(INSTR .cmp_shift(1 ,Rl,0 ,0 ,Rp)); // CMP Rp,Rl
1408+ genBranch(cdb,COND .ne,FL .code,cast (block* )L2 ); // b.ne L2
13931409 cdb.append(c1);
13941410
1395- cgstate.regimmed_set(CX , 0 ); // CX is now 0
1396-
1397- fixresult(cdb,e,mregbx,pretregs);
1411+ fixresult(cdb,e,dregs,pretregs);
13981412}
13991413
14001414/* *********************
0 commit comments