Skip to content

Commit 0f04398

Browse files
authored
implement cdmemsetn() (dlang#21554)
1 parent 31329c7 commit 0f04398

File tree

2 files changed

+117
-75
lines changed

2 files changed

+117
-75
lines changed

compiler/src/dmd/backend/arm/cod2.d

Lines changed: 87 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ import dmd.backend.ty;
3838
import dmd.backend.type;
3939
import dmd.backend.x86.xmm;
4040
import dmd.backend.arm.cod1 : loadFromEA, storeToEA, getlvalue, CLIB_A, callclib;
41-
import dmd.backend.arm.cod3 : conditionCode, genBranch, gentstreg, movregconst, COND, loadFloatRegConst;
41+
import dmd.backend.arm.cod3 : conditionCode, genBranch, genCompBranch, gentstreg, movregconst, COND, loadFloatRegConst;
4242
import dmd.backend.arm.instr;
4343

4444
nothrow:
@@ -1297,104 +1297,118 @@ private void cdmemsetn(ref CGstate cg, ref CodeBuilder cdb,elem* e,ref regm_t pr
12971297
elem* e2 = e.E2;
12981298
assert(e2.Eoper == OPparam);
12991299

1300-
elem* evalue = e2.E2;
1301-
elem* enelems = e2.E1;
1300+
/* evalue: value to store
1301+
szv: size in bytes of value
1302+
tyv: type of value
1303+
vregs: mask of registers for evalue
1304+
Rv: register holding evalue
1305+
Rvhi: MSW register holding evalue for 2*REGSIZE
13021306
1303-
tym_t tymv = tybasic(evalue.Ety);
1304-
const sz = tysize(evalue.Ety);
1305-
assert(cast(int)sz > 1);
1307+
enelems: count of evalues to store
1308+
cregs: mask of registers enelems is in
1309+
Rc: register with count
13061310
1307-
if (tyxmmreg(tymv) && config.fpxmmregs)
1308-
assert(0); // fix later
1309-
if (tyfloating(tymv) && config.inline8087)
1310-
assert(0); // fix later
1311+
e.E1: pointer to destination
1312+
dregs: mask of registers for pointer to destination
1313+
Rd: pointer to destination, return value
13111314
1312-
const grex = I64 ? (REX_W << 16) : 0;
1315+
Rp: incrementing pointer
1316+
Rl: limit pointer value (one past the last element to store)
1317+
*/
13131318

1314-
// get the count of elems into CX
1315-
regm_t mregcx = mCX;
1316-
codelem(cgstate,cdb,enelems,mregcx,false);
1319+
// Set cregs to count of elems
1320+
elem* enelems = e2.E1;
1321+
regm_t cregs = cg.allregs & ~pretregs;
1322+
if (!cregs)
1323+
cregs = cg.allregs;
1324+
codelem(cgstate,cdb,enelems,cregs,false);
13171325

1318-
// Get value into AX
1319-
regm_t retregs3 = cgstate.allregs & ~mregcx;
1320-
if (sz == 2 * REGSIZE)
1321-
retregs3 &= ~(mBP | IDXREGS); // BP cannot be used for register pair,
1322-
// IDXREGS could deplete index regs - see sdtor.d test14815()
1323-
scodelem(cgstate,cdb,evalue,retregs3,mregcx,false);
1326+
// Set vregs to value
1327+
elem* evalue = e2.E2;
1328+
tym_t tyv = tybasic(evalue.Ety);
1329+
const szv = tysize(tyv);
1330+
assert(cast(int)szv > 1);
1331+
if (tyfloating(tyv))
1332+
assert(0); // TODO AArch64
1333+
regm_t vregs = cgstate.allregs & ~cregs;
1334+
scodelem(cgstate,cdb,evalue,vregs,cregs,false);
13241335

13251336
/* Necessary because if evalue calls a function, and that function never returns,
13261337
* it doesn't affect registers. Which means those registers can be used for enregistering
13271338
* variables, and next pass fails because it can't use those registers, and so cannot
1328-
* allocate registers for retregs3. See ice11596.d
1339+
* allocate registers for vregs. See ice11596.d
13291340
*/
1330-
useregs(retregs3);
1341+
useregs(vregs);
13311342

1332-
reg_t valreg = findreg(retregs3);
1333-
reg_t valreghi;
1334-
if (sz == 2 * REGSIZE)
1343+
// Set [Rvhi,Rv] to value
1344+
reg_t Rv = findreg(vregs);
1345+
reg_t Rvhi = NOREG;
1346+
if (szv == 2 * REGSIZE)
13351347
{
1336-
valreg = findreglsw(retregs3);
1337-
valreghi = findregmsw(retregs3);
1348+
Rv = findreg(vregs & INSTR.LSW);
1349+
Rvhi = findreg(vregs & INSTR.MSW);
13381350
}
13391351

13401352
freenode(e2);
13411353

1342-
// Get s into ES:DI
1343-
regm_t mregidx = IDXREGS & ~(mregcx | retregs3);
1344-
assert(mregidx);
1345-
tym_t ty1 = tybasic(e.E1.Ety);
1346-
if (!tyreg(ty1))
1347-
mregidx |= mES;
1348-
scodelem(cgstate,cdb,e.E1,mregidx,mregcx | retregs3,false);
1349-
reg_t idxreg = findreg(mregidx);
1354+
// Set Rd to destination
1355+
regm_t dregs = cg.allregs & ~(cregs | vregs);
1356+
assert(dregs);
1357+
scodelem(cgstate,cdb,e.E1,dregs,cregs | vregs,false);
1358+
reg_t Rd = findreg(dregs);
13501359

1351-
regm_t mregbx = 0;
1360+
reg_t Rp = Rd;
13521361
if (pretregs) // if need return value
13531362
{
1354-
mregbx = pretregs & ~(mregidx | mregcx | retregs3);
1355-
if (!mregbx)
1356-
mregbx = cgstate.allregs & ~(mregidx | mregcx | retregs3);
1357-
const regbx = allocreg(cdb, mregbx, TYnptr);
1358-
getregs(cdb, mregbx);
1359-
genmovreg(cdb,regbx,idxreg); // MOV BX,DI
1363+
regm_t mRp = pretregs & ~(dregs | cregs | vregs);
1364+
if (!mRp)
1365+
mRp = cgstate.allregs & ~(dregs | cregs | vregs);
1366+
Rp = allocreg(cdb, mRp, TYnptr);
1367+
getregs(cdb, mRp);
1368+
genmovreg(cdb,Rp,Rd); // MOV Rp,Rd
13601369
}
13611370

1362-
getregs(cdb,mask(idxreg) | mCX); // modify DI and CX
1371+
// allocate limit register Rl
1372+
regm_t lims = cg.allregs & ~(dregs | cregs | vregs | mask(Rp));
1373+
const Rl = allocreg(cdb, lims, TYnptr);
1374+
1375+
getregs(cdb,mask(Rp) | lims); // modify Rp,Rl
13631376

13641377
/* Generate:
1365-
* JCXZ L1
1366-
* L2:
1367-
* MOV [idxreg],AX
1368-
* ADD idxreg,sz
1369-
* LOOP L2
1370-
* L1:
1371-
* NOP
1378+
cbz Rc, L1
1379+
add Rl, Rd, Rc, uxtw #2
1380+
mov Rp, Rd
1381+
L2: str Rv, [Rp], #4
1382+
cmp Rp, Rl
1383+
b.ne L2
1384+
L1: nop
13721385
*/
1373-
code* c1 = gennop(null);
1374-
genjmp(cdb, JCXZ, FL.code, cast(block*)c1);
1375-
code cs;
1376-
buildEA(&cs,idxreg,-1,1,0);
1377-
cs.Iop = 0x89;
1378-
if (!I16 && sz == 2)
1379-
cs.Iflags |= CFopsize;
1380-
if (I64 && sz == 8)
1381-
cs.Irex |= REX_W;
1382-
code_newreg(&cs, valreg);
1383-
cdb.gen(&cs); // MOV [idxreg],AX
1384-
code* c2 = cdb.last();
1385-
if (sz == REGSIZE * 2)
1386-
{
1387-
cs.IEV1.Vuns = REGSIZE;
1388-
code_newreg(&cs, valreghi);
1389-
cdb.gen(&cs); // MOV REGSIZE[idxreg],DX
1390-
}
1391-
cdb.genc2(0x81, grex | modregrmx(3,0,idxreg), sz); // ADD idxreg,sz
1392-
genjmp(cdb, LOOP, FL.code, cast(block*)c2); // LOOP L2
1386+
reg_t Rc = findreg(cregs);
1387+
code* c1 = gen1(null, INSTR.nop);
1388+
uint sf = tysize(enelems.Ety) == 8;
1389+
genCompBranch(cdb,sf,Rc,0,FL.code,cast(block*)c1); // cbz Rc,c1
1390+
1391+
// http://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#addsub_ext
1392+
uint op = 0; // add
1393+
uint S = 0; // don't set flags
1394+
uint opt = 0;
1395+
uint option = tyToExtend(enelems.Ety);
1396+
uint opc;
1397+
uint imm3;
1398+
INSTR.szToSizeOpc(szv,imm3,opc); // shift 0..4
1399+
assert(szv != REGSIZE * 2); // TODO AArch64
1400+
cdb.gen1(INSTR.addsub_ext(1,op,S,opt,Rc,option,imm3,Rd,Rl));
1401+
1402+
if (Rp != Rd)
1403+
genmovreg(cdb,Rp,Rd);
1404+
1405+
cdb.gen1(INSTR.ldst_immpost(imm3,0,0,0,Rp,Rv)); // L2: STR Rv,[Rp],#0 // *Rp++ = Rv
1406+
code* L2 = cdb.last();
1407+
cdb.gen1(INSTR.cmp_shift(1,Rl,0,0,Rp)); // CMP Rp,Rl
1408+
genBranch(cdb,COND.ne,FL.code,cast(block*)L2); // b.ne L2
13931409
cdb.append(c1);
13941410

1395-
cgstate.regimmed_set(CX, 0); // CX is now 0
1396-
1397-
fixresult(cdb,e,mregbx,pretregs);
1411+
fixresult(cdb,e,dregs,pretregs);
13981412
}
13991413

14001414
/**********************

compiler/src/dmd/backend/arm/cod3.d

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,8 @@ void REGSAVE_restore(const ref REGSAVE regsave, ref CodeBuilder cdb, reg_t reg,
131131

132132
// https://www.scs.stanford.edu/~zyedidia/arm64/b_cond.html
133133
// https://www.scs.stanford.edu/~zyedidia/arm64/bc_cond.html
134-
bool isBranch(uint ins) { return (ins & 0xFF00_0000) == 0x5400_0000; }
134+
bool isBranch(uint ins) { return ((ins & 0xFF00_0000) == 0x5400_0000) ||   // conditional branch (see the b_cond / bc_cond links above)
135+
((ins & 0x7E00_0000) == 0x3400_0000); }   // compare and branch (immediate), as emitted by genCompBranch()
135136

136137
enum MARS = true;
137138

@@ -321,7 +322,8 @@ void gentstreg(ref CodeBuilder cdb, reg_t reg, uint sf)
321322
// genshift
322323

323324
/**************************
324-
* Generate a jump instruction.
325+
* Generate a conditional branch (immediate) instruction.
326+
* https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#condbranch
325327
*/
326328

327329
@trusted
@@ -337,6 +339,32 @@ void genBranch(ref CodeBuilder cdb, COND cond, FL fltarg, block* targ)
337339
cdb.gen(&cs);
338340
}
339341

342+
/**************************
343+
* Generate a compare and branch (immediate) instruction, i.e. compare register R with zero and branch on the result.
344+
* https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#compbranch
345+
* Params:
346+
* cdb = code sink the instruction is appended to
347+
* sf = 1 for 64 bit, 0 for 32
348+
* R = register that is tested against zero
349+
* op = false to branch when R is zero (CBZ), true for testing for not zero (CBNZ)
350+
* fltarg = FL.block or FL.code, selects how targ is interpreted
351+
* targ = block or code target (a code* cast to block* when fltarg is FL.code)
352+
*/
353+
354+
@trusted
355+
void genCompBranch(ref CodeBuilder cdb, uint sf, reg_t R, bool op, FL fltarg, block* targ)
356+
{
357+
code cs;
358+
uint imm19 = 0; // branch offset is left as 0 for now, fix in codout()
359+
cs.Iop = INSTR.compbranch(sf, op, imm19, R);
360+
cs.Iflags = 0;
361+
cs.IFL1 = fltarg; // FL.block (or FL.code)
362+
cs.IEV1.Vblock = targ; // target block (or code)
363+
if (fltarg == FL.code)
364+
(cast(code*)targ).Iflags |= CFtarg; // set CFtarg on the target code (presumably marks it as a jump target - confirm)
365+
cdb.gen(&cs);
366+
}
367+
340368
// prolog_ifunc
341369
// prolog_ifunc2
342370
// prolog_16bit_windows_farfunc

0 commit comments

Comments
 (0)