Skip to content

Commit 602b596

Browse files
committed
don't zero created fields
1 parent b366cfd commit 602b596

File tree

5 files changed

+105
-257
lines changed

5 files changed

+105
-257
lines changed

src/field/fieldET.nim

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -137,9 +137,18 @@ proc new*[V:static[int],T](x:var FieldObj[V,T]; l:Layout[V]) =
137137
x.s.new(l.nSitesOuter)
138138
#fence()
139139
x.elemSize = sizeOf(T)
140+
proc newU*[V:static[int],T](x:var FieldObj[V,T]; l:Layout[V]) =
141+
# remember to change newFieldArray if the following changes
142+
x.l = l
143+
x.s.newU(l.nSitesOuter)
144+
#fence()
145+
x.elemSize = sizeOf(T)
140146
proc new*[V:static[int],T](x:var Field[V,T]; l:Layout[V]) =
141147
x.new()
142148
new(x[], l)
149+
proc newU*[V:static[int],T](x:var Field[V,T]; l:Layout[V]) =
150+
x.new()
151+
newU(x[], l)
143152
proc new*[V:static[int],T](x:var FieldObj[V,T]; y:Field) = x.new(y.l)
144153
proc new*[V:static[int],T](x:var Field[V,T]; y:Field) = x.new(y.l)
145154
proc newField*[V:static[int],T](l:Layout[V]; t:typedesc[T]):Field[V,T] =
@@ -148,6 +157,10 @@ proc newOneOf*(x: Field): auto =
148157
var r: type(x)
149158
r.new(x.l)
150159
r
160+
proc newOneOfU*(x: Field): auto =
161+
var r: type(x)
162+
r.newU(x.l)
163+
r
151164
template l*(x: FieldUnop): untyped = x.f1.l
152165
proc newOneOf*(x: FieldUnop): auto =
153166
var r: evalType(x)

src/gauge/gaugeUtils.nim

Lines changed: 35 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -220,7 +220,7 @@ proc staples*[T,F,B](ss,uu,vv:openArray[T];ff:openArray[F];bb:openArray[B]) =
220220

221221
proc plaq*[T](uu: openArray[T]): auto =
222222
mixin mul, load1, createShiftBufs
223-
tic()
223+
tic("plaq")
224224
template getIp(mu,nu: int): int = ((mu*(mu-1)) div 2) + nu
225225
let u = cast[ptr cArray[T]](unsafeAddr(uu[0]))
226226
let lo = u[0].l
@@ -233,17 +233,17 @@ proc plaq*[T](uu: openArray[T]): auto =
233233
for i in 0..<nd-1: sf[nd-1][i] = sf[i][i]
234234
let np = (nd*(nd-1)) div 2
235235
var pl = newSeq[float64](np)
236-
toc("plaq setup")
236+
toc("setup")
237237
threads:
238-
tic()
238+
tic("plaq threads")
239239
#var plt = newSeq[float64](np)
240240
var plt: array[6,float64]
241241
var umunu,unumu: type(load1(u[0][0]))
242242
for mu in 0..<nd:
243243
for nu in 0..<nd:
244244
if mu != nu:
245245
startSB(sf[mu][nu], u[mu][ix])
246-
toc("plaq start shifts")
246+
toc("start shifts")
247247
for ir in u[0]:
248248
for mu in 1..<nd:
249249
for nu in 0..<mu:
@@ -253,13 +253,13 @@ proc plaq*[T](uu: openArray[T]): auto =
253253
let ip = getIp(mu,nu)
254254
let dt = redot(umunu,unumu)
255255
plt[ip] += simdSum(dt)
256-
toc("plaq local")
256+
toc("local")
257257
var needBoundary = false
258258
for mu in 0..<nd:
259259
for nu in 0..<nd:
260260
if mu != nu:
261261
boundaryWaitSB(sf[mu][nu]): needBoundary = true
262-
toc("plaq wait")
262+
toc("wait")
263263
if needBoundary:
264264
boundarySyncSB()
265265
for ir in u[0]:
@@ -279,15 +279,15 @@ proc plaq*[T](uu: openArray[T]): auto =
279279
let ip = getIp(mu,nu)
280280
let dt = redot(umunu,unumu)
281281
plt[ip] += simdSum(dt)
282-
toc("plaq boundary")
282+
toc("boundary")
283283
threadSum(plt)
284284
if threadNum == 0:
285285
for i in 0..<pl.len:
286286
pl[i] = plt[i]/(lo.physVol.float*float(np*nc))
287287
rankSum(pl)
288-
toc("plaq sum")
288+
toc("sum")
289289
result = pl
290-
toc("plaq end", flops=lo.nSites.float*float(np*(2*8*nc*nc*nc-1)))
290+
toc("end", flops=lo.nSites.float*float(np*(2*8*nc*nc*nc-1)))
291291

292292
template plaq*(g: Gauge): auto = plaq(g.u)
293293

@@ -324,7 +324,7 @@ proc staples*[T,A,F,B](staples,uu,vv:openArray[T]; aa:openArray[A];
324324

325325
proc plaq2*[T](gg:openArray[T]):auto =
326326
mixin adj
327-
tic()
327+
tic("plaq2")
328328
let g = cast[ptr cArray[T]](unsafeAddr(gg[0]))
329329
let lo = g[0].l
330330
let nd = lo.nDim
@@ -335,57 +335,61 @@ proc plaq2*[T](gg:openArray[T]):auto =
335335
var s1 = lo.ColorMatrix()
336336
#var t1 = lo.ColorMatrix()
337337
var tr:type(trace(m))
338-
toc("plaq2 setup")
338+
toc("setup")
339339
threads:
340-
#tic()
340+
tic("plaq2 threads")
341341
m := 0
342-
#toc("plaq2 zero")
342+
toc("zero")
343343
for mu in 1..<nd:
344344
for nu in 0..<mu:
345-
#tic()
345+
tic("plaq2 loop")
346+
#toc("before shift1")
346347
shift(s0, mu,1, g[nu])
347-
#toc("plaq2 shift1")
348+
toc("shift1")
348349
shift(s1, nu,1, g[mu])
349-
#toc("plaq2 shift2")
350+
toc("shift2")
350351
#echo "s0: ", trace(s0)
351352
#echo "s1: ", trace(s1)
352353
m += (g[mu]*s0) * (g[nu]*s1).adj
353354
#m += (g[mu]*s0) * (g[nu]*s1)
354355
#echo mu, " ", nu, " ", trace(m)/nc
355-
#toc("plaq2 mul")
356-
#toc("plaq2 work")
356+
toc("mul")
357+
toc("work")
357358
tr = trace(m)
358-
#toc("plaq2 trace")
359-
toc("plaq2 threads")
359+
toc("trace")
360+
toc("end")
360361
result = tr/(lo.physVol.float*0.5*float(nd*(nd-1)*nc))
361362

362363
proc plaq3*[T](g: seq[T]): auto =
363364
mixin adj, newTransporters
364-
tic()
365+
tic("plaq3")
365366
let lo = g[0].l
366367
let nd = lo.nDim
367368
let nc = g[0][0].ncols
368369
let t = newTransporters(g, g[0], 1)
369370
var m = lo.ColorMatrix()
370371
var tr: type(trace(m))
371-
toc("plaq3 setup")
372+
toc("setup")
372373
threads:
373-
tic()
374+
tic("plaq3 threads")
374375
m := 0
375-
toc("plaq3 zero")
376+
toc("zero")
376377
for mu in 1..<nd:
377378
for nu in 0..<mu:
378-
tic()
379+
tic("plaq3 loop")
379380
#m += (t[mu]^*g[nu]) * (t[nu]^*g[mu]).adj
380-
discard t[mu]^*g[nu]
381-
discard t[nu]^*g[mu]
381+
discard t[mu]^*!g[nu]
382+
toc("transport1")
383+
discard t[nu]^*!g[mu]
384+
threadBarrier()
385+
toc("transport2")
382386
m += t[mu].field * t[nu].field.adj
383387
#echo mu, " ", nu, " ", trace(m)/nc
384-
toc("plaq3 mul")
385-
toc("plaq3 work")
388+
toc("mul")
389+
toc("work")
386390
tr = trace(m)
387-
toc("plaq3 trace")
388-
toc("plaq3 threads")
391+
toc("trace")
392+
toc("end")
389393
result = tr/(lo.physVol.float*0.5*float(nd*(nd-1)*nc))
390394

391395
proc echoPlaq*(g: auto) =

src/gauge/symanzik1loopAction.nim

Lines changed: 10 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -43,20 +43,13 @@ proc symanzik1loopAction*[T](coeffs: GaugeActionCoeffs;
4343
nflops = newThreadSingle(0)
4444
U = newSeq[GF](nd)
4545
Uf = newSeq2d[Shft](nd,nd)
46-
#var mu: cint = 0
47-
#while mu < nd:
4846
for mu in 0..<nd:
4947
U[mu] = links[mu]
50-
#var nu: cint = 0
51-
#while nu < nd:
5248
for nu in 0..<nd:
53-
#if nu == mu: continue
54-
#Uf[mu,nu] = links[0].newOneOf
55-
#QDP_M_eq_sM(Uf[mu][nu], U[mu], neighbor[nu], QDP_forward, sub)
5649
if nu != mu:
50+
#Uf[mu,nu] = links[0].newOneOf
51+
#QDP_M_eq_sM(Uf[mu][nu], U[mu], neighbor[nu], QDP_forward, sub)
5752
Uf[mu,nu] = newShifter(U[mu], nu, 1)
58-
#inc(nu)
59-
#inc(mu)
6053
threads:
6154
for mu in 0..<nd:
6255
for nu in 0..<nd:
@@ -73,11 +66,7 @@ proc symanzik1loopAction*[T](coeffs: GaugeActionCoeffs;
7366
bstpl0 = newSeq2d[GF](nd,nd)
7467
bstpl = newSeq2d[Shft](nd,nd)
7568
tc = lo.Complex
76-
#var mu: cint = 1
77-
#while mu < nd:
7869
for mu in 1..<nd:
79-
#var nu: cint = 0
80-
#while nu < mu:
8170
for nu in 0..<mu:
8271
if pgm != 0.0:
8372
UUf[mu,nu] = links[0].newOneOf
@@ -242,68 +231,30 @@ proc symanzik1loopAction*[T](coeffs: GaugeActionCoeffs;
242231
nflops += 2 * EQMTM + 3 * PEQMTM + 4 * nc * nc
243232

244233
if pgm != 0.0:
245-
## FIXME: only works for nd=4
234+
# FIXME: only works for nd=4
246235
if nd != 4:
247236
qexError("symanzik1loopAction with parallelogram only works for nDim == 4")
248237
combinefb(pgms, 0, 1, 2)
249238
combineb(pgms, 0, 2, 1)
250239
combineb(pgms, 1, 2, 0)
251-
## rest
252-
## combinefb(pgmt,0,3,1);
253-
## combinefb(pgmt,0,3,2);
240+
# rest
241+
# combinefb(pgmt,0,3,1);
242+
# combinefb(pgmt,0,3,2);
254243
combinefb2(pgmt, 0, 3, 1, 2)
255244
combineb(pgmt, 0, 1, 3)
256245
combineb(pgmt, 0, 2, 3)
257246
combinefb(pgmt, 1, 2, 3)
258-
## combineb(pgmt,1,3,0);
259-
## combineb(pgmt,1,3,2);
247+
# combineb(pgmt,1,3,0);
248+
# combineb(pgmt,1,3,2);
260249
combineb2(pgmt, 1, 3, 0, 2)
261-
## combineb(pgmt,2,3,0);
262-
## combineb(pgmt,2,3,1);
250+
# combineb(pgmt,2,3,0);
251+
# combineb(pgmt,2,3,1);
263252
combineb2(pgmt, 2, 3, 0, 1)
264253

265254
threadSingle:
266255
acts = plaq * plaqs + rect * rects + pgm * pgms + adpl * adpls
267256
actt = plaq * plaqt + rect * rectt + pgm * pgmt + adpl * adplt
268257

269-
#[
270-
var mu: cint = 0
271-
while mu < nd:
272-
var nu: cint = 0
273-
while nu < nd:
274-
if nu == mu:
275-
inc(nu)
276-
inc(mu)
277-
continue
278-
QDP_destroy_M(Uf[mu,nu])
279-
inc(nu)
280-
inc(mu)
281-
if pgm:
282-
var mu: cint = 0
283-
while mu < nd:
284-
var nu: cint = 0
285-
while nu < nd:
286-
if nu == mu:
287-
inc(nu)
288-
inc(mu)
289-
continue
290-
QDP_destroy_M(UUf[mu,nu])
291-
QDP_destroy_M(fstpl[mu,nu])
292-
QDP_destroy_M(bstpl0[mu,nu])
293-
QDP_destroy_M(bstpl[mu,nu])
294-
inc(nu)
295-
inc(mu)
296-
else:
297-
QDP_destroy_M(UUf[1,0])
298-
QDP_destroy_M(UUf[0,1])
299-
if rect:
300-
QDP_destroy_M(fstpl[1,0])
301-
QDP_destroy_M(fstpl[0,1])
302-
QDP_destroy_M(bstpl0[1,0])
303-
QDP_destroy_M(bstpl0[0,1])
304-
QDP_destroy_M(bstpl[1,0])
305-
QDP_destroy_M(bstpl[0,1])
306-
]#
307258
let act0 = lo.physVol*(coeffs.plaq + 2*coeffs.rect + coeffs.adjplaq)
308259
let act1 = lo.physVol*(4*coeffs.pgm)
309260
result.space = 0.5*(nd-1)*(nd-2)*act0 + act1 - acts

0 commit comments

Comments
 (0)