Skip to content

Commit 2601cb5

Browse files
committed
improve benchGauge
1 parent 602b596 commit 2601cb5

File tree

4 files changed

+38
-12
lines changed

4 files changed

+38
-12
lines changed

src/base/flopcount.nim

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# flops counts
2+
# all names start with 'f' for flops
3+
4+
# Complex number (fc...)
5+
# flops charged for one complex addition (value 2; presumably one real add
# for each of the real and imaginary components)
template fcadd*:int = 2
6+
# flops charged for one complex multiplication (value 6; presumably 4 real
# multiplies plus 2 real adds)
template fcmul*:int = 6
7+
# flops charged for one "redot" of two complex numbers (value 3; presumably
# the real part of a complex dot product, 2 multiplies + 1 add -- TODO confirm)
template fcredot*:int = 3
8+
9+
# Complex matrix (fcm...)
10+
# flops for adding two n x n complex matrices: one complex add (fcadd) for
# each of the n*n elements
template fcmadd*(n:int):int = n*n*fcadd
11+
# flops for multiplying two n x n complex matrices: each of the n*n result
# elements is charged n complex multiplies (fcmul) and n-1 complex adds (fcadd)
template fcmmul*(n:int):int = n*n*(n*fcmul + (n-1)*fcadd)
12+
# flops for the "redot" of two n x n complex matrices: n*n element-wise
# fcredot terms, plus n*n-1 single-flop adds to sum those terms together
template fcmredot*(n:int):int = n*n*fcredot + (n*n-1)
13+
14+
# single plaquette: redot(A*B,C*D)
15+
# flops for one plaquette evaluated as redot(A*B,C*D): two n x n matrix
# multiplies (fcmmul, for A*B and C*D) plus one matrix redot (fcmredot)
template fplaq*(n:int):int = 2*fcmmul(n) + fcmredot(n)

src/base/profile.nim

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -857,7 +857,8 @@ proc echoHotspots* =
857857
else:
858858
tsns += nk.ns
859859
let tsnspct = 100.0 * tsns / nstot
860-
echo &"{pct:6.3f} {tsnspct:7.3f} {count} {mf} {nc} S {lc} {nm}"
860+
#echo &"{pct:6.3f} {tsnspct:7.3f} {count} {mf} {nc} S {lc} {nm}"
861+
echo &"{pct:6.3f} {tsnspct:7.3f} {count} {nc} S {lc} {nm}"
861862

862863
proc echoProf*(def = 0) =
863864
case intParam("prof",def)

src/bench/benchGauge.nim

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import parseUtils
66
import macros
77
import gauge/hypsmear
88
import commonBench
9+
import base/flopcount
910

1011
proc checkMem =
1112
#echo("mem: (used+free)/total: (", getOccupiedMem(), "+", getFreeMem(), ")/",
@@ -37,6 +38,9 @@ template bench(fps,bps:SomeNumber; eqn:untyped) =
3738
var t1 = epochTime()
3839
let dt = t1 - t0
3940
]#
41+
block:
42+
eqn
43+
resetTimers()
4044
let br = benchSingle:
4145
eqn
4246
let (nrep,dt) = (br.reps,br.secs)
@@ -45,6 +49,7 @@ template bench(fps,bps:SomeNumber; eqn:untyped) =
4549
let mb = (nrep.float*bytes)/(1e6*dt)
4650
echo "(", exp2string(eqn), ") secs: ", dt|(5,3), " sec/n: ", dtn|(5,3),
4751
" mf: ", mf.int, " mb: ", mb.int
52+
echoProf()
4853

4954
proc test(lat: auto) =
5055
#var scale = 1
@@ -86,12 +91,16 @@ proc test(lat: auto) =
8691

8792
resetTimers()
8893

89-
bench(np*(2*8*nc*nc*nc-1), nd*2*nc*nc*sizeof(numberType(g[0][0]))):
90-
var pl = plaq(g)
91-
bench(np*(2*8*nc*nc*nc-1), nd*2*nc*nc*sizeof(numberType(g[0][0]))):
92-
var pl2 = plaq2(g)
93-
bench(np*(2*8*nc*nc*nc-1), nd*2*nc*nc*sizeof(numberType(g[0][0]))):
94-
var pl3 = plaq3(g)
94+
block:
95+
let flop = np*(fplaq(nc)+1)
96+
#let mem = nd*2*nc*nc*sizeof(numberType(g[0][0]))
97+
let mem = nd*sizeof(g[0][0]) div g[0][0][0,0].re.numNumbers
98+
bench(flop, mem):
99+
var pl = plaq(g)
100+
bench(flop, mem):
101+
var pl2 = plaq2(g)
102+
bench(flop, mem):
103+
var pl3 = plaq3(g)
95104

96105
bench(np*(2*8*nc*nc*nc-1), nd*2*nc*nc*sizeof(numberType(g[0][0]))):
97106
var ga = gaugeAction1(g)

src/gauge/gaugeUtils.nim

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ proc plaq*[T](uu: openArray[T]): auto =
287287
rankSum(pl)
288288
toc("sum")
289289
result = pl
290-
toc("end", flops=lo.nSites.float*float(np*(2*8*nc*nc*nc-1)))
290+
toc("end", flops=lo.nSites.float*float(np*(fplaq(nc)+1)))
291291

292292
template plaq*(g: Gauge): auto = plaq(g.u)
293293

@@ -329,6 +329,7 @@ proc plaq2*[T](gg:openArray[T]):auto =
329329
let lo = g[0].l
330330
let nd = lo.nDim
331331
let nc = g[0][0].ncols
332+
#let np = (nd*(nd-1)) div 2
332333
var m = lo.ColorMatrix()
333334
var s0 = lo.ColorMatrix()
334335
#var t0 = lo.ColorMatrix()
@@ -353,7 +354,7 @@ proc plaq2*[T](gg:openArray[T]):auto =
353354
m += (g[mu]*s0) * (g[nu]*s1).adj
354355
#m += (g[mu]*s0) * (g[nu]*s1)
355356
#echo mu, " ", nu, " ", trace(m)/nc
356-
toc("mul")
357+
toc("mul", flops=lo.nSites.float*float(3*fcmmul(nc)+fcmadd(nc)))
357358
toc("work")
358359
tr = trace(m)
359360
toc("trace")
@@ -379,13 +380,13 @@ proc plaq3*[T](g: seq[T]): auto =
379380
tic("plaq3 loop")
380381
#m += (t[mu]^*g[nu]) * (t[nu]^*g[mu]).adj
381382
discard t[mu]^*!g[nu]
382-
toc("transport1")
383+
toc("transport1", flops=lo.nSites.float*float(fcmmul(nc)))
383384
discard t[nu]^*!g[mu]
384385
threadBarrier()
385-
toc("transport2")
386+
toc("transport2", flops=lo.nSites.float*float(fcmmul(nc)))
386387
m += t[mu].field * t[nu].field.adj
387388
#echo mu, " ", nu, " ", trace(m)/nc
388-
toc("mul")
389+
toc("mul", flops=lo.nSites.float*float(fcmmul(nc)+fcmadd(nc)))
389390
toc("work")
390391
tr = trace(m)
391392
toc("trace")

0 commit comments

Comments
 (0)