Skip to content

Commit 3f94f3a

Browse files
Merge branch 'devel/improved_timing' into 'master'
Improve existing timing infrastructure. See merge request exastencils/exastencils!148.
2 parents 1855805 + 673c452 commit 3f94f3a

30 files changed

+805
-198
lines changed

Compiler/src/exastencils/applications/ir/IR_HandleMainApplication.scala

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import exastencils.logger.Logger
3030
import exastencils.parallelization.api.mpi._
3131
import exastencils.parallelization.api.omp.OMP_Parallel
3232
import exastencils.timing.ir.IR_CollectUnresolvedBenchmarkFunctions
33+
import exastencils.util.ir.DLB_Monitor
3334

3435
/// IR_HandleMainApplication
3536

@@ -53,20 +54,32 @@ object IR_HandleMainApplication extends DefaultStrategy("HandleMainApplication")
5354
body
5455
}
5556

56-
if ("likwid" == Knowledge.benchmark_backend) {
57-
// register timers
58-
var registerMarkers = ListBuffer[IR_Statement]()
57+
if (Knowledge.benchmark_backend != "None") {
58+
val registerMarkers = ListBuffer[IR_Statement]()
5959
IR_CollectUnresolvedBenchmarkFunctions.applyStandalone(StateManager.root)
60-
IR_CollectUnresolvedBenchmarkFunctions.benchmarkNames foreach { name =>
61-
registerMarkers += IR_Native("LIKWID_MARKER_REGISTER(\"" + name + "\")")
62-
}
63-
func.body.prependAll(wrapAroundParallelRegion(registerMarkers))
6460

65-
func.body.prependAll(wrapAroundParallelRegion(ListBuffer[IR_Statement](IR_Native("LIKWID_MARKER_THREADINIT"))))
66-
func.body.prepend(IR_Native("LIKWID_MARKER_INIT"))
67-
func.body.append(IR_Native("LIKWID_MARKER_CLOSE"))
61+
Knowledge.benchmark_backend match {
62+
case "likwid" => {
63+
IR_CollectUnresolvedBenchmarkFunctions.benchmarkNames foreach { name =>
64+
registerMarkers += IR_Native("LIKWID_MARKER_REGISTER(\"" + name + "\")")
65+
}
66+
func.body.prependAll(wrapAroundParallelRegion(registerMarkers))
67+
68+
func.body.prependAll(wrapAroundParallelRegion(ListBuffer[IR_Statement](IR_Native("LIKWID_MARKER_THREADINIT"))))
69+
func.body.prepend(IR_Native("LIKWID_MARKER_INIT"))
70+
func.body.append(IR_Native("LIKWID_MARKER_CLOSE"))
71+
}
72+
case "talp" =>
73+
IR_CollectUnresolvedBenchmarkFunctions.benchmarkNames foreach { name =>
74+
registerMarkers += IR_Native(DLB_Monitor.getMonitorName(name) + " = DLB_MonitoringRegionRegister(\"" + DLB_Monitor.getMonitorName(name) + "\")")
75+
}
76+
func.body.prependAll(wrapAroundParallelRegion(registerMarkers))
77+
case _ =>
78+
Logger.error("Unknown benchmark")
79+
}
6880
}
6981

82+
7083
if (Knowledge.mpi_enabled) {
7184
func.body.prepend(MPI_Init)
7285
func.body.append(MPI_Finalize)

Compiler/src/exastencils/base/ir/IR_FutureFunction.scala

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,10 @@ trait IR_FutureFunctionWithTiming extends IR_FutureFunction {
5050
var genFct = generateFct()
5151

5252
if (IR_AutomaticTimingCategory.categoryEnabled(automaticTimingCategory)) {
53-
val timer = IR_IV_AutomaticTimer(s"autoTime_${automaticTimingCategory.toString}_$name", automaticTimingCategory)
53+
val timer = IR_IV_AutomaticTimer(s"autoTime_${ automaticTimingCategory.toString }_$name", automaticTimingCategory)
5454

55-
genFct.body.prepend(IR_FunctionCall(IR_StartTimer().name, timer))
56-
genFct.body.append(IR_FunctionCall(IR_StopTimer().name, timer))
55+
genFct.body.prepend(IR_FunctionCall(IR_TimerFunctionReference(IR_StartTimer().name, IR_UnitDatatype, None), timer))
56+
genFct.body.append(IR_FunctionCall(IR_TimerFunctionReference(IR_StopTimer().name, IR_UnitDatatype, None), timer))
5757
}
5858

5959
genFct
@@ -84,4 +84,17 @@ trait IR_FutureLeveledFunction extends IR_FutureFunction {
8484
/// IR_FutureLeveledFunctionWithTiming

/**
  * Leveled counterpart of IR_FutureFunctionWithTiming.
  *
  * When the function's automatic timing category is enabled, the generated
  * function body is wrapped by a start-timer call (prepended) and a stop-timer
  * call (appended), both referring to one leveled timer for `level`.
  */
trait IR_FutureLeveledFunctionWithTiming extends IR_FutureFunctionWithTiming {
  def level : Int
  override def generateFct() : IR_LeveledFunction

  override def expand() : Output[IR_Function] = {
    val generated = generateFct()

    if (IR_AutomaticTimingCategory.categoryEnabled(automaticTimingCategory)) {
      // one timer per category, function name and level
      val leveledTimer = IR_IV_AutomaticLeveledTimer(s"autoTime_${ automaticTimingCategory.toString }_$name", automaticTimingCategory, level)

      // builds the call to the leveled timer function with the given name
      def timerCall(fctName : String) =
        IR_FunctionCall(IR_TimerFunctionReference(fctName, IR_UnitDatatype, Some(level)), leveledTimer)

      // NOTE(review): the stop call becomes the last statement of the body;
      // presumably generated bodies do not return earlier - confirm
      generated.body.prepend(timerCall(IR_StartTimer().name))
      generated.body.append(timerCall(IR_StopTimer().name))
    }

    generated
  }
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
package exastencils.baseExt.ir
2+
3+
import exastencils.base.ir._
4+
import exastencils.prettyprinting.PpStream
5+
6+
/// IR_ContainerDatatypeSTL

/**
  * Common base for the STL container datatypes declared in this file.
  *
  * Instead of an MPI datatype name, `prettyprint_mpi` yields a marker string
  * consisting of "INVALID DATATYPE: " followed by the regular pretty-printed
  * form of the datatype.
  */
trait IR_ContainerDatatypeSTL extends IR_HigherDimensionalDatatype {
  override def prettyprint_mpi : String = s"INVALID DATATYPE: ${ prettyprint() }"
}
9+
10+
/// IR_StdVectorDatatype

/**
  * Datatype node printed as `std::vector< T >`, with `T` being the wrapped element datatype.
  *
  * No size is attached to this datatype: all size-related queries are
  * unimplemented (`???`) and throw when called.
  *
  * @param datatype datatype of the vector's elements
  */
case class IR_StdVectorDatatype(var datatype : IR_Datatype) extends IR_ContainerDatatypeSTL {

  // printed form: std::vector< <element type> >
  override def prettyprint(out : PpStream) : Unit = {
    out << "std::vector< " << datatype << " >"
  }

  // the container contributes one dimension on top of its element type
  override def dimensionality : Int = datatype.dimensionality + 1

  // declared as-is, without any postscript after the variable name
  override def resolveDeclType : IR_Datatype = this
  override def resolveDeclPostscript : String = ""

  // size queries are not implemented for this datatype
  override def getSizeArray : Array[Int] = ???
  override def resolveFlattendSize : Int = ???
  override def typicalByteSize = ???
}
21+
22+
/// IR_StdVectorDatatype_VS

/**
  * Datatype node printed as `std::vector< T >` whose declaration postscript is
  * the pretty-printed `numElements` expression in parentheses.
  *
  * All size-related queries are unimplemented (`???`) and throw when called.
  *
  * @param datatype    datatype of the vector's elements
  * @param numElements expression printed inside the parentheses of the declaration postscript
  */
case class IR_StdVectorDatatype_VS(var datatype : IR_Datatype, var numElements : IR_Expression) extends IR_ContainerDatatypeSTL {

  // printed form: std::vector< <element type> >
  override def prettyprint(out : PpStream) : Unit = {
    out << "std::vector< " << datatype << " >"
  }

  // the container contributes one dimension on top of its element type
  override def dimensionality : Int = datatype.dimensionality + 1

  override def resolveDeclType : IR_Datatype = this

  // postscript of the declaration: "(" + printed numElements + ")"
  override def resolveDeclPostscript : String = {
    val printedSize = numElements.prettyprint()
    s"($printedSize)"
  }

  // size queries are not implemented for this datatype
  override def getSizeArray : Array[Int] = ???
  override def resolveFlattendSize : Int = ???
  override def typicalByteSize = ???
}
33+
34+
/// IR_StdArrayDatatype

/**
  * Datatype node printed as `std::array<T, N>`.
  *
  * Unlike the vector datatypes in this file, all size queries are implemented
  * and derived from `numElements` and the wrapped element datatype.
  *
  * @param datatype    datatype of the array's elements
  * @param numElements number of elements, printed as the second template argument
  */
case class IR_StdArrayDatatype(var datatype : IR_Datatype, numElements : Int) extends IR_ContainerDatatypeSTL {

  // printed form: std::array<<element type>, <numElements>>
  override def prettyprint(out : PpStream) : Unit = {
    out << "std::array<" << datatype << ", " << numElements << ">"
  }

  // the container contributes one dimension on top of its element type
  override def dimensionality : Int = datatype.dimensionality + 1

  // own extent first, followed by the extents of the element type
  override def getSizeArray : Array[Int] = numElements +: datatype.getSizeArray

  // declared as-is, without any postscript after the variable name
  override def resolveDeclType : IR_Datatype = this
  override def resolveDeclPostscript : String = ""

  // sizes scale the element type's sizes by the number of elements
  override def resolveFlattendSize : Int = datatype.resolveFlattendSize * numElements
  override def typicalByteSize = numElements * datatype.typicalByteSize
}

Compiler/src/exastencils/communication/ir/IR_LocalRecv.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import exastencils.datastructures.Transformation.Output
3030
import exastencils.domain.ir._
3131
import exastencils.field.ir._
3232
import exastencils.parallelization.api.omp.OMP_WaitForFlag
33+
import exastencils.timing.ir._
3334

3435
/// IR_LocalRecv
3536

@@ -85,6 +86,15 @@ case class IR_LocalRecv(
8586
// signal other threads that the data reading step is completed
8687
ifCondStmts += IR_Assignment(IR_IV_LocalCommDone(field, neighbor.index), IR_BooleanConstant(true)) // TODO here too
8788

89+
// add automatic timers for unpacking
90+
val timingCategory = IR_AutomaticTimingCategory.UNPACK
91+
if (IR_AutomaticTimingCategory.categoryEnabled(timingCategory)) {
92+
val timer = IR_IV_AutomaticTimer(s"autoTime_${ timingCategory.toString }", timingCategory)
93+
94+
ifCondStmts.prepend(IR_FunctionCall(IR_StartTimer().name, timer))
95+
ifCondStmts.append(IR_FunctionCall(IR_StopTimer().name, timer))
96+
}
97+
8898
IR_IfCondition(IR_IV_NeighborIsValid(field.domain.index, neighbor.index) AndAnd IR_Negation(IR_IV_NeighborIsRemote(field.domain.index, neighbor.index)),
8999
ifCondStmts)
90100

Compiler/src/exastencils/communication/ir/IR_LocalSend.scala

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import exastencils.datastructures.Transformation.Output
2929
import exastencils.domain.ir._
3030
import exastencils.field.ir._
3131
import exastencils.parallelization.api.omp.OMP_WaitForFlag
32+
import exastencils.timing.ir._
3233

3334
/// IR_LocalSend
3435

@@ -56,13 +57,24 @@ case class IR_LocalSend(
5657
loop.polyOptLevel = 1
5758
loop.parallelization.potentiallyParallel = true
5859

60+
val ifCondStmts = ListBuffer[IR_Statement](
61+
// wait until the fragment to be written to is ready for communication
62+
IR_FunctionCall(OMP_WaitForFlag.generateFctAccess(), IR_AddressOf(IR_IV_LocalCommReady(
63+
field, DefaultNeighbors.getOpposingNeigh(neighbor.index).index, IR_IV_NeighborFragmentIdx(field.domain.index, neighbor.index)))),
64+
loop,
65+
// signal other threads that the data reading step is completed
66+
IR_Assignment(IR_IV_LocalCommDone(field, neighbor.index), IR_BooleanConstant(true)))
67+
68+
// add automatic timers for packing
69+
val timingCategory = IR_AutomaticTimingCategory.PACK
70+
if (IR_AutomaticTimingCategory.categoryEnabled(timingCategory)) {
71+
val timer = IR_IV_AutomaticTimer(s"autoTime_${ timingCategory.toString }", timingCategory)
72+
73+
ifCondStmts.prepend(IR_FunctionCall(IR_StartTimer().name, timer))
74+
ifCondStmts.append(IR_FunctionCall(IR_StopTimer().name, timer))
75+
}
76+
5977
IR_IfCondition(IR_IV_NeighborIsValid(field.domain.index, neighbor.index) AndAnd IR_Negation(IR_IV_NeighborIsRemote(field.domain.index, neighbor.index)),
60-
ListBuffer[IR_Statement](
61-
// wait until the fragment to be written to is ready for communication
62-
IR_FunctionCall(OMP_WaitForFlag.generateFctAccess(), IR_AddressOf(IR_IV_LocalCommReady(
63-
field, DefaultNeighbors.getOpposingNeigh(neighbor.index).index, IR_IV_NeighborFragmentIdx(field.domain.index, neighbor.index)))),
64-
loop,
65-
// signal other threads that the data reading step is completed
66-
IR_Assignment(IR_IV_LocalCommDone(field, neighbor.index), IR_BooleanConstant(true))))
78+
ifCondStmts)
6779
}
6880
}

Compiler/src/exastencils/communication/ir/IR_RemoteCommunicationFinish.scala

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import exastencils.domain.ir._
3232
import exastencils.field.ir._
3333
import exastencils.optimization.ir.IR_SimplifyExpression
3434
import exastencils.parallelization.api.mpi.MPI_DataType
35+
import exastencils.timing.ir._
3536

3637
/// IR_RemoteCommunicationFinish
3738

@@ -75,7 +76,20 @@ case class IR_RemoteCommunicationFinish(
7576
}
7677

7778
def genWait(neighbor : NeighborInfo) : IR_Statement = {
78-
IR_WaitForRemoteTransfer(field, Duplicate(neighbor), s"Recv_${ concurrencyId }")
79+
val ret = IR_WaitForRemoteTransfer(field, Duplicate(neighbor), s"Recv_${ concurrencyId }")
80+
81+
// add automatic timers for waiting (recv)
82+
val timingCategory = IR_AutomaticTimingCategory.WAIT
83+
if (IR_AutomaticTimingCategory.categoryEnabled(timingCategory)) {
84+
val timer = IR_IV_AutomaticTimer(s"autoTime_${ timingCategory.toString }_recv", timingCategory)
85+
86+
IR_Scope(
87+
IR_FunctionCall(IR_StartTimer().name, timer),
88+
ret,
89+
IR_FunctionCall(IR_StopTimer().name, timer))
90+
} else {
91+
ret
92+
}
7993
}
8094

8195
override def expand() : Output[StatementList] = {

Compiler/src/exastencils/communication/ir/IR_RemoteCommunicationStart.scala

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import exastencils.domain.ir._
3232
import exastencils.field.ir._
3333
import exastencils.optimization.ir.IR_SimplifyExpression
3434
import exastencils.parallelization.api.mpi.MPI_DataType
35+
import exastencils.timing.ir._
3536

3637
/// IR_RemoteCommunicationStart
3738

@@ -78,7 +79,20 @@ case class IR_RemoteCommunicationStart(
7879
}
7980

8081
def genWait(neighbor : NeighborInfo) : IR_Statement = {
81-
IR_WaitForRemoteTransfer(field, Duplicate(neighbor), s"Send_${ concurrencyId }")
82+
val ret = IR_WaitForRemoteTransfer(field, Duplicate(neighbor), s"Send_${ concurrencyId }")
83+
84+
// add automatic timers for waiting (send)
85+
val timingCategory = IR_AutomaticTimingCategory.WAIT
86+
if (IR_AutomaticTimingCategory.categoryEnabled(timingCategory)) {
87+
val timer = IR_IV_AutomaticTimer(s"autoTime_${ timingCategory.toString }_send", timingCategory)
88+
89+
IR_Scope(
90+
IR_FunctionCall(IR_StartTimer().name, timer),
91+
ret,
92+
IR_FunctionCall(IR_StopTimer().name, timer))
93+
} else {
94+
ret
95+
}
8296
}
8397

8498
override def expand() : Output[StatementList] = {

Compiler/src/exastencils/communication/ir/IR_RemoteRecv.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import exastencils.domain.ir._
3232
import exastencils.field.ir._
3333
import exastencils.parallelization.api.mpi._
3434
import exastencils.parallelization.ir.IR_PotentiallyCritical
35+
import exastencils.timing.ir._
3536

3637
/// IR_RemoteRecv
3738

@@ -118,6 +119,15 @@ case class IR_CopyFromRecvBuffer(
118119

119120
}
120121

122+
// add automatic timers for unpacking
123+
val timingCategory = IR_AutomaticTimingCategory.UNPACK
124+
if (IR_AutomaticTimingCategory.categoryEnabled(timingCategory)) {
125+
val timer = IR_IV_AutomaticTimer(s"autoTime_${ timingCategory.toString }", timingCategory)
126+
127+
ret.prepend(IR_FunctionCall(IR_StartTimer().name, timer))
128+
ret.append(IR_FunctionCall(IR_StopTimer().name, timer))
129+
}
130+
121131
ret
122132
}
123133
}

Compiler/src/exastencils/communication/ir/IR_RemoteSend.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import exastencils.domain.ir._
3232
import exastencils.field.ir._
3333
import exastencils.parallelization.api.mpi._
3434
import exastencils.parallelization.ir.IR_PotentiallyCritical
35+
import exastencils.timing.ir._
3536

3637
case class IR_RemoteSend(
3738
var field : IR_Field,
@@ -91,6 +92,15 @@ case class IR_CopyToSendBuffer(
9192
ret += loop
9293
}
9394

95+
// add automatic timers for packing
96+
val timingCategory = IR_AutomaticTimingCategory.PACK
97+
if (IR_AutomaticTimingCategory.categoryEnabled(timingCategory)) {
98+
val timer = IR_IV_AutomaticTimer(s"autoTime_${ timingCategory.toString }", timingCategory)
99+
100+
ret.prepend(IR_FunctionCall(IR_StartTimer().name, timer))
101+
ret.append(IR_FunctionCall(IR_StopTimer().name, timer))
102+
}
103+
94104
ret
95105
}
96106
}

Compiler/src/exastencils/config/Knowledge.scala

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,10 +307,13 @@ object Knowledge {
307307
var timer_automaticTiming : Boolean = false
308308
var timer_automaticBCsTiming : Boolean = false
309309
var timer_automaticCommTiming : Boolean = false
310+
var timer_automaticPackingTiming : Boolean = false
311+
var timer_automaticUnpackingTiming : Boolean = false
312+
var timer_automaticWaitTiming : Boolean = false
310313
var timer_automaticIOTiming : Boolean = false
311314

312315
// library/tool to use for benchmarking
313-
// may be one of the following: 'None', 'likwid'
316+
// may be one of the following: 'None', 'likwid', 'talp'
314317
var benchmark_backend = "None"
315318

316319
/// --- interfacing ---
@@ -951,6 +954,10 @@ object Knowledge {
951954
Constraints.condEnsureValue(timer_type, "UNIX_TIME", "Chrono" == timer_type && "IBMBG" == Platform.targetCompiler, "IBM BG does currently not support std::chrono")
952955
Constraints.condEnsureValue(timer_syncDevice, false, !cuda_enabled, "Disabling flag \"timer_syncDevice\". Requires \"cuda_enabled\" to be enabled.")
953956

957+
Constraints.condEnsureValue(timer_automaticCommTiming, true, timer_automaticPackingTiming, "timer_automaticCommTiming must be enabled for timer_automaticPackingTiming.")
958+
Constraints.condEnsureValue(timer_automaticCommTiming, true, timer_automaticUnpackingTiming, "timer_automaticCommTiming must be enabled for timer_automaticUnpackingTiming.")
959+
Constraints.condEnsureValue(timer_automaticCommTiming, true, timer_automaticWaitTiming, "timer_automaticCommTiming must be enabled for timer_automaticWaitTiming.")
960+
954961
Constraints.condEnsureValue(timer_automaticTiming, true, timer_automaticBCsTiming, "Timer flag 'timer_automaticTiming' required for 'timer_automaticBCsTiming = true'")
955962
Constraints.condEnsureValue(timer_automaticTiming, true, timer_automaticCommTiming, "Timer flag 'timer_automaticTiming' required for 'timer_automaticCommTiming = true'")
956963
Constraints.condEnsureValue(timer_automaticTiming, true, timer_automaticIOTiming, "Timer flag 'timer_automaticTiming' required for 'timer_automaticIOTiming = true'")

0 commit comments

Comments
 (0)