@@ -79,14 +79,27 @@ class ProfilingPrototypeMixIn(ABC):
7979
8080 _printLoopSetup = NodeTemplate ("""
8181 StopTimer();
82+ printf("===== Profiling ${nodeName} =====\\n");
8283 for (int ${profileIdxVar} = ((*${tileIdxPtr} > 0) ? ${numTiles}[(*${tileIdxPtr} - 1)] : 0);
8384 ${profileIdxVar} < ${numTiles}[*${tileIdxPtr}];
8485 ${profileIdxVar}++){
8586 """ )
8687
87- _printCycleDifference = NodeTemplate (r"""
88- printf("%s%u] %s%u%s", ${prefixStr}, ${profileIdxVar}, "${flavorStr}", \
89- ${measurementsEnd}[${profileIdxVar}] - ${measurementsStart}[${profileIdxVar}], ${suffixStr});
88+ _measurementDeclaration = NodeTemplate ("""
89+ uint32_t ${measurement} = ${measurementsEnd}[${profileIdxVar}] - ${measurementsStart}[${profileIdxVar}];
90+ """ )
91+
92+ _printCycleDifference = NodeTemplate ("""
93+ printf("%s%u] %s%6u%s", ${prefixStr}, ${profileIdxVar}, "${flavorStr}", \
94+ ${measurement}, ${suffixStr});
95+ """ )
96+
97+ _printCycleContribution = NodeTemplate ("""
98+ uint32_t total = ${measurementInput} + ${measurementKernel} + ${measurementOutput};
99+ uint32_t dma = ${measurementInput} + ${measurementOutput};
100+ float overhead_percentage = (total == 0) ? 0 : dma * 100.0f / total;
101+ float kernel_percentage = (total == 0) ? 0 : ${measurementKernel} * 100.0f / total;
102+ printf("%s%u] Total :%6u cycles (%2.1f%% Kernel + %2.1f%% Overhead, %u + %u)\\n", ${prefixStr}, ${profileIdxVar}, total, kernel_percentage, overhead_percentage , ${measurementKernel}, dma);
90103 """ )
91104
92105 _printLoopTeardown = NodeTemplate ("""
@@ -151,13 +164,37 @@ def injectPrintCycleDiff(cls, executionBlock: ExecutionBlock, metaInfo: TilingMe
151164 "tileIdxPtr" : tileIdxPtr ,
152165 })
153166
167+ executionBlock .addRight (
168+ cls ._measurementDeclaration , {
169+ "measurement" : f"{nodeName}_ingress_dma_wait_measurement" ,
170+ "measurementsStart" : f"{nodeName}_ingress_dma_wait_start_measurements" ,
171+ "measurementsEnd" : f"{nodeName}_ingress_dma_wait_end_measurements" ,
172+ "profileIdxVar" : profileIdxVar ,
173+ })
174+
175+ if metaInfo .kernelLevelTiling :
176+ executionBlock .addRight (
177+ cls ._measurementDeclaration , {
178+ "measurement" : f"{nodeName}_kernel_measurement" ,
179+ "measurementsStart" : f"{nodeName}_kernel_start_measurements" ,
180+ "measurementsEnd" : f"{nodeName}_kernel_end_measurements" ,
181+ "profileIdxVar" : profileIdxVar ,
182+ })
183+
184+ executionBlock .addRight (
185+ cls ._measurementDeclaration , {
186+ "measurement" : f"{nodeName}_egress_dma_wait_measurement" ,
187+ "measurementsStart" : f"{nodeName}_egress_dma_wait_start_measurements" ,
188+ "measurementsEnd" : f"{nodeName}_egress_dma_wait_end_measurements" ,
189+ "profileIdxVar" : profileIdxVar ,
190+ })
191+
154192 executionBlock .addRight (
155193 cls ._printCycleDifference , {
156194 "prefixStr" : f"{nodeName}_prefix" ,
157195 "suffixStr" : f"{nodeName}_suffix" ,
158- "flavorStr" : "Input DMA took " ,
159- "measurementsStart" : f"{nodeName}_ingress_dma_wait_start_measurements" ,
160- "measurementsEnd" : f"{nodeName}_ingress_dma_wait_end_measurements" ,
196+ "flavorStr" : "Pre-Kernel :" ,
197+ "measurement" : f"{nodeName}_ingress_dma_wait_measurement" ,
161198 "profileIdxVar" : profileIdxVar ,
162199 })
163200
@@ -166,22 +203,32 @@ def injectPrintCycleDiff(cls, executionBlock: ExecutionBlock, metaInfo: TilingMe
166203 cls ._printCycleDifference , {
167204 "prefixStr" : f"{nodeName}_prefix" ,
168205 "suffixStr" : f"{nodeName}_suffix" ,
169- "flavorStr" : "Kernel took " ,
170- "measurementsStart" : f"{nodeName}_kernel_start_measurements" ,
171- "measurementsEnd" : f"{nodeName}_kernel_end_measurements" ,
206+ "flavorStr" : "Kernel :" ,
207+ "measurement" : f"{nodeName}_kernel_measurement" ,
172208 "profileIdxVar" : profileIdxVar ,
173209 })
174210
175211 executionBlock .addRight (
176212 cls ._printCycleDifference , {
177213 "prefixStr" : f"{nodeName}_prefix" ,
178214 "suffixStr" : f"{nodeName}_suffix" ,
179- "flavorStr" : "Output DMA took " ,
180- "measurementsStart" : f"{nodeName}_egress_dma_wait_start_measurements" ,
181- "measurementsEnd" : f"{nodeName}_egress_dma_wait_end_measurements" ,
215+ "flavorStr" : "Post-Kernel:" ,
216+ "measurement" : f"{nodeName}_egress_dma_wait_measurement" ,
182217 "profileIdxVar" : profileIdxVar ,
183218 })
184219
220+ # Total Time: Input + Kernel + Output
221+ # Overhead: (Input + Output) / Total
222+ if metaInfo .kernelLevelTiling :
223+ executionBlock .addRight (
224+ cls ._printCycleContribution , {
225+ "prefixStr" : f"{nodeName}_prefix" ,
226+ "measurementInput" : f"{nodeName}_ingress_dma_wait_measurement" ,
227+ "measurementKernel" : f"{nodeName}_kernel_measurement" ,
228+ "measurementOutput" : f"{nodeName}_egress_dma_wait_measurement" ,
229+ "profileIdxVar" : profileIdxVar ,
230+ })
231+
185232 executionBlock .addRight (cls ._printLoopTeardown , {})
186233
187234 return executionBlock
0 commit comments