Commit 726eec2

Decouple LastRunMetrics class from Llama and reuse it for Mistral
1 parent 115f25a

3 files changed: 40 additions & 35 deletions
src/main/java/com/example/auxiliary/LastRunMetrics.java

Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
+package com.example.auxiliary;
+
+/**
+ * Record to store metrics from the last model run.
+ * @param totalTokens The total number of tokens processed
+ * @param totalSeconds The total time in seconds
+ */
+public record LastRunMetrics(int totalTokens, double totalSeconds) {
+    /**
+     * Singleton instance to store the latest metrics
+     */
+    private static LastRunMetrics latestMetrics;
+
+    /**
+     * Sets the metrics for the latest run
+     *
+     * @param tokens The total number of tokens processed
+     * @param seconds The total time in seconds
+     */
+    public static void setMetrics(int tokens, double seconds) {
+        latestMetrics = new LastRunMetrics(tokens, seconds);
+    }
+
+    /**
+     * Prints the metrics from the latest run to stderr
+     */
+    public static void printMetrics() {
+        if (latestMetrics != null) {
+            double tokensPerSecond = latestMetrics.totalTokens() / latestMetrics.totalSeconds();
+            System.err.printf("\n\nachieved tok/s: %.2f. Tokens: %d, seconds: %.2f\n", tokensPerSecond, latestMetrics.totalTokens(), latestMetrics.totalSeconds());
+        }
+    }
+}
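For context, a minimal sketch of how a model runner is expected to feed this shared record. It is not part of this commit; runGeneration() and the timing variables are hypothetical placeholders:

    // Hypothetical call site at the end of a model's decode loop (illustration only).
    long startNanos = System.nanoTime();
    int generatedTokens = runGeneration();   // placeholder for the model-specific generation step
    double elapsedSeconds = (System.nanoTime() - startNanos) / 1_000_000_000.0;

    // Record the totals once, then print them wherever a summary is wanted.
    LastRunMetrics.setMetrics(generatedTokens, elapsedSeconds);
    LastRunMetrics.printMetrics();           // writes "achieved tok/s: ..." to stderr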

src/main/java/com/example/model/llama/Llama.java

Lines changed: 4 additions & 35 deletions
@@ -1,7 +1,8 @@
 package com.example.model.llama;
 
-import com.example.auxiliary.Parallel;
+import com.example.auxiliary.LastRunMetrics;
 import com.example.auxiliary.format.LlamaChatFormat;
+import com.example.auxiliary.Parallel;
 import com.example.core.model.tensor.FloatTensor;
 import com.example.model.Configuration;
 import com.example.inference.sampler.Sampler;
@@ -480,7 +481,7 @@ public void runInteractive(Sampler sampler, Options options) {
 
                // Optionally print performance metrics after each response
                if (SHOW_PERF_INTERACTIVE) {
-                    Llama.LastRunMetrics.printMetrics();
+                    LastRunMetrics.printMetrics();
                }
            }
        } finally {
@@ -538,44 +539,12 @@ public void runInstructOnce(Sampler sampler, Options options) {
             System.out.println(responseText);
         }
 
-        Llama.LastRunMetrics.printMetrics();
+        LastRunMetrics.printMetrics();
 
         if (tornadoVMPlan != null) {
             tornadoVMPlan.freeTornadoExecutionPlan();
         }
     }
 
-    /**
-     * Record to store metrics from the last model run.
-     * @param totalTokens The total number of tokens processed
-     * @param totalSeconds The total time in seconds
-     */
-    public record LastRunMetrics(int totalTokens, double totalSeconds) {
-        /**
-         * Singleton instance to store the latest metrics
-         */
-        private static LastRunMetrics latestMetrics;
-
-        /**
-         * Sets the metrics for the latest run
-         *
-         * @param tokens The total number of tokens processed
-         * @param seconds The total time in seconds
-         */
-        public static void setMetrics(int tokens, double seconds) {
-            latestMetrics = new LastRunMetrics(tokens, seconds);
-        }
-
-        /**
-         * Prints the metrics from the latest run to stderr
-         */
-        public static void printMetrics() {
-            if (latestMetrics != null) {
-                double tokensPerSecond = latestMetrics.totalTokens() / latestMetrics.totalSeconds();
-                System.err.printf("\n\nachieved tok/s: %.2f. Tokens: %d, seconds: %.2f\n", tokensPerSecond, latestMetrics.totalTokens(), latestMetrics.totalSeconds());
-            }
-        }
-    }
-
 }

src/main/java/com/example/model/mistral/Mistral.java

Lines changed: 3 additions & 0 deletions
@@ -1,6 +1,7 @@
 package com.example.model.mistral;
 
 import com.example.auxiliary.Parallel;
+import com.example.auxiliary.LastRunMetrics;
 import com.example.auxiliary.format.MistralChatFormat;
 import com.example.core.model.tensor.FloatTensor;
 import com.example.model.Configuration;
@@ -323,5 +324,7 @@ public void runInstructOnce(Sampler sampler, Options options) {
             String responseText = tokenizer.decode(responseTokens);
             System.out.println(responseText);
         }
+
+        LastRunMetrics.printMetrics();
     }
 }
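As a quick sanity check on the decoupled record (again not part of this commit; the class name LastRunMetricsDemo and the sample numbers are made up), a self-contained snippet and the stderr line it should produce:

    package com.example.auxiliary;

    // Hypothetical demo class, assuming LastRunMetrics is on the classpath.
    public class LastRunMetricsDemo {
        public static void main(String[] args) {
            // No metrics recorded yet, so this prints nothing.
            LastRunMetrics.printMetrics();

            // 128 tokens in 2.5 seconds -> 128 / 2.5 = 51.20 tok/s.
            LastRunMetrics.setMetrics(128, 2.5);
            LastRunMetrics.printMetrics();
            // stderr: achieved tok/s: 51.20. Tokens: 128, seconds: 2.50
        }
    }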
