|
1 | 1 | package com.example.model.llama;
|
2 | 2 |
|
3 |
| -import com.example.auxiliary.Parallel; |
| 3 | +import com.example.auxiliary.LastRunMetrics; |
4 | 4 | import com.example.auxiliary.format.LlamaChatFormat;
|
| 5 | +import com.example.auxiliary.Parallel; |
5 | 6 | import com.example.core.model.tensor.FloatTensor;
|
6 | 7 | import com.example.model.Configuration;
|
7 | 8 | import com.example.inference.sampler.Sampler;
|
@@ -480,7 +481,7 @@ public void runInteractive(Sampler sampler, Options options) {
|
480 | 481 |
|
481 | 482 | // Optionally print performance metrics after each response
|
482 | 483 | if (SHOW_PERF_INTERACTIVE) {
|
483 |
| - Llama.LastRunMetrics.printMetrics(); |
| 484 | + LastRunMetrics.printMetrics(); |
484 | 485 | }
|
485 | 486 | }
|
486 | 487 | } finally {
|
@@ -538,44 +539,12 @@ public void runInstructOnce(Sampler sampler, Options options) {
|
538 | 539 | System.out.println(responseText);
|
539 | 540 | }
|
540 | 541 |
|
541 |
| - Llama.LastRunMetrics.printMetrics(); |
| 542 | + LastRunMetrics.printMetrics(); |
542 | 543 |
|
543 | 544 | if (tornadoVMPlan != null) {
|
544 | 545 | tornadoVMPlan.freeTornadoExecutionPlan();
|
545 | 546 | }
|
546 | 547 | }
|
547 | 548 |
|
548 |
| - /** |
549 |
| - * Record to store metrics from the last model run. |
550 |
| - * @param totalTokens The total number of tokens processed |
551 |
| - * @param totalSeconds The total time in seconds |
552 |
| - */ |
553 |
| - public record LastRunMetrics(int totalTokens, double totalSeconds) { |
554 |
| - /** |
555 |
| - * Singleton instance to store the latest metrics |
556 |
| - */ |
557 |
| - private static LastRunMetrics latestMetrics; |
558 |
| - |
559 |
| - /** |
560 |
| - * Sets the metrics for the latest run |
561 |
| - * |
562 |
| - * @param tokens The total number of tokens processed |
563 |
| - * @param seconds The total time in seconds |
564 |
| - */ |
565 |
| - public static void setMetrics(int tokens, double seconds) { |
566 |
| - latestMetrics = new LastRunMetrics(tokens, seconds); |
567 |
| - } |
568 |
| - |
569 |
| - /** |
570 |
| - * Prints the metrics from the latest run to stderr |
571 |
| - */ |
572 |
| - public static void printMetrics() { |
573 |
| - if (latestMetrics != null) { |
574 |
| - double tokensPerSecond = latestMetrics.totalTokens() / latestMetrics.totalSeconds(); |
575 |
| - System.err.printf("\n\nachieved tok/s: %.2f. Tokens: %d, seconds: %.2f\n", tokensPerSecond, latestMetrics.totalTokens(), latestMetrics.totalSeconds()); |
576 |
| - } |
577 |
| - } |
578 |
| - } |
579 |
| - |
580 | 549 | }
|
581 | 550 |
|
0 commit comments