@@ -21,12 +21,15 @@
 
 public interface Model {
     Configuration configuration();
+
     Tokenizer tokenizer();
+
     Weights weights();
 
     ModelType getModelType();
 
     State createNewState();
+
     State createNewState(int batchsize);
 
     /**
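For orientation, a minimal sketch of how the two `createNewState` overloads above might be exercised; `loadModel()` and the batch size are hypothetical placeholders, and only the `Model` methods come from this diff:

```java
// Hedged sketch, not code from this PR: calling the createNewState overloads.
Model model = loadModel();                     // hypothetical helper returning a Model
State singleState = model.createNewState();    // presumably uses a default batch size
State batchedState = model.createNewState(8);  // explicit batch size (8 is illustrative)
```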
|
@@ -85,14 +88,12 @@ default void runInteractive(Sampler sampler, Options options) {
         // Choose between GPU and CPU path based on configuration
         if (USE_TORNADOVM) {
             // GPU path using TornadoVM
-            responseTokens = InferenceEngine.generateTokensGPU(this, state, startPosition,
-                    conversationTokens.subList(startPosition, conversationTokens.size()), stopTokens,
+            responseTokens = InferenceEngine.generateTokensGPU(this, state, startPosition, conversationTokens.subList(startPosition, conversationTokens.size()), stopTokens,
                     options.maxTokens(), sampler, options.echo(), options.stream() ? tokenConsumer : null, tornadoVMPlan);
         } else {
             // CPU path
-            responseTokens = InferenceEngine.generateTokens(this, state, startPosition,
-                    conversationTokens.subList(startPosition, conversationTokens.size()), stopTokens,
-                    options.maxTokens(), sampler, options.echo(), tokenConsumer);
+            responseTokens = InferenceEngine.generateTokens(this, state, startPosition, conversationTokens.subList(startPosition, conversationTokens.size()), stopTokens, options.maxTokens(),
+                    sampler, options.echo(), tokenConsumer);
         }
 
         // Include stop token in the prompt history, but not in the response displayed to the user.
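A side note on the `subList(startPosition, conversationTokens.size())` idiom that both call sites share: it passes only the tokens appended since `startPosition`, as a view over the conversation history rather than a copy. A self-contained illustration (the token values are made up):

```java
import java.util.List;

// Illustrative only: the subList(startPosition, size()) idiom from the diff above.
public class SubListDemo {
    public static void main(String[] args) {
        List<Integer> conversationTokens = List.of(1, 2, 3, 4, 5, 6);
        int startPosition = 4; // tokens 0..3 were already processed (illustrative)
        // Only the tokens added since startPosition are handed to the engine.
        List<Integer> newTokens = conversationTokens.subList(startPosition, conversationTokens.size());
        System.out.println(newTokens); // prints [5, 6]
    }
}
```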
|
@@ -164,11 +165,10 @@ default void runInstructOnce(Sampler sampler, Options options) {
         if (USE_TORNADOVM) {
             tornadoVMPlan = TornadoVMMasterPlan.initializeTornadoVMPlan(state, this);
             // Call generateTokensGPU without the token consumer parameter
-            responseTokens = InferenceEngine.generateTokensGPU(this, state, 0, promptTokens, stopTokens,
-                    options.maxTokens(), sampler, options.echo(), options.stream() ? tokenConsumer : null, tornadoVMPlan);
+            responseTokens = InferenceEngine.generateTokensGPU(this, state, 0, promptTokens, stopTokens, options.maxTokens(), sampler, options.echo(), options.stream() ? tokenConsumer : null,
+                    tornadoVMPlan);
         } else {
-            responseTokens = InferenceEngine.generateTokens(this, state, 0, promptTokens, stopTokens,
-                    options.maxTokens(), sampler, options.echo(), tokenConsumer);
+            responseTokens = InferenceEngine.generateTokens(this, state, 0, promptTokens, stopTokens, options.maxTokens(), sampler, options.echo(), tokenConsumer);
         }
 
         if (!responseTokens.isEmpty() && stopTokens.contains(responseTokens.getLast())) {
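The context line above, together with the earlier comment about keeping the stop token in the prompt history but not in the displayed response, suggests a trim-before-display step. A hedged, runnable sketch of that pattern; the token ids are invented, and only the `getLast()`/`contains()` check mirrors the diff:

```java
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

// Hedged sketch of the stop-token handling implied by the surrounding lines:
// keep the stop token in the history, strip it from the displayed response.
public class StopTokenDemo {
    public static void main(String[] args) {
        List<Integer> responseTokens = new ArrayList<>(List.of(42, 7, 128001));
        Set<Integer> stopTokens = Set.of(128001); // illustrative end-of-turn id
        if (!responseTokens.isEmpty() && stopTokens.contains(responseTokens.getLast())) {
            // getLast()/removeLast() are Java 21 SequencedCollection methods.
            responseTokens.removeLast(); // drop the stop token before display
        }
        System.out.println(responseTokens); // prints [42, 7]
    }
}
```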