Commit 35b993e

Extend logic for Qwen2
1 parent: c4562ad · commit: 35b993e

4 files changed: +28 −15 lines

src/main/java/org/beehive/gpullama3/model/Model.java

Lines changed: 1 addition & 1 deletion

@@ -164,7 +164,7 @@ default void runInstructOnce(Sampler sampler, Options options) {
 
         List<Integer> promptTokens = new ArrayList<>();
 
-        if (!getModelType().equals(ModelType.QWEN_3) && !getModelType().equals(ModelType.PHI_3)) {
+        if (!getModelType().equals(ModelType.QWEN_3) && !getModelType().equals(ModelType.QWEN_2) && !getModelType().equals(ModelType.PHI_3)) {
             promptTokens.add(chatFormat.getBeginOfText());
         }
 
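
In isolation, the changed guard controls whether an explicit begin-of-text token is prepended ahead of the chat-formatted prompt; with this commit, Qwen2 joins Qwen3 and Phi-3 in skipping it, presumably because those chat templates supply their own leading special tokens. Below is a minimal, self-contained sketch of the same guard pattern; the ModelKind enum, the placeholder token id, and buildPromptTokens are illustrative stand-ins, not the repository's Model/ChatFormat API.

import java.util.ArrayList;
import java.util.List;

// Hypothetical, simplified sketch of the begin-of-text guard shown in the diff above.
// ModelKind, BEGIN_OF_TEXT and the token values are illustrative stand-ins.
public class BeginOfTextSketch {

    enum ModelKind { LLAMA_3, MISTRAL, QWEN_2, QWEN_3, PHI_3 }

    static final int BEGIN_OF_TEXT = 1; // placeholder token id

    static List<Integer> buildPromptTokens(ModelKind kind, List<Integer> chatTemplateTokens) {
        List<Integer> promptTokens = new ArrayList<>();
        // Qwen2, Qwen3 and Phi-3 skip the explicit begin-of-text token,
        // mirroring the guard introduced in this commit.
        if (kind != ModelKind.QWEN_3 && kind != ModelKind.QWEN_2 && kind != ModelKind.PHI_3) {
            promptTokens.add(BEGIN_OF_TEXT);
        }
        promptTokens.addAll(chatTemplateTokens);
        return promptTokens;
    }

    public static void main(String[] args) {
        System.out.println(buildPromptTokens(ModelKind.LLAMA_3, List.of(10, 11))); // [1, 10, 11]
        System.out.println(buildPromptTokens(ModelKind.QWEN_2, List.of(10, 11)));  // [10, 11]
    }
}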

src/main/java/org/beehive/gpullama3/model/ModelType.java

Lines changed: 8 additions & 0 deletions

@@ -4,6 +4,7 @@
 import org.beehive.gpullama3.model.loader.LlamaModelLoader;
 import org.beehive.gpullama3.model.loader.MistralModelLoader;
 import org.beehive.gpullama3.model.loader.Phi3ModelLoader;
+import org.beehive.gpullama3.model.loader.Qwen2ModelLoader;
 import org.beehive.gpullama3.model.loader.Qwen3ModelLoader;
 
 import java.nio.channels.FileChannel;

@@ -35,6 +36,13 @@ public Model loadModel(FileChannel fileChannel, GGUF gguf, int contextLength, bo
         }
     },
 
+    QWEN_2 {
+        @Override
+        public Model loadModel(FileChannel fileChannel, GGUF gguf, int contextLength, boolean loadWeights) {
+            return new Qwen2ModelLoader(fileChannel, gguf, contextLength, loadWeights).loadModel();
+        }
+    },
+
     QWEN_3 {
         @Override
         public Model loadModel(FileChannel fileChannel, GGUF gguf, int contextLength, boolean loadWeights) {
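
The new QWEN_2 constant follows the same enum-with-constant-specific-body pattern as its neighbours: every ModelType constant overrides loadModel and delegates to its dedicated loader. A simplified, standalone sketch of that pattern follows; the nested Model record and the String path argument are placeholders for the real FileChannel/GGUF-based signature.

// Hypothetical sketch of the per-constant loadModel dispatch used by ModelType.
// Model and the String argument stand in for the repository's real types.
public class ModelTypeSketch {

    record Model(String description) {}

    enum ModelType {
        QWEN_2 {
            @Override
            public Model loadModel(String ggufPath) {
                return new Model("Qwen2 weights loaded from " + ggufPath);
            }
        },
        QWEN_3 {
            @Override
            public Model loadModel(String ggufPath) {
                return new Model("Qwen3 weights loaded from " + ggufPath);
            }
        };

        // each constant supplies its own implementation, so callers never switch on the type
        public abstract Model loadModel(String ggufPath);
    }

    public static void main(String[] args) {
        System.out.println(ModelType.QWEN_2.loadModel("qwen2-7b-instruct.gguf").description());
    }
}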

src/main/java/org/beehive/gpullama3/model/loader/ModelLoader.java

Lines changed: 2 additions & 0 deletions

@@ -60,6 +60,8 @@ private static ModelType detectModelType(Map<String, Object> metadata) {
             return ModelType.MISTRAL;
         } else if (lowerName.contains("llama")) {
             return ModelType.LLAMA_3;
+        } else if (lowerName.contains("qwen2") || lowerName.contains("deepseek r1 distill")) {
+            return ModelType.QWEN_2;
         } else if (lowerName.contains("qwen3")) {
             return ModelType.QWEN_3;
         } else if (lowerName.contains("phi3")) {
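
detectModelType resolves the model family by substring-matching the lowercased model name taken from the GGUF metadata, and this commit routes both "qwen2" and "deepseek r1 distill" names to QWEN_2, apparently because the R1 distills handled here are Qwen2-based checkpoints. The standalone sketch below mirrors that detection chain; the "general.name" metadata key and the ModelKind enum are illustrative assumptions, not the repository's exact types.

import java.util.Locale;
import java.util.Map;

// Standalone sketch of the name-based detection chain in detectModelType.
// The "general.name" key and ModelKind are assumed/illustrative.
public class DetectModelTypeSketch {

    enum ModelKind { LLAMA_3, MISTRAL, QWEN_2, QWEN_3, PHI_3, UNKNOWN }

    static ModelKind detectModelType(Map<String, Object> metadata) {
        String name = (String) metadata.getOrDefault("general.name", "");
        String lowerName = name.toLowerCase(Locale.ROOT);
        if (lowerName.contains("mistral")) {
            return ModelKind.MISTRAL;
        } else if (lowerName.contains("llama")) {
            return ModelKind.LLAMA_3;
        } else if (lowerName.contains("qwen2") || lowerName.contains("deepseek r1 distill")) {
            // DeepSeek R1 distill names are routed to the Qwen2 path, as in the commit
            return ModelKind.QWEN_2;
        } else if (lowerName.contains("qwen3")) {
            return ModelKind.QWEN_3;
        } else if (lowerName.contains("phi3")) {
            return ModelKind.PHI_3;
        }
        return ModelKind.UNKNOWN;
    }

    public static void main(String[] args) {
        Map<String, Object> metadata = Map.of("general.name", "DeepSeek R1 Distill Qwen 7B");
        System.out.println(detectModelType(metadata)); // QWEN_2
    }
}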

src/main/java/org/beehive/gpullama3/tornadovm/TornadoVMMasterPlan.java

Lines changed: 17 additions & 14 deletions

@@ -93,7 +93,21 @@ public static TornadoVMMasterPlan initializeTornadoVMPlan(State state, Model mod
     }
 
     /**
-     * Determines whether the NVIDIA-specific scheduler should be used based on the current hardware backend and the model type.
+     * Dispatcher method to select the TornadoVMLayerPlanner for the model.
+     */
+    TornadoVMLayerPlanner createPlanner(State state, Model model) {
+        return switch (model.getModelType()) {
+            case LLAMA_3, MISTRAL -> new TornadoVMLayerPlanner(state, model);
+            case PHI_3 -> new Phi3TornadoVMLayerPlanner((Phi3State) state, model);
+            case QWEN_2 -> throw new UnsupportedOperationException("TornadoVM QWEN 2 not supported");
+            case QWEN_3 -> new Qwen3TornadoVMLayerPlanner((Qwen3State) state, model);
+            case UNKNOWN -> throw new UnsupportedOperationException("Unknown model type");
+        };
+    }
+
+    /**
+     * Determines whether the NVIDIA-specific scheduler should be used based on the current
+     * hardware backend and the model type.
      * <p>
      * The scheduler is used only if the runtime is targeting an NVIDIA backend and the model is not of type {@code MISTRAL}. If either the hardware is not NVIDIA or the model is {@code MISTRAL}, the
      * NVIDIA-specific scheduler should not be used.

@@ -115,19 +129,8 @@ public static boolean shouldUseNvidiaScheduler(Model model) {
     }
 
     /**
-     * Dispatcher method to select the TornadoVMLayerPlanner for the model.
-     */
-    TornadoVMLayerPlanner createPlanner(State state, Model model) {
-        return switch (model.getModelType()) {
-            case LLAMA_3, MISTRAL -> new TornadoVMLayerPlanner(state, model);
-            case QWEN_3 -> new Qwen3TornadoVMLayerPlanner((Qwen3State) state, model);
-            case PHI_3 -> new Phi3TornadoVMLayerPlanner((Phi3State) state, model);
-            case UNKNOWN -> throw new UnsupportedOperationException("Unknown model type");
-        };
-    }
-
-    /**
-     * Executes the forward pass of a LLaMA transformer model using TornadoVM acceleration. This method processes the transformer layers in sequence for a particular token position in the context
+     * Executes the forward pass of a LLaMA transformer model using TornadoVM acceleration.
+     * This method processes the transformer layers in sequence for a particular token position in the context
      * window.
      *
      * <p>The execution happens in three phases:
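
Finally, the createPlanner dispatcher is moved ahead of shouldUseNvidiaScheduler and gains a QWEN_2 case that fails fast, since no Qwen2 TornadoVM planner exists yet. The self-contained sketch below shows the same exhaustive-switch dispatch idea; the Planner interface and the per-model planner records are placeholders for the TornadoVMLayerPlanner hierarchy and its State/Model arguments.

// Simplified sketch of the exhaustive switch dispatch used by createPlanner.
// Planner and the planner records are placeholders, not the repository's classes.
public class PlannerDispatchSketch {

    enum ModelKind { LLAMA_3, MISTRAL, QWEN_2, QWEN_3, PHI_3, UNKNOWN }

    interface Planner {}
    record DefaultPlanner() implements Planner {}
    record Phi3Planner() implements Planner {}
    record Qwen3Planner() implements Planner {}

    static Planner createPlanner(ModelKind kind) {
        // a switch expression over an enum must cover every constant, so an
        // unsupported model type fails loudly instead of falling through silently
        return switch (kind) {
            case LLAMA_3, MISTRAL -> new DefaultPlanner();
            case PHI_3 -> new Phi3Planner();
            case QWEN_2 -> throw new UnsupportedOperationException("TornadoVM QWEN 2 not supported");
            case QWEN_3 -> new Qwen3Planner();
            case UNKNOWN -> throw new UnsupportedOperationException("Unknown model type");
        };
    }

    public static void main(String[] args) {
        System.out.println(createPlanner(ModelKind.QWEN_3).getClass().getSimpleName()); // Qwen3Planner
    }
}

Because the switch expression covers every enum constant, adding a new model type without updating the dispatcher becomes a compile-time error rather than a silent fallthrough, which is presumably why the QWEN_2 case throws explicitly instead of being omitted.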
