Skip to content

Commit 6c7486d

Browse files
[WIP] Start adding logic for tornado path in service
1 parent b5e9c2a commit 6c7486d

File tree

3 files changed

+14 -2 lines changed

3 files changed

+14 -2 lines changed

llama-tornado

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,12 @@ class LlamaRunner:
229229
if args.service:
230230
print("Starting GPULlama3.java REST API Service...")
231231
print(f"Model: {args.model_path}")
232+
# Display GPU/backend configuration
233+
if args.use_gpu:
234+
print(f"GPU Acceleration: Enabled ({args.backend.value.upper()} backend)")
235+
print(f"GPU Memory: {args.gpu_memory}")
236+
else:
237+
print("GPU Acceleration: Disabled (CPU mode)")
232238
print("API endpoints available at:")
233239
print(" - http://localhost:8080/chat")
234240
print(" - http://localhost:8080/chat/stream")

src/main/java/org/beehive/gpullama3/Options.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ public static Options parseOptions(String[] args) {
132132
public static Options parseServiceOptions(String[] args) {
133133
Path modelPath = null;
134134
int maxTokens = 512; // Default context length
135+
Boolean useTornadovm = null;
135136

136137
for (int i = 0; i < args.length; i++) {
137138
String optionName = args[i];
@@ -152,11 +153,16 @@ public static Options parseServiceOptions(String[] args) {
152153
switch (optionName) {
153154
case "--model", "-m" -> modelPath = Paths.get(nextArg);
154155
case "--max-tokens", "-n" -> maxTokens = Integer.parseInt(nextArg);
156+
case "--use-tornadovm" -> useTornadovm = Boolean.parseBoolean(nextArg);
155157
}
156158
}
157159

158160
require(modelPath != null, "Missing argument: --model <path> is required");
159161

162+
if (useTornadovm == null) {
163+
useTornadovm = getDefaultTornadoVM();
164+
}
165+
160166
// Create service-mode Options object
161167
return new Options(
162168
modelPath,
@@ -170,7 +176,7 @@ public static Options parseServiceOptions(String[] args) {
170176
maxTokens,
171177
false, // stream - handled per request
172178
false, // echo - not used in service
173-
getDefaultTornadoVM(),
179+
useTornadovm,
174180
true
175181
);
176182
}

src/main/java/org/beehive/gpullama3/api/service/LLMService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public void init() {
4545
// Step 2: Load model weights
4646
System.out.println("\nStep 2: Loading model...");
4747
System.out.println("Loading model from: " + options.modelPath());
48-
model = ModelLoader.loadModel(options.modelPath(), options.maxTokens(), true);
48+
model = ModelLoader.loadModel(options.modelPath(), options.maxTokens(), true, options.useTornadovm());
4949
System.out.println("✓ Model loaded successfully");
5050
System.out.println(" Model type: " + model.getClass().getSimpleName());
5151
System.out.println(" Vocabulary size: " + model.configuration().vocabularySize());

0 commit comments

Comments (0)