You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
// Configuration flags for hardware acceleration and optimizations
19
20
publicstaticfinalbooleanUSE_VECTOR_API = Boolean.parseBoolean(System.getProperty("llama.VectorAPI", "true")); // Enable Java Vector API for CPU acceleration
20
21
publicstaticfinalbooleanUSE_AOT = Boolean.parseBoolean(System.getProperty("llama.AOT", "false")); // Use Ahead-of-Time compilation
21
-
publicstaticfinalbooleanUSE_TORNADOVM = Boolean.parseBoolean(System.getProperty("use.tornadovm", "false")); // Use TornadoVM for GPU acceleration
22
22
publicstaticfinalbooleanSHOW_PERF_INTERACTIVE = Boolean.parseBoolean(System.getProperty("llama.ShowPerfInteractive", "true")); // Show performance metrics in interactive mode
23
23
24
24
/**
@@ -36,27 +36,29 @@ public class LlamaApp {
36
36
* <p>The method handles both {@link FloatTensor} and {@link FloatArray} logits types
37
37
* to support both CPU and GPU execution paths.</p>
38
38
*
39
-
* @param vocabularySize The size of the model's vocabulary
40
-
* @param temperature A value controlling randomness in sampling:
41
-
* <ul>
42
-
* <li>0.0f: No randomness (greedy sampling)</li>
43
-
* <li>1.0f: Standard sampling from unmodified distribution</li>
44
-
* <li>&lt;1.0f: More deterministic (sharper distribution)</li>
45
-
* <li>&gt;1.0f: More random (flatter distribution)</li>
46
-
* </ul>
47
-
* @param topp The cumulative probability threshold for nucleus sampling (0.0-1.0).
48
-
* <ul>
49
-
* <li>Values ≤0 or ≥1: Disables top-p sampling</li>
50
-
* <li>Values in (0,1): Restricts sampling to tokens comprising the top p probability mass</li>
51
-
* </ul>
52
-
* @param rngSeed Seed value for the random number generator to ensure reproducibility
53
-
*
54
-
* @return A configured {@link Sampler} that implements the selected sampling strategy
55
-
* and handles both tensor and array-based logits
56
-
*
57
-
* @throws IllegalArgumentException if logits are of an unsupported type
39
+
* @param vocabularySize
40
+
* The size of the model's vocabulary
41
+
* @param temperature
42
+
* A value controlling randomness in sampling:
43
+
* <ul>
44
+
* <li>0.0f: No randomness (greedy sampling)</li>
45
+
* <li>1.0f: Standard sampling from unmodified distribution</li>
46
+
* <li>&lt;1.0f: More deterministic (sharper distribution)</li>
47
+
* <li>&gt;1.0f: More random (flatter distribution)</li>
48
+
* </ul>
49
+
* @param topp
50
+
* The cumulative probability threshold for nucleus sampling (0.0-1.0).
51
+
* <ul>
52
+
* <li>Values ≤0 or ≥1: Disables top-p sampling</li>
53
+
* <li>Values in (0,1): Restricts sampling to tokens comprising the top p probability mass</li>
54
+
* </ul>
55
+
* @param rngSeed
56
+
* Seed value for the random number generator to ensure reproducibility
57
+
* @return A configured {@link Sampler} that implements the selected sampling strategy and handles both tensor and array-based logits
* Loads the language model based on the given options.
111
113
* <p>
112
-
* If Ahead-of-Time (AOT) mode is enabled, attempts to use a pre-loaded compiled model.
113
-
* Otherwise, loads the model from the specified path using the model loader.
114
+
* If Ahead-of-Time (AOT) mode is enabled, attempts to use a pre-loaded compiled model. Otherwise, loads the model from the specified path using the model loader.
114
115
* </p>
115
116
*
116
-
* @param options the parsed CLI options containing model path and max token limit
117
+
* @param options
118
+
* the parsed CLI options containing model path and max token limit
117
119
* @return the loaded {@link Model} instance
118
-
* @throws IOException if the model fails to load
119
-
* @throws IllegalStateException if AOT loading is enabled but the preloaded model is unavailable
120
+
* @throws IOException
121
+
* if the model fails to load
122
+
* @throws IllegalStateException
123
+
* if AOT loading is enabled but the preloaded model is unavailable
0 commit comments