|
1 | 1 | package org.beehive.gpullama3.model.loader;
|
2 | 2 |
|
3 | 3 | import org.beehive.gpullama3.Options;
|
| 4 | +import org.beehive.gpullama3.aot.AOT; |
4 | 5 | import org.beehive.gpullama3.core.model.GGMLType;
|
5 | 6 | import org.beehive.gpullama3.core.model.GGUF;
|
6 | 7 | import org.beehive.gpullama3.core.model.tensor.ArrayFloatTensor;
|
|
35 | 36 |
|
36 | 37 | public abstract class ModelLoader {
|
37 | 38 |
|
| 39 | + public static final boolean USE_AOT = Boolean.parseBoolean(System.getProperty("llama.AOT", "false")); // Use Ahead-of-Time compilation |
| 40 | + |
38 | 41 | protected FileChannel fileChannel;
|
39 | 42 | protected GGUF gguf;
|
40 | 43 | protected int contextLength;
|
@@ -74,6 +77,31 @@ private static ModelType detectModelType(Map<String, Object> metadata) {
|
74 | 77 | return ModelType.UNKNOWN;
|
75 | 78 | }
|
76 | 79 |
|
| 80 | + /** |
| 81 | + // * Loads the language model based on the given options. |
| 82 | + // * <p> |
| 83 | + // * If Ahead-of-Time (AOT) mode is enabled, attempts to use a pre-loaded compiled model. Otherwise, loads the model from the specified path using the model loader. |
| 84 | + // * </p> |
| 85 | + // * |
| 86 | + // * @param options |
| 87 | + // * the parsed CLI options containing model path and max token limit |
| 88 | + // * @return the loaded {@link Model} instance |
| 89 | + // * @throws IOException |
| 90 | + // * if the model fails to load |
| 91 | + // * @throws IllegalStateException |
| 92 | + // * if AOT loading is enabled but the preloaded model is unavailable |
| 93 | + // */ |
| 94 | + public static Model loadModel(Options options) throws IOException { |
| 95 | + if (USE_AOT) { |
| 96 | + Model model = AOT.tryUsePreLoaded(options.modelPath(), options.maxTokens()); |
| 97 | + if (model == null) { |
| 98 | + throw new IllegalStateException("Failed to load precompiled AOT model."); |
| 99 | + } |
| 100 | + return model; |
| 101 | + } |
| 102 | + return ModelLoader.loadModel(options.modelPath(), options.maxTokens(), true); |
| 103 | + } |
| 104 | + |
77 | 105 | public static Model loadModel(Path ggufPath, int contextLength, boolean loadWeights) throws IOException {
|
78 | 106 | // initial load of metadata from gguf file
|
79 | 107 | GGUF gguf = GGUF.loadModel(ggufPath);
|
|
0 commit comments