Skip to content

Commit 32a1782

Browse files
committed
model force reload bool
- by default a load does an unload first, so reloading is already supported (#39) - we add the optimization of calling load without force-reloading, allowing it to be called when the model is already loaded without an explicit check
1 parent 1216c79 commit 32a1782

File tree

9 files changed

+42
-14
lines changed

9 files changed

+42
-14
lines changed

Llama.uplugin

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"FileVersion": 3,
33
"Version": 1,
4-
"VersionName": "0.9.0",
4+
"VersionName": "0.9.1",
55
"FriendlyName": "Llama",
66
"Description": "Llama.cpp plugin for large language model (LLM) inference.",
77
"Category": "LLM",

Source/LlamaCore/Private/LlamaComponent.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,11 @@ ULlamaComponent::~ULlamaComponent()
5353
void ULlamaComponent::Activate(bool bReset)
5454
{
5555
Super::Activate(bReset);
56-
LoadModel();
56+
57+
if (ModelParams.bAutoLoadModelOnStartup)
58+
{
59+
LoadModel(true);
60+
}
5761
}
5862

5963
void ULlamaComponent::Deactivate()
@@ -105,10 +109,10 @@ void ULlamaComponent::InsertRawPrompt(const FString& Text, bool bGenerateReply)
105109
});
106110
}
107111

108-
void ULlamaComponent::LoadModel()
112+
void ULlamaComponent::LoadModel(bool bForceReload)
109113
{
110114
LlamaNative->SetModelParams(ModelParams);
111-
LlamaNative->LoadModel([this](const FString& ModelPath, int32 StatusCode)
115+
LlamaNative->LoadModel(bForceReload, [this](const FString& ModelPath, int32 StatusCode)
112116
{
113117
//We errored, the emit will happen before we reach here so just exit
114118
if (StatusCode !=0)

Source/LlamaCore/Private/LlamaNative.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -259,15 +259,24 @@ void FLlamaNative::SetModelParams(const FLLMModelParams& Params)
259259
ModelParams = Params;
260260
}
261261

262-
void FLlamaNative::LoadModel(TFunction<void(const FString&, int32 StatusCode)> ModelLoadedCallback)
262+
void FLlamaNative::LoadModel(bool bForceReload, TFunction<void(const FString&, int32 StatusCode)> ModelLoadedCallback)
263263
{
264-
EnqueueBGTask([this, ModelLoadedCallback](int64 TaskId)
264+
if (IsModelLoaded() && !bForceReload)
265+
{
266+
//already loaded, we're done
267+
return ModelLoadedCallback(ModelParams.PathToModel, 0);
268+
}
269+
270+
//Copy so these don't get modified during the enqueue op
271+
const FLLMModelParams ParamsAtLoad = ModelParams;
272+
273+
EnqueueBGTask([this, ParamsAtLoad, ModelLoadedCallback](int64 TaskId)
265274
{
266275
//Unload first if any is loaded
267276
Internal->UnloadModel();
268277

269278
//Now load it
270-
bool bSuccess = Internal->LoadModelFromParams(ModelParams);
279+
bool bSuccess = Internal->LoadModelFromParams(ParamsAtLoad);
271280

272281
//Sync model state
273282
if (bSuccess)

Source/LlamaCore/Private/LlamaSubsystem.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ void ULlamaSubsystem::InsertRawPrompt(const FString& Text, bool bGenerateReply)
8585
});
8686
}
8787

88-
void ULlamaSubsystem::LoadModel()
88+
void ULlamaSubsystem::LoadModel(bool bForceReload)
8989
{
9090
//Sync gt params
9191
LlamaNative->SetModelParams(ModelParams);
@@ -96,7 +96,7 @@ void ULlamaSubsystem::LoadModel()
9696
LlamaNative->AddTicker();
9797
}
9898

99-
LlamaNative->LoadModel([this](const FString& ModelPath, int32 StatusCode)
99+
LlamaNative->LoadModel(bForceReload, [this](const FString& ModelPath, int32 StatusCode)
100100
{
101101
//We errored, the emit will happen before we reach here so just exit
102102
if (StatusCode != 0)

Source/LlamaCore/Public/Internal/LlamaInternal.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ class FLlamaInternal
3535
std::string Template;
3636
std::string TemplateSource;
3737

38+
//Params used for the most recent load
39+
FLLMModelParams LastLoadedParams;
40+
3841
//Model loading
3942
bool LoadModelFromParams(const FLLMModelParams& InModelParams);
4043
void UnloadModel();
@@ -81,7 +84,7 @@ class FLlamaInternal
8184

8285
const char* RoleForEnum(EChatTemplateRole Role);
8386

84-
bool bIsModelLoaded = false;
87+
FThreadSafeBool bIsModelLoaded = false;
8588
int32 FilledContextCharLength = 0;
8689
FThreadSafeBool bGenerationActive = false;
8790
};

Source/LlamaCore/Public/LlamaComponent.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,9 @@ class LLAMACORE_API ULlamaComponent : public UActorComponent
7373
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "LLM Model Component")
7474
bool bSyncPromptHistory = true;
7575

76-
//loads model from ModelParams
76+
//loads model from ModelParams. If bForceReload is true, the model will reload even if it is already loaded.
7777
UFUNCTION(BlueprintCallable, Category = "LLM Model Component")
78-
void LoadModel();
78+
void LoadModel(bool bForceReload = true);
7979

8080
UFUNCTION(BlueprintCallable, Category = "LLM Model Component")
8181
void UnloadModel();

Source/LlamaCore/Public/LlamaDataTypes.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,14 @@ struct FLLMModelAdvancedParams
119119
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "LLM Model Params")
120120
bool bUseCommonSampler = true;
121121

122+
//if set above 0.f it will sleep between generation passes to ease gpu pressure
123+
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "LLM Model Params")
124+
float TokenGenerationPacingSleep = 0.f;
125+
126+
//if set above 0.f it will sleep between prompt passes (chunking) to ease gpu pressure
127+
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "LLM Model Params")
128+
float PromptProcessingPacingSleep = 0.f;
129+
122130
//usually . ? !
123131
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "LLM Model Params")
124132
TArray<FString> PartialsSeparators;
@@ -220,6 +228,10 @@ struct FLLMModelParams
220228
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "LLM Model Params")
221229
bool bAutoInsertSystemPromptOnLoad = true;
222230

231+
//applies to component API
232+
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "LLM Model Params")
233+
bool bAutoLoadModelOnStartup = true;
234+
223235
//If not different than default empty, no template will be applied
224236
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "LLM Model Params")
225237
FJinjaChatTemplate CustomChatTemplate = "";

Source/LlamaCore/Public/LlamaNative.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ class LLAMACORE_API FLlamaNative
2828
void SetModelParams(const FLLMModelParams& Params);
2929

3030
//Loads the model found at ModelParams.PathToModel, use SetModelParams to specify params before loading
31-
void LoadModel(TFunction<void(const FString&, int32 StatusCode)> ModelLoadedCallback = nullptr);
31+
void LoadModel(bool bForceReload = false, TFunction<void(const FString&, int32 StatusCode)> ModelLoadedCallback = nullptr);
3232
void UnloadModel(TFunction<void(int32 StatusCode)> ModelUnloadedCallback = nullptr);
3333
bool IsModelLoaded();
3434

Source/LlamaCore/Public/LlamaSubsystem.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ class LLAMACORE_API ULlamaSubsystem : public UEngineSubsystem
7070

7171
//loads model from ModelParams
7272
UFUNCTION(BlueprintCallable, Category = "LLM Model Subsystem")
73-
void LoadModel();
73+
void LoadModel(bool bForceReload = true);
7474

7575
UFUNCTION(BlueprintCallable, Category = "LLM Model Subsystem")
7676
void UnloadModel();

0 commit comments

Comments
 (0)