Skip to content

Commit 32a1782

Browse files
committed
model force reload bool
- by default a load does an unload first, so reloading is already supported (#39) - we add the optimization of calling load without force-reloading, allowing it to be called when the model is already loaded without an explicit check
1 parent 1216c79 commit 32a1782

File tree

9 files changed

+42
-14
lines changed

9 files changed

+42
-14
lines changed

Llama.uplugin

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"FileVersion": 3,
33
"Version": 1,
4-
"VersionName": "0.9.0",
4+
"VersionName": "0.9.1",
55
"FriendlyName": "Llama",
66
"Description": "Llama.cpp plugin for large language model (LLM) inference.",
77
"Category": "LLM",

Source/LlamaCore/Private/LlamaComponent.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,11 @@ ULlamaComponent::~ULlamaComponent()
5353
void ULlamaComponent::Activate(bool bReset)
5454
{
5555
Super::Activate(bReset);
56-
LoadModel();
56+
57+
if (ModelParams.bAutoLoadModelOnStartup)
58+
{
59+
LoadModel(true);
60+
}
5761
}
5862

5963
void ULlamaComponent::Deactivate()
@@ -105,10 +109,10 @@ void ULlamaComponent::InsertRawPrompt(const FString& Text, bool bGenerateReply)
105109
});
106110
}
107111

108-
void ULlamaComponent::LoadModel()
112+
void ULlamaComponent::LoadModel(bool bForceReload)
109113
{
110114
LlamaNative->SetModelParams(ModelParams);
111-
LlamaNative->LoadModel([this](const FString& ModelPath, int32 StatusCode)
115+
LlamaNative->LoadModel(bForceReload, [this](const FString& ModelPath, int32 StatusCode)
112116
{
113117
//We errored, the emit will happen before we reach here so just exit
114118
if (StatusCode !=0)

Source/LlamaCore/Private/LlamaNative.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -259,15 +259,24 @@ void FLlamaNative::SetModelParams(const FLLMModelParams& Params)
259259
ModelParams = Params;
260260
}
261261

262-
void FLlamaNative::LoadModel(TFunction<void(const FString&, int32 StatusCode)> ModelLoadedCallback)
262+
void FLlamaNative::LoadModel(bool bForceReload, TFunction<void(const FString&, int32 StatusCode)> ModelLoadedCallback)
263263
{
264-
EnqueueBGTask([this, ModelLoadedCallback](int64 TaskId)
264+
if (IsModelLoaded() && !bForceReload)
265+
{
266+
//already loaded, we're done
267+
return ModelLoadedCallback(ModelParams.PathToModel, 0);
268+
}
269+
270+
//Copy so these don't get modified during the enqueue op
271+
const FLLMModelParams ParamsAtLoad = ModelParams;
272+
273+
EnqueueBGTask([this, ParamsAtLoad, ModelLoadedCallback](int64 TaskId)
265274
{
266275
//Unload first if any is loaded
267276
Internal->UnloadModel();
268277

269278
//Now load it
270-
bool bSuccess = Internal->LoadModelFromParams(ModelParams);
279+
bool bSuccess = Internal->LoadModelFromParams(ParamsAtLoad);
271280

272281
//Sync model state
273282
if (bSuccess)

Source/LlamaCore/Private/LlamaSubsystem.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ void ULlamaSubsystem::InsertRawPrompt(const FString& Text, bool bGenerateReply)
8585
});
8686
}
8787

88-
void ULlamaSubsystem::LoadModel()
88+
void ULlamaSubsystem::LoadModel(bool bForceReload)
8989
{
9090
//Sync gt params
9191
LlamaNative->SetModelParams(ModelParams);
@@ -96,7 +96,7 @@ void ULlamaSubsystem::LoadModel()
9696
LlamaNative->AddTicker();
9797
}
9898

99-
LlamaNative->LoadModel([this](const FString& ModelPath, int32 StatusCode)
99+
LlamaNative->LoadModel(bForceReload, [this](const FString& ModelPath, int32 StatusCode)
100100
{
101101
//We errored, the emit will happen before we reach here so just exit
102102
if (StatusCode != 0)

Source/LlamaCore/Public/Internal/LlamaInternal.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ class FLlamaInternal
3535
std::string Template;
3636
std::string TemplateSource;
3737

38+
//Params used for the most recent load
39+
FLLMModelParams LastLoadedParams;
40+
3841
//Model loading
3942
bool LoadModelFromParams(const FLLMModelParams& InModelParams);
4043
void UnloadModel();
@@ -81,7 +84,7 @@ class FLlamaInternal
8184

8285
const char* RoleForEnum(EChatTemplateRole Role);
8386

84-
bool bIsModelLoaded = false;
87+
FThreadSafeBool bIsModelLoaded = false;
8588
int32 FilledContextCharLength = 0;
8689
FThreadSafeBool bGenerationActive = false;
8790
};

Source/LlamaCore/Public/LlamaComponent.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,9 @@ class LLAMACORE_API ULlamaComponent : public UActorComponent
7373
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "LLM Model Component")
7474
bool bSyncPromptHistory = true;
7575

76-
//loads model from ModelParams
76+
//loads model from ModelParams. If bForceReload is true, the model will reload even if it is already loaded.
7777
UFUNCTION(BlueprintCallable, Category = "LLM Model Component")
78-
void LoadModel();
78+
void LoadModel(bool bForceReload = true);
7979

8080
UFUNCTION(BlueprintCallable, Category = "LLM Model Component")
8181
void UnloadModel();

Source/LlamaCore/Public/LlamaDataTypes.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,14 @@ struct FLLMModelAdvancedParams
119119
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "LLM Model Params")
120120
bool bUseCommonSampler = true;
121121

122+
//if set above 0.f it will sleep between generation passes to ease gpu pressure
123+
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "LLM Model Params")
124+
float TokenGenerationPacingSleep = 0.f;
125+
126+
//if set above 0.f it will sleep between prompt passes (chunking) to ease gpu pressure
127+
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "LLM Model Params")
128+
float PromptProcessingPacingSleep = 0.f;
129+
122130
//usually . ? !
123131
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "LLM Model Params")
124132
TArray<FString> PartialsSeparators;
@@ -220,6 +228,10 @@ struct FLLMModelParams
220228
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "LLM Model Params")
221229
bool bAutoInsertSystemPromptOnLoad = true;
222230

231+
//applies to component API
232+
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "LLM Model Params")
233+
bool bAutoLoadModelOnStartup = true;
234+
223235
//If not different than default empty, no template will be applied
224236
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "LLM Model Params")
225237
FJinjaChatTemplate CustomChatTemplate = "";

Source/LlamaCore/Public/LlamaNative.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ class LLAMACORE_API FLlamaNative
2828
void SetModelParams(const FLLMModelParams& Params);
2929

3030
//Loads the model found at ModelParams.PathToModel, use SetModelParams to specify params before loading
31-
void LoadModel(TFunction<void(const FString&, int32 StatusCode)> ModelLoadedCallback = nullptr);
31+
void LoadModel(bool bForceReload = false, TFunction<void(const FString&, int32 StatusCode)> ModelLoadedCallback = nullptr);
3232
void UnloadModel(TFunction<void(int32 StatusCode)> ModelUnloadedCallback = nullptr);
3333
bool IsModelLoaded();
3434

Source/LlamaCore/Public/LlamaSubsystem.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ class LLAMACORE_API ULlamaSubsystem : public UEngineSubsystem
7070

7171
//loads model from ModelParams
7272
UFUNCTION(BlueprintCallable, Category = "LLM Model Subsystem")
73-
void LoadModel();
73+
void LoadModel(bool bForceReload = true);
7474

7575
UFUNCTION(BlueprintCallable, Category = "LLM Model Subsystem")
7676
void UnloadModel();

0 commit comments

Comments
 (0)