diff --git a/src/bindings/Llama.ts b/src/bindings/Llama.ts
index cb0e4574..cb5f042c 100644
--- a/src/bindings/Llama.ts
+++ b/src/bindings/Llama.ts
@@ -465,7 +465,7 @@ export class Llama {
         }
 
         try {
-            const transformedLogLevel = getTransformedLogLevel(level, message);
+            const transformedLogLevel = getTransformedLogLevel(level, message, this.gpu);
             if (LlamaLogLevelGreaterThanOrEqual(transformedLogLevel, this._logLevel))
                 this._logger(transformedLogLevel, message);
         } catch (err) {
@@ -665,7 +665,7 @@ function logMessageIsOnlyDots(message: string | null) {
     return true;
 }
 
-function getTransformedLogLevel(level: LlamaLogLevel, message: string): LlamaLogLevel {
+function getTransformedLogLevel(level: LlamaLogLevel, message: string, gpu: BuildGpu): LlamaLogLevel {
     if (level === LlamaLogLevel.warn && message.endsWith("the full capacity of the model will not be utilized"))
         return LlamaLogLevel.info;
     else if (level === LlamaLogLevel.warn && message.startsWith("ggml_metal_init: skipping kernel_") && message.endsWith("(not supported)"))
@@ -684,6 +684,8 @@ function getTransformedLogLevel(level: LlamaLogLevel, message: string): LlamaLog
         return LlamaLogLevel.info;
     else if (level === LlamaLogLevel.warn && message.startsWith("load: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list"))
         return LlamaLogLevel.info;
+    else if (gpu === false && level === LlamaLogLevel.warn && message.startsWith("llama_adapter_lora_init_impl: lora for '") && message.endsWith("' cannot use buft 'CPU_REPACK', fallback to CPU"))
+        return LlamaLogLevel.info;
 
     return level;
 }
diff --git a/test/modelDependent/llama3.1/tokenBias.test.ts b/test/modelDependent/llama3.1/tokenBias.test.ts
index e718e6aa..b395b72c 100644
--- a/test/modelDependent/llama3.1/tokenBias.test.ts
+++ b/test/modelDependent/llama3.1/tokenBias.test.ts
@@ -25,9 +25,11 @@ describe("llama 3.1", () => {
                 const text = model.detokenize([token]);
 
                 if (text.toLowerCase().includes("hello"))
-                    customBias.set(token, -0.99);
+                    customBias.set(token, -1);
                 else if (text.toLowerCase().includes("hi"))
                     customBias.set(token, "never");
+                else if (text.toLowerCase().includes("well"))
+                    customBias.set(token, -0.99);
             }
 
             const res = await chatSession.prompt('Greet me by saying "hello" to me', {