Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions LLama.Examples/ExampleRunner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public class ExampleRunner
{ "Chat Session: Automatic conversation", TalkToYourself.Run },
{ "Chat Session: Chinese characters", ChatChineseGB2312.Run },
{ "Executor: Interactive mode chat", InteractiveModeExecute.Run },
{ "Executor: Llava Interactive mode chat", LlavaInteractiveModeExecute.Run },
{ "Executor: Mtmd Interactive mode chat", MtmdInteractiveModeExecute.Run },
{ "Executor: Instruct mode chat", InstructModeExecute.Run },
{ "Executor: Stateless mode chat", StatelessModeExecute.Run },
{ "Save and Load: chat session", SaveAndLoadSession.Run },
Expand All @@ -33,7 +33,7 @@ public class ExampleRunner
{ "Batched Executor: Save/Load", BatchedExecutorSaveAndLoad.Run },
{ "Batched Executor: Fork", BatchedExecutorFork.Run },
{ "Batched Executor: Rewind", BatchedExecutorRewind.Run },
{ "Batched Executor: LLava", BatchedExecutorLLava.Run },
{ "Batched Executor: Mtmd", BatchedExecutorMtmd.Run },
{ "Batched Executor: BoolQ Benchmark", BatchedExecutorBoolQ.Run },
{ "Batched Executor: Beam Search", BatchedExecutorBeamSearch.Run },
{ "Custom Sampling Pipeline", CustomSampler.Run },
Expand Down
91 changes: 0 additions & 91 deletions LLama.Examples/Examples/BatchedExecutorLLava.cs

This file was deleted.

126 changes: 126 additions & 0 deletions LLama.Examples/Examples/BatchedExecutorMtmd.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
using System;
using System.Collections.Generic;
using System.IO;
using LLama.Batched;
using LLama.Common;
using LLama.Exceptions;
using LLama.Native;
using LLama.Sampling;
using Spectre.Console;

namespace LLama.Examples.Examples;

/// <summary>
/// Demonstrates how to evaluate an image with MTMD helpers and continue generation by
/// manually scheduling batches, similar to what the batched executor does internally.
/// </summary>
public class BatchedExecutorMtmd
{
    /// <summary>
    /// Maximum number of completion tokens to generate after sending the image prompt.
    /// </summary>
    public const int TokenCount = 10000;

    /// <summary>
    /// Loads the base model plus the MTMD (multimodal) helper weights, queues an image and a
    /// text prompt on a batched-executor conversation, then manually runs the
    /// decode → sample → re-prompt cycle until end-of-generation or the token budget is spent.
    /// </summary>
    public static async Task Run()
    {
        // Load the base LLM and its clip/mtmd sidecar weights so the executor has everything it needs.
        var parameters = new ModelParams(UserSettings.GetModelPath());
        using var model = await LLamaWeights.LoadFromFileAsync(parameters);
        var mtmdParams = MtmdContextParams.Default(); // reuse llama.cpp defaults for helper settings
        mtmdParams.UseGpu = false;
        var marker = mtmdParams.MediaMarker ?? NativeApi.MtmdDefaultMarker() ?? "<media>";

        using var mtmd = await SafeMtmdWeights.LoadFromFileAsync(UserSettings.GetMMProjPath(), model, mtmdParams); // multimodal helper weights

        using var executor = new BatchedExecutor(model, parameters, mtmd); // drives batched token + chunk evaluation

        // Prepend the media marker so the helper knows where to inject the encoded image tokens.
        var defaultPrompt = "\nUSER: Provide a full description of the image.\nASSISTANT: ";
        var promptSuffix = AnsiConsole.Ask("Prompt (or ENTER for default):", defaultPrompt);
        var promptText = string.Concat(marker, promptSuffix);

        var imagePath = UserSettings.GetImagePath();
        AnsiConsole.Write(new CanvasImage(imagePath));

        var vocab = executor.Context.NativeHandle.ModelHandle.Vocab;

        // Simple low-temperature sampler keeps the demo deterministic-ish.
        // `using` ensures the native sampler chain is released when Run completes.
        using var sampler = new DefaultSamplingPipeline
        {
            Temperature = 0.1f
        };

        // Stream decoded text to the console as soon as tokens arrive.
        var decoder = new StreamingTokenDecoder(executor.Context)
        {
            DecodeSpecialTokens = false
        };

        try
        {
            // Each conversation tracks its own KV cache sequence IDs.
            // Dispose it so the sequence is released back to the executor when done.
            using var conversation = executor.Create();
            // enqueue the image so MtmdHelper sees it
            conversation.QueueMedia(imagePath);
            // schedule multimodal prompt
            conversation.Prompt(promptText, addBos: true, special: true);

            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine("Prompt queued with multimodal chunks. Generating response...\n");
            Console.ResetColor();

            var remaining = TokenCount;

            // Run one decode/sampling/prompt cycle – mirrors the batched executor inner loop.
            // Returns false when generation should stop (EOS, KV exhaustion, or budget spent).
            async Task<bool> ProcessNextAsync()
            {
                var decodeResult = await executor.Infer();
                if (decodeResult == DecodeResult.NoKvSlot) // KV cache exhausted – surface to the user
                {
                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.WriteLine("Insufficient KV cache space for multimodal evaluation.");
                    Console.ResetColor();
                    return false;
                }

                if (decodeResult != DecodeResult.Ok)
                    throw new RuntimeError($"Failed to evaluate batch: {decodeResult}.");

                if (!conversation.RequiresSampling) // another conversation may still be queued
                    return true;

                var token = conversation.Sample(sampler); // pull logits (or -1 for mtmd chunk) and sample
                if (token.IsEndOfGeneration(vocab))
                    return false;

                decoder.Add(token);
                var delta = decoder.Read();
                if (!string.IsNullOrEmpty(delta))
                    Console.Write(delta);

                sampler.Accept(token); // keep sampler state in sync
                conversation.Prompt(token); // feed the accepted token back into the batch
                remaining--;
                return remaining > 0;
            }

            // Loop guard re-checks `remaining` because ProcessNextAsync may return true
            // without consuming a token (when this conversation had nothing to sample).
            while (remaining > 0 && await ProcessNextAsync())
            {
            }

            Console.WriteLine();
        }
        catch (IOException ex)
        {
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine($"Could not load media '{imagePath}': {ex.Message}");
            Console.ResetColor();
        }
        catch (RuntimeError ex)
        {
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine($"MTMD processing failed: {ex.Message}");
            Console.ResetColor();
        }
    }
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;
using LLama.Common;
using Spectre.Console;
Expand All @@ -6,27 +8,32 @@

namespace LLama.Examples.Examples
{
// This example shows how to chat with LLaVA model with both image and text as input.
// This example shows how to chat with Mtmd model with both image and text as input.
// It uses the interactive executor to inference.
public class LlavaInteractiveModeExecute
public class MtmdInteractiveModeExecute
{
public static async Task Run()
{
string multiModalProj = UserSettings.GetMMProjPath();
string modelPath = UserSettings.GetModelPath();
string modelImage = UserSettings.GetImagePath();
const int maxTokens = 1024;
const int maxTokens = 2048;

var prompt = $"{{{modelImage}}}\nUSER:\nProvide a full description of the image.\nASSISTANT:\n";

var parameters = new ModelParams(modelPath);

var mtmdParameters = MtmdContextParams.Default();
mtmdParameters.UseGpu = false;

using var model = await LLamaWeights.LoadFromFileAsync(parameters);
using var context = model.CreateContext(parameters);

// Llava Init
using var clipModel = await LLavaWeights.LoadFromFileAsync(multiModalProj);


// Mtmd Init
using var clipModel = await SafeMtmdWeights.LoadFromFileAsync(multiModalProj, model, mtmdParameters );

var mediaMarker = mtmdParameters.MediaMarker ?? NativeApi.MtmdDefaultMarker() ?? "<media>";

var ex = new InteractiveExecutor(context, clipModel);

Console.ForegroundColor = ConsoleColor.Yellow;
Expand All @@ -40,38 +47,61 @@ public static async Task Run()
Temperature = 0.1f
},

AntiPrompts = new List<string> { "\nUSER:" },
AntiPrompts = new List<string> { "\nASSISTANT:" },
MaxTokens = maxTokens

};

do
{

// Evaluate if we have images
// Evaluate if we have media
//
var imageMatches = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value);
var imageCount = imageMatches.Count();
var hasImages = imageCount > 0;
var mediaMatches = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value);
var mediaCount = mediaMatches.Count();
var hasMedia = mediaCount > 0;

if (hasImages)
if (hasMedia)
{
var imagePathsWithCurlyBraces = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value);
var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value).ToList();
var mediaPathsWithCurlyBraces = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value);
var mediaPaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value).ToList();

List<byte[]> imageBytes;
var embeds = new List<SafeMtmdEmbed>();
var imageList = new List<byte[]>();
var imageExtensions = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
{
".png",
".jpg",
".jpeg",
".bmp",
".gif",
".webp"
};

try
{
imageBytes = imagePaths.Select(File.ReadAllBytes).ToList();
foreach (var mediaPath in mediaPaths)
{
var extension = Path.GetExtension(mediaPath);
if (!string.IsNullOrEmpty(extension) && imageExtensions.Contains(extension))
{
// Keep the raw image data so the caller can reuse or inspect the images later.
imageList.Add(File.ReadAllBytes(mediaPath));
}

var embed = clipModel.LoadMedia(mediaPath);
embeds.Add(embed);
}
}
catch (IOException exception)
{
Console.ForegroundColor = ConsoleColor.Red;
Console.Write(
$"Could not load your {(imageCount == 1 ? "image" : "images")}:");
$"Could not load your {(mediaCount == 1 ? "media" : "medias")}:");
Console.Write($"{exception.Message}");
Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("Please try again.");
clipModel.ClearMedia();
break;
}

Expand All @@ -81,19 +111,17 @@ public static async Task Run()
// https://github.com/ggerganov/llama.cpp/discussions/3620
ex.Context.NativeHandle.MemorySequenceRemove( LLamaSeqId.Zero, -1, -1 );

int index = 0;
foreach (var path in imagePathsWithCurlyBraces)
// Replace placeholders with media markers (one marker per image)
foreach (var path in mediaPathsWithCurlyBraces)
{
// First image replace to tag <image, the rest of the images delete the tag
prompt = prompt.Replace(path, index++ == 0 ? "<image>" : "");
prompt = prompt.Replace(path, mediaMarker, StringComparison.Ordinal);
}


Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine($"Here are the images, that are sent to the chat model in addition to your message.");
Console.WriteLine();

foreach (var consoleImage in imageBytes?.Select(bytes => new CanvasImage(bytes)) ?? Array.Empty<CanvasImage>())
foreach (var consoleImage in imageList.Select(image => new CanvasImage(image.ToArray())))
{
consoleImage.MaxWidth = 50;
AnsiConsole.Write(consoleImage);
Expand All @@ -108,10 +136,9 @@ public static async Task Run()

// Initialize Images in executor
//
foreach (var image in imagePaths)
{
ex.Images.Add(await File.ReadAllBytesAsync(image));
}
ex.Embeds.Clear();
foreach (var embed in embeds)
ex.Embeds.Add(embed);
}

Console.ForegroundColor = Color.White;
Expand Down
Loading
Loading