Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion src/csharp/NativeMethods.cs
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ internal class NativeLib
// Appends the given token sequences to the generator's input.
// Returns an OgaResult* status handle (per the /* OgaResult* */ convention used in this file).
[DllImport(NativeLib.DllName, CallingConvention = CallingConvention.Winapi)]
public static extern IntPtr /* OgaResult* */ OgaGenerator_AppendTokenSequences(IntPtr /* OgaGenerator* */ generator,
IntPtr /* const OgaSequences* */ sequences);


// This function is used to rewind the generator to the given newLength.
[DllImport(NativeLib.DllName, CallingConvention = CallingConvention.Winapi)]
Expand Down Expand Up @@ -354,6 +354,21 @@ public static extern UIntPtr OgaSequencesGetSequenceCount(IntPtr /* const OgaSeq
IntPtr /* const Audios* */ audios,
out IntPtr /* OgaNamedTensors** */ namedTensors);

// Splits the input signal into high-energy segments, writing results into output0.
// All parameters are native OgaTensor handles; returns an extError_t code (0 == success).
[DllImport(NativeLib.DllName, CallingConvention = CallingConvention.Winapi)]
public static extern int /* extError_t */ OgaSplitSignalSegments(
    IntPtr /* const OgaTensor* */ input,
    IntPtr /* const OgaTensor* */ sr_tensor,
    IntPtr /* const OgaTensor* */ frame_ms_tensor,
    IntPtr /* const OgaTensor* */ hop_ms_tensor,
    IntPtr /* const OgaTensor* */ energy_threshold_db_tensor,
    IntPtr /* OgaTensor* */ output0);

// Merges signal segments produced by OgaSplitSignalSegments into output0.
// NOTE(review): presumably segments separated by less than merge_gap_ms are joined — confirm against the native implementation.
// Returns an extError_t code (0 == success).
[DllImport(NativeLib.DllName, CallingConvention = CallingConvention.Winapi)]
public static extern int /* extError_t */ OgaMergeSignalSegments(
IntPtr /* const OgaTensor* */ segments_tensor,
IntPtr /* const OgaTensor* */ merge_gap_ms_tensor,
IntPtr /* OgaTensor* */ output0);

[DllImport(NativeLib.DllName, CallingConvention = CallingConvention.Winapi)]
public static extern unsafe IntPtr /* OgaResult* */ OgaProcessorDecode(IntPtr /* const OgaMultiModalProcessor* */ processor,
int* /* const int32_t* */ sequence,
Expand Down
259 changes: 259 additions & 0 deletions src/csharp/SignalProcessor.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Runtime.InteropServices;

namespace Microsoft.ML.OnnxRuntimeGenAI
{
public static class SignalProcessor
{
Copy link
Contributor

@kunal-vaishnavi kunal-vaishnavi Nov 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can these APIs be added to the MultiModalProcessor instead of introducing a new class? All multi-modal models (e.g. Whisper, Phi-4 mm, etc) use that processor for pre-processing.

// Native element-type codes passed to OgaCreateTensorFromBuffer.
// NOTE(review): assumed to match the native element-type enum (float32 = 1, int64 = 7) — confirm.
private const int ET_Float32 = 1;
private const int ET_Int64 = 7;

// Maps a native tensor handle to the GCHandle pinning its managed backing buffer.
// Entries are added by the Create*Tensor* helpers and removed in SafeDestroyTensor.
private static readonly ConcurrentDictionary<IntPtr, GCHandle> _tensorPins = new();

#region Native wrappers

/// <summary>
/// Invokes the native segment-splitting routine over the given tensor handles.
/// </summary>
/// <exception cref="InvalidOperationException">The native call returned a non-zero error code.</exception>
public static void SplitSignalSegments(
    IntPtr inputTensor,
    IntPtr srTensor,
    IntPtr frameMsTensor,
    IntPtr hopMsTensor,
    IntPtr energyThresholdDbTensor,
    IntPtr outputTensor)
{
    int status = NativeMethods.OgaSplitSignalSegments(
        inputTensor, srTensor, frameMsTensor, hopMsTensor, energyThresholdDbTensor, outputTensor);

    if (status == 0)
        return;

    throw new InvalidOperationException($"OgaSplitSignalSegments failed with error code {status}");
}

/// <summary>
/// Invokes the native segment-merging routine over the given tensor handles.
/// </summary>
/// <exception cref="InvalidOperationException">The native call returned a non-zero error code.</exception>
public static void MergeSignalSegments(
    IntPtr segmentsTensor,
    IntPtr mergeGapMsTensor,
    IntPtr outputTensor)
{
    int status = NativeMethods.OgaMergeSignalSegments(segmentsTensor, mergeGapMsTensor, outputTensor);

    if (status == 0)
        return;

    throw new InvalidOperationException($"OgaMergeSignalSegments failed with error code {status}");
}

#endregion

#region Tensor creation helpers (fixed)

/// <summary>
/// Create a native tensor view over a managed float[] without copying.
/// The array is pinned until the tensor is destroyed via SafeDestroyTensor.
/// </summary>
/// <param name="data">Backing buffer; must stay alive while the tensor is in use.</param>
/// <param name="shape">Tensor dimensions; their product must equal <paramref name="data"/>.Length.</param>
/// <returns>Native tensor handle.</returns>
public static IntPtr CreateFloatTensorFromArray(float[] data, long[] shape)
{
    if (data == null) throw new ArgumentNullException(nameof(data));
    if (shape == null) throw new ArgumentNullException(nameof(shape));

    // Guard against a shape that does not describe the buffer: a mismatch would let
    // native code read past the end of the pinned array.
    long elementCount = 1;
    foreach (long dim in shape)
    {
        if (dim < 0) throw new ArgumentException("Shape dimensions must be non-negative", nameof(shape));
        elementCount = checked(elementCount * dim);
    }
    if (elementCount != data.LongLength)
        throw new ArgumentException("Shape element count does not match data length", nameof(shape));

    // The underlying buffer is pinned for the lifetime of the tensor.
    var handle = GCHandle.Alloc(data, GCHandleType.Pinned);

    IntPtr tensor;
    // NOTE(review): the returned status looks like an OgaResult* handle that is never
    // destroyed on failure — confirm whether it needs explicit destruction.
    var status = NativeMethods.OgaCreateTensorFromBuffer(
        handle.AddrOfPinnedObject(),
        shape,
        (UIntPtr)shape.Length,
        (ElementType)ET_Float32,
        out tensor);

    if (status.ToInt64() != 0)
    {
        handle.Free();
        throw new InvalidOperationException($"OgaCreateTensorFromBuffer(float) failed with {status.ToInt64()}");
    }

    if (!_tensorPins.TryAdd(tensor, handle))
    {
        handle.Free();
        throw new InvalidOperationException("Failed to track pinned buffer for float tensor.");
    }

    return tensor;
}

/// <summary>
/// Create a native tensor view over a managed long[] without copying.
/// The array is pinned until the tensor is destroyed via SafeDestroyTensor.
/// </summary>
/// <param name="data">Backing buffer; must stay alive while the tensor is in use.</param>
/// <param name="shape">Tensor dimensions; their product must equal <paramref name="data"/>.Length.</param>
/// <returns>Native tensor handle.</returns>
public static IntPtr CreateInt64TensorFromArray(long[] data, long[] shape)
{
    if (data == null) throw new ArgumentNullException(nameof(data));
    if (shape == null) throw new ArgumentNullException(nameof(shape));

    // Guard against a shape that does not describe the buffer: a mismatch would let
    // native code read past the end of the pinned array.
    long elementCount = 1;
    foreach (long dim in shape)
    {
        if (dim < 0) throw new ArgumentException("Shape dimensions must be non-negative", nameof(shape));
        elementCount = checked(elementCount * dim);
    }
    if (elementCount != data.LongLength)
        throw new ArgumentException("Shape element count does not match data length", nameof(shape));

    // The underlying buffer is pinned for the lifetime of the tensor.
    var handle = GCHandle.Alloc(data, GCHandleType.Pinned);

    IntPtr tensor;
    // NOTE(review): the returned status looks like an OgaResult* handle that is never
    // destroyed on failure — confirm whether it needs explicit destruction.
    var status = NativeMethods.OgaCreateTensorFromBuffer(
        handle.AddrOfPinnedObject(),
        shape,
        (UIntPtr)shape.Length,
        (ElementType)ET_Int64,
        out tensor);

    if (status.ToInt64() != 0)
    {
        handle.Free();
        throw new InvalidOperationException($"OgaCreateTensorFromBuffer(int64) failed with {status.ToInt64()}");
    }

    if (!_tensorPins.TryAdd(tensor, handle))
    {
        handle.Free();
        throw new InvalidOperationException("Failed to track pinned buffer for int64 tensor.");
    }

    return tensor;
}

/// <summary>
/// Create an output tensor that points at a caller-owned long[] buffer.
/// The buffer is pinned until the tensor is destroyed via SafeDestroyTensor.
/// </summary>
/// <param name="backingBuffer">Caller-owned buffer the native code writes into.</param>
/// <param name="rows">Number of rows in the 2-D output shape; must be non-negative.</param>
/// <param name="cols">Number of columns in the 2-D output shape; must be non-negative.</param>
/// <returns>Native tensor handle.</returns>
public static IntPtr CreateOutputInt64Tensor(long[] backingBuffer, long rows, long cols)
{
    if (backingBuffer == null) throw new ArgumentNullException(nameof(backingBuffer));
    if (rows < 0) throw new ArgumentOutOfRangeException(nameof(rows));
    if (cols < 0) throw new ArgumentOutOfRangeException(nameof(cols));

    // checked: rows * cols could silently overflow and bypass the size check below.
    long required = checked(rows * cols);
    if (required > backingBuffer.LongLength)
        throw new ArgumentException("backingBuffer too small for requested shape");

    long[] shape = new long[] { rows, cols };

    // The buffer is pinned for the lifetime of the tensor.
    var handle = GCHandle.Alloc(backingBuffer, GCHandleType.Pinned);

    IntPtr tensor;
    var status = NativeMethods.OgaCreateTensorFromBuffer(
        handle.AddrOfPinnedObject(),
        shape,
        (UIntPtr)shape.Length,
        (ElementType)ET_Int64,
        out tensor);

    if (status.ToInt64() != 0)
    {
        handle.Free();
        throw new InvalidOperationException($"OgaCreateTensorFromBuffer(output int64) failed with {status.ToInt64()}");
    }

    if (!_tensorPins.TryAdd(tensor, handle))
    {
        handle.Free();
        throw new InvalidOperationException("Failed to track pinned buffer for output tensor.");
    }

    return tensor;
}

/// <summary>
/// Destroy a native tensor and unpin its managed backing buffer, if one was tracked.
/// Safe to call with <see cref="IntPtr.Zero"/>.
/// </summary>
private static void SafeDestroyTensor(IntPtr tensor)
{
    if (tensor == IntPtr.Zero)
        return;

    NativeMethods.OgaDestroyTensor(tensor);

    // Release the pin only if this handle was one of ours.
    if (!_tensorPins.TryRemove(tensor, out var pin))
        return;

    if (pin.IsAllocated)
        pin.Free();
}

#endregion

/// <summary>
/// Runs STFT over the input signal and finds the areas of high energy with start/end timestamps in ms.
/// </summary>
/// <param name="inputSignal">Signal samples; must be non-empty.</param>
/// <param name="sampleRate">Sample rate of the signal in Hz.</param>
/// <param name="frameMs">Analysis frame length in milliseconds.</param>
/// <param name="hopMs">Hop between frames in milliseconds.</param>
/// <param name="energyThresholdDb">Energy threshold in dB used to mark a frame as active.</param>
/// <param name="mergeGapMs">Gap (ms) below which adjacent segments are merged.</param>
/// <returns>Merged (Start, End) timestamps in milliseconds.</returns>
public static (double Start, double End)[] SplitAndMergeSegments(
    float[] inputSignal,
    int sampleRate,
    int frameMs,
    int hopMs,
    float energyThresholdDb,
    int mergeGapMs)
{
    if (inputSignal == null || inputSignal.Length == 0)
        throw new ArgumentException("Input array cannot be null or empty", nameof(inputSignal));

    // NOTE(review): the native routines write into caller-provided buffers sized for
    // MaxSegs rows; confirm the native side clamps its output to this capacity.
    const int MaxSegs = 1024;

    long[] splitBacking = new long[MaxSegs * 2];
    long[] mergedBacking = new long[MaxSegs * 2];

    IntPtr input = IntPtr.Zero, sr = IntPtr.Zero, frame = IntPtr.Zero, hop = IntPtr.Zero,
           thr = IntPtr.Zero, splitOut = IntPtr.Zero, mergeGap = IntPtr.Zero, mergedOut = IntPtr.Zero;

    try
    {
        long[] inputShape = new long[] { 1, inputSignal.Length };

        input = CreateFloatTensorFromArray(inputSignal, inputShape);
        sr = CreateInt64TensorFromArray(new long[] { sampleRate }, new long[] { 1 });
        frame = CreateInt64TensorFromArray(new long[] { frameMs }, new long[] { 1 });
        hop = CreateInt64TensorFromArray(new long[] { hopMs }, new long[] { 1 });
        thr = CreateFloatTensorFromArray(new float[] { energyThresholdDb }, new long[] { 1 });

        splitOut = CreateOutputInt64Tensor(splitBacking, MaxSegs, 2);
        mergedOut = CreateOutputInt64Tensor(mergedBacking, MaxSegs, 2);

        SplitSignalSegments(input, sr, frame, hop, thr, splitOut);

        // Validate the split output layout before feeding it to the merge step.
        long[] splitShapeBuf = new long[2];
        Result.VerifySuccess(NativeMethods.OgaTensorGetShape(splitOut, splitShapeBuf, (UIntPtr)splitShapeBuf.Length));
        if (splitShapeBuf[1] != 2)
            throw new InvalidOperationException($"Expected split output with 2 columns, got {splitShapeBuf[1]}");

        mergeGap = CreateInt64TensorFromArray(new long[] { mergeGapMs }, new long[] { 1 });
        MergeSignalSegments(splitOut, mergeGap, mergedOut);

        long[] mergedShapeBuf = new long[2];
        Result.VerifySuccess(NativeMethods.OgaTensorGetShape(mergedOut, mergedShapeBuf, (UIntPtr)mergedShapeBuf.Length));
        long mergedRows = mergedShapeBuf[0];
        long mergedCols = mergedShapeBuf[1];

        if (mergedCols != 2)
            throw new InvalidOperationException($"Expected merged output with 2 columns, got {mergedCols}");

        // Never read beyond the managed buffer, even if the native side reports
        // more rows than the fixed-size backing store can hold.
        long rowsToRead = Math.Min(mergedRows, MaxSegs);

        var result = new List<(double Start, double End)>();
        for (int i = 0; i < rowsToRead; ++i)
        {
            long start = mergedBacking[i * 2 + 0];
            long end = mergedBacking[i * 2 + 1];
            // All-zero rows are unused capacity in the fixed-size output buffer.
            if (start == 0 && end == 0) continue;
            result.Add(((double)start, (double)end));
        }

        return result.ToArray();
    }
    finally
    {
        // Destroy on every path so the pinned managed buffers are released.
        SafeDestroyTensor(input);
        SafeDestroyTensor(sr);
        SafeDestroyTensor(frame);
        SafeDestroyTensor(hop);
        SafeDestroyTensor(thr);
        SafeDestroyTensor(splitOut);
        SafeDestroyTensor(mergeGap);
        SafeDestroyTensor(mergedOut);
    }
}
}
}
85 changes: 85 additions & 0 deletions src/models/processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,93 @@ std::unique_ptr<OrtValue> ProcessTensor<float, int64_t>(OrtxTensor* tensor, Ort:
return tensor_value;
}

// Wraps a mutable Generators::Tensor in an OrtxTensor view over the same buffer
// (no copy). T must match the tensor's element type.
// NOTE(review): the Ort::Custom::Tensor wrapper is heap-allocated and callers in
// this file never free it — confirm whether the Ortx APIs take ownership.
template <typename T>
OrtxTensor* MakeOrtxTensor(Generators::Tensor* src) {
  if (!src) {
    throw std::runtime_error("Null tensor passed to MakeOrtxTensor");
  }

  T* data = const_cast<T*>(src->GetData<T>());
  std::vector<int64_t> shape = src->GetShape();

  auto* ort = new Ort::Custom::Tensor<T>(shape, data);
  return reinterpret_cast<OrtxTensor*>(ort);
}

// Wraps a read-only Generators::Tensor in a const OrtxTensor view over the same
// buffer (no copy). T must match the tensor's element type.
// NOTE(review): the wrapper is heap-allocated and never freed by callers in this
// file — confirm intended ownership.
template <typename T>
const OrtxTensor* MakeOrtxTensorConst(const Generators::Tensor* src) {
  if (!src) {
    throw std::runtime_error("Null tensor passed to MakeOrtxTensorConst");
  }

  std::vector<int64_t> dims = src->GetShape();
  auto* wrapper = new Ort::Custom::Tensor<T>(dims, const_cast<T*>(src->GetData<T>()));
  return reinterpret_cast<const OrtxTensor*>(wrapper);
}

// Implementation behind OgaSplitSignalSegments: splits `input` into high-energy
// segments and writes the results into `output0` via the ortx extensions API.
// Returns nullptr on success; throws std::runtime_error on any failure.
// NOTE(review): the OrtxTensor wrappers created below are heap-allocated and are
// never freed — including on the error path; confirm ownership or release them
// after the OrtxSplitSignalSegments call.
void* SplitSignalSegments(
    const Generators::Tensor* input,
    const Generators::Tensor* sr_tensor,
    const Generators::Tensor* frame_ms_tensor,
    const Generators::Tensor* hop_ms_tensor,
    const Generators::Tensor* energy_threshold_db_tensor,
    Generators::Tensor* output0) {
  if (!input || !sr_tensor || !frame_ms_tensor || !hop_ms_tensor ||
      !energy_threshold_db_tensor || !output0) {
    throw std::runtime_error("Null tensor argument passed to OgaSplitSignalSegments");
  }

  const OrtxTensor* in_tensor = Generators::MakeOrtxTensorConst<float>(input);
  const OrtxTensor* sr_tensor_obj = Generators::MakeOrtxTensorConst<int64_t>(sr_tensor);
  const OrtxTensor* frame_tensor = Generators::MakeOrtxTensorConst<int64_t>(frame_ms_tensor);
  const OrtxTensor* hop_tensor = Generators::MakeOrtxTensorConst<int64_t>(hop_ms_tensor);
  const OrtxTensor* thr_tensor = Generators::MakeOrtxTensorConst<float>(energy_threshold_db_tensor);
  OrtxTensor* out_tensor = Generators::MakeOrtxTensor<int64_t>(output0);

  extError_t err = OrtxSplitSignalSegments(
      in_tensor,
      sr_tensor_obj,
      frame_tensor,
      hop_tensor,
      thr_tensor,
      out_tensor);

  if (err != kOrtxOK) {
    throw std::runtime_error(OrtxGetLastErrorMessage());
  }
  return nullptr;
}

// Implementation behind OgaMergeSignalSegments: merges the segments in
// `segments_tensor` using the gap value in `merge_gap_ms_tensor`, writing the
// result into `output0`. Returns nullptr on success; throws on failure.
// NOTE(review): the OrtxTensor wrappers allocated below are never freed here —
// confirm whether OrtxMergeSignalSegments takes ownership.
void* MergeSignalSegments(
    const Generators::Tensor* segments_tensor,
    const Generators::Tensor* merge_gap_ms_tensor,
    Generators::Tensor* output0) {
  if (!segments_tensor || !merge_gap_ms_tensor || !output0) {
    throw std::runtime_error("Null tensor argument passed to OgaMergeSignalSegments");
  }

  const OrtxTensor* segments = Generators::MakeOrtxTensorConst<int64_t>(segments_tensor);
  const OrtxTensor* gap = Generators::MakeOrtxTensorConst<int64_t>(merge_gap_ms_tensor);
  OrtxTensor* merged = Generators::MakeOrtxTensor<int64_t>(output0);

  const extError_t status = OrtxMergeSignalSegments(segments, gap, merged);
  if (status != kOrtxOK) {
    throw std::runtime_error(OrtxGetLastErrorMessage());
  }
  return nullptr;
}

// Explicit instantiations so the template definitions above are emitted in this
// translation unit for the element types used by the signal-processing C API.
template std::unique_ptr<OrtValue> ProcessTensor<float>(OrtxTensor* tensor, Ort::Allocator& allocator);
template std::unique_ptr<OrtValue> ProcessTensor<int64_t>(OrtxTensor* tensor, Ort::Allocator& allocator);
template std::unique_ptr<OrtValue> ProcessTensor<bool>(OrtxTensor* tensor, Ort::Allocator& allocator);

template const OrtxTensor* MakeOrtxTensorConst<float>(const Generators::Tensor*);
template const OrtxTensor* MakeOrtxTensorConst<int64_t>(const Generators::Tensor*);
template OrtxTensor* MakeOrtxTensor<int64_t>(Generators::Tensor*);
} // namespace Generators
Loading
Loading