
Commit 0c67095

feat: adding text-to-audio example
- using `InferenceAPI` to perform `text-to-audio`
- encoding `wave` audio tensors from the Rust side
1 parent 62b4a93 commit 0c67095
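
The core of the change is the new `tryEncodeAudio` tensor method. A minimal sketch of the Rust-side WAV encoding this commit adds, assuming any `'float32'` tensor of PCM samples; the shape and the silent one-second buffer below are purely illustrative, not part of the commit:

```ts
// Minimal sketch, not the full Kokoro example further down.
const { Tensor } = Supabase.ai;

const samples = new Float32Array(24000); // 1 s of silence at 24 kHz, illustrative only
const waveform = new Tensor('float32', samples, [samples.length]);

// Declared in examples/ort-raw-session/types.d.ts: encoding happens in the
// Rust backend and returns WAV bytes as an ArrayBuffer.
const wav: ArrayBuffer = await waveform.tryEncodeAudio(24000);

// The bytes can be served directly as an audio response:
const response = new Response(wav, { headers: { 'Content-Type': 'audio/wav' } });
console.log(response.headers.get('Content-Type'), wav.byteLength);
```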

File tree

7 files changed: +438 -2 lines changed


examples/ort-raw-session/index.ts

Lines changed: 75 additions & 1 deletion
@@ -1,3 +1,76 @@
+/// <reference path="./types.d.ts" />
+
+/*
+const modelUrl = 'https://huggingface.co/kalleby/hp-to-miles/resolve/main/model.onnx?download=true';
+const modelConfigUrl =
+  'https://huggingface.co/kalleby/hp-to-miles/resolve/main/config.json?download=true';
+
+const model = await Supabase.ai.RawSession.fromUrl(modelUrl);
+const modelConfig = await fetch(modelConfigUrl).then((r) => r.json());
+
+Deno.serve(async (req: Request) => {
+  const params = new URL(req.url).searchParams;
+  const inputValue = parseInt(params.get('value'));
+
+  const input = new Supabase.ai.RawTensor('float32', [inputValue], [1, 1])
+    .minMaxNormalize(modelConfig.input.min, modelConfig.input.max);
+
+  const output = await model.run({
+    'dense_dense1_input': input,
+  });
+
+  console.log('output', output);
+
+  const outputTensor = output['dense_Dense4']
+    .minMaxUnnormalize(modelConfig.label.min, modelConfig.label.max);
+
+  return Response.json({ result: outputTensor.data });
+});
+*/
+
+// transformers.js Compatible:
+// import { Tensor } from 'https://cdn.jsdelivr.net/npm/@huggingface/[email protected]';
+// const rawTensor = new Supabase.ai.RawTensor('string', urls, [urls.length]);
+// console.log('raw tensor', rawTensor);
+//
+// const tensor = new Tensor(rawTensor);
+// console.log('hf tensor', tensor);
+//
+// 'hf tensor operations'
+// tensor.min(); tensor.max(); tensor.norm() ....
+
+// const modelUrl =
+//   'https://huggingface.co/pirocheto/phishing-url-detection/resolve/main/model.onnx?download=true';
+
+/*
+const { Tensor, RawSession } = Supabase.ai;
+
+const model = await RawSession.fromHuggingFace('pirocheto/phishing-url-detection', {
+  path: {
+    template: `{REPO_ID}/resolve/{REVISION}/{MODEL_FILE}?download=true`,
+    modelFile: 'model.onnx',
+  },
+});
+
+console.log('session', model);
+
+Deno.serve(async (_req: Request) => {
+  const urls = [
+    'https://clubedemilhagem.com/home.php',
+    'http://www.medicalnewstoday.com/articles/188939.php',
+    'https://magalu-crediarioluiza.com/Produto_20203/produto.php?sku=1',
+  ];
+
+  const inputs = new Tensor('string', urls, [urls.length]);
+  console.log('tensor', inputs.data);
+
+  const output = await model.run({ inputs });
+  console.log(output);
+
+  return Response.json({ result: output.probabilities });
+});
+*/
+
 const { Tensor, RawSession } = Supabase.ai;
 
 const session = await RawSession.fromHuggingFace('kallebysantos/vehicle-emission', {
@@ -27,14 +100,15 @@ Deno.serve(async (_req: Request) => {
   }];
 
   // Parsing objects to tensor input
-  const inputTensors = {};
+  const inputTensors: Record<string, Supabase.Tensor<'float32'>> = {};
   session.inputs.forEach((inputKey) => {
     const values = carsBatchInput.map((item) => item[inputKey]);
 
     inputTensors[inputKey] = new Tensor('float32', values, [values.length, 1]);
   });
 
   const { emissions } = await session.run(inputTensors);
+  console.log(emissions);
   // [ 289.01, 199.53]
 
   return Response.json({ result: emissions });
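
For reference, a hypothetical client-side call to the vehicle-emission function above. The endpoint URL is an assumption (local `supabase functions serve` defaults and the folder name `ort-raw-session`); the expected result shape follows the `// [ 289.01, 199.53]` comment in the diff:

```ts
// Hypothetical invocation of the function defined in examples/ort-raw-session/index.ts.
const res = await fetch('http://localhost:54321/functions/v1/ort-raw-session');
if (!res.ok) throw new Error(`request failed: ${res.status}`);

const { result } = await res.json();
console.log(result); // e.g. [289.01, 199.53], one emission value per input row
```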

examples/ort-raw-session/types.d.ts

Lines changed: 2 additions & 0 deletions
@@ -91,6 +91,8 @@ declare namespace Supabase {
     size: number;
 
     constructor(type: T, data: TensorDataTypeMap[T], dims: number[]);
+
+    tryEncodeAudio(sampleRate: number): Promise<ArrayBuffer>;
   }
 
   export class RawSession {

examples/text-to-audio/index.ts

Lines changed: 103 additions & 0 deletions
@@ -0,0 +1,103 @@
+// Setup type definitions for built-in Supabase Runtime APIs
+import 'jsr:@supabase/functions-js/edge-runtime.d.ts';
+import { PreTrainedTokenizer } from 'https://cdn.jsdelivr.net/npm/@huggingface/[email protected]';
+
+// import 'phonemize' code from the Kokoro.js repo
+import { phonemize } from './phonemizer.js';
+
+const { Tensor, RawSession } = Supabase.ai;
+
+const STYLE_DIM = 256;
+const SAMPLE_RATE = 24000;
+const MODEL_ID = 'onnx-community/Kokoro-82M-ONNX';
+
+// https://huggingface.co/onnx-community/Kokoro-82M-ONNX#samples
+const ALLOWED_VOICES = [
+  'af_bella',
+  'af_nicole',
+  'af_sarah',
+  'af_sky',
+  'am_adam',
+  'am_michael',
+  'bf_emma',
+  'bf_isabella',
+  'bm_george',
+  'bm_lewis',
+];
+
+const session = await RawSession.fromHuggingFace(MODEL_ID);
+
+Deno.serve(async (req) => {
+  const params = new URL(req.url).searchParams;
+  const text = params.get('text') ?? 'Hello from Supabase!';
+  const voice = params.get('voice') ?? 'af_bella';
+
+  if (!ALLOWED_VOICES.includes(voice)) {
+    return Response.json({
+      error: `invalid voice '${voice}'`,
+      must_be_one_of: ALLOWED_VOICES,
+    }, { status: 400 });
+  }
+
+  const tokenizer = await loadTokenizer();
+  const language = voice.at(0); // 'a'merican | 'b'ritish
+  const phonemes = await phonemize(text, language);
+  const { input_ids } = tokenizer(phonemes, {
+    truncation: true,
+  });
+
+  // Select voice style based on number of input tokens
+  const num_tokens = Math.max(
+    input_ids.dims.at(-1) - 2, // Without padding
+    0,
+  );
+
+  const voiceStyle = await loadVoiceStyle(voice, num_tokens);
+
+  const { waveform } = await session.run({
+    input_ids,
+    style: voiceStyle,
+    speed: new Tensor('float32', [1], [1]),
+  });
+
+  // Do `wave` encoding in the Rust backend
+  const audio = await waveform.tryEncodeAudio(SAMPLE_RATE);
+
+  return new Response(audio, {
+    headers: {
+      'Content-Type': 'audio/wav',
+    },
+  });
+});
+
+async function loadVoiceStyle(voice: string, num_tokens: number) {
+  const voice_url =
+    `https://huggingface.co/onnx-community/Kokoro-82M-ONNX/resolve/main/voices/${voice}.bin?download=true`;
+
+  console.log('loading voice:', voice_url);
+
+  const voiceBuffer = await fetch(voice_url).then(async (res) => await res.arrayBuffer());
+
+  const offset = num_tokens * STYLE_DIM;
+  const voiceData = new Float32Array(voiceBuffer).slice(
+    offset,
+    offset + STYLE_DIM,
+  );
+
+  return new Tensor('float32', voiceData, [1, STYLE_DIM]);
+}
+
+async function loadTokenizer() {
+  // BUG: invalid 'h' not JSON. That's why we need to manually fetch the assets
+  // const tokenizer = await AutoTokenizer.from_pretrained(MODEL_ID);
+
+  const tokenizerData = await fetch(
+    'https://huggingface.co/onnx-community/Kokoro-82M-ONNX/resolve/main/tokenizer.json?download=true',
+  ).then(async (res) => await res.json());
+
+  const tokenizerConfig = await fetch(
+    'https://huggingface.co/onnx-community/Kokoro-82M-ONNX/resolve/main/tokenizer_config.json?download=true',
+  ).then(async (res) => await res.json());
+
+  return new PreTrainedTokenizer(tokenizerData, tokenizerConfig);
+}
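
And a hypothetical way to call the new text-to-audio function once deployed. The endpoint URL is an assumption (local `supabase functions serve` defaults); the `text` and `voice` query parameters match the handler above:

```ts
// Hypothetical client for examples/text-to-audio/index.ts; the URL is an assumption.
const url = new URL('http://localhost:54321/functions/v1/text-to-audio');
url.searchParams.set('text', 'Hello from Supabase!');
url.searchParams.set('voice', 'af_bella');

const res = await fetch(url);
if (!res.ok) throw new Error(`request failed: ${res.status}`);

// The handler responds with 'audio/wav' bytes; save them to disk (Deno runtime assumed).
const wav = new Uint8Array(await res.arrayBuffer());
await Deno.writeFile('hello.wav', wav);
```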
