
Commit c8f3fb3

Add image generation support (#271)
1 parent f7827ba commit c8f3fb3

5 files changed: +136 -1 lines changed

README.md

Lines changed: 3 additions & 0 deletions
@@ -162,6 +162,9 @@ ollama.generate(request)
 - `logprobs` `<boolean>`: (Optional) Return log probabilities for tokens. Requires model support.
 - `top_logprobs` `<number>`: (Optional) Number of top log probabilities to return per token when `logprobs` is enabled.
 - `keep_alive` `<string | number>`: (Optional) How long to keep the model loaded. A number (seconds) or a string with a duration unit suffix ("300ms", "1.5h", "2h45m", etc.)
+- `width` `<number>`: (Optional, Experimental) Width of the generated image in pixels. For image generation models only.
+- `height` `<number>`: (Optional, Experimental) Height of the generated image in pixels. For image generation models only.
+- `steps` `<number>`: (Optional, Experimental) Number of diffusion steps. For image generation models only.
 - `options` `<Options>`: (Optional) Options to configure the runtime.
 - Returns: `<GenerateResponse>`

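The new request fields go through the same `ollama.generate()` call used for text generation. A minimal non-streaming sketch, assuming the experimental image model used elsewhere in this commit (`x/z-image-turbo`) is available locally; the prompt, dimensions, and output path are illustrative:

```ts
import ollama from 'ollama'
import { writeFileSync } from 'fs'

async function main() {
  // Request an image using the experimental width/height/steps fields.
  const response = await ollama.generate({
    model: 'x/z-image-turbo', // image generation model from the commit's example
    prompt: 'a sunset over mountains',
    width: 1024, // output width in pixels
    height: 768, // output height in pixels
    steps: 20, // number of diffusion steps
  })

  // For image generation models the result arrives base64-encoded in
  // `response.image` rather than as text in `response.response`.
  if (response.image) {
    writeFileSync('output.png', Buffer.from(response.image, 'base64'))
    console.log('Image saved to output.png')
  }
}

main().catch(console.error)
```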

examples/README.md

Lines changed: 7 additions & 0 deletions
@@ -8,3 +8,10 @@ To run the examples run:
 ```sh
 npx tsx <folder-name>/<file-name>.ts
 ```
+
+### Image Generation (Experimental)
+
+> **Note:** Image generation is experimental and currently only available on macOS.
+
+- [image-generation/image-generation.ts](image-generation/image-generation.ts)
+- [image-generation/image-generation-stream.ts](image-generation/image-generation-stream.ts) - Streamed progress
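Per the run instructions above, the streamed example can be invoked as `npx tsx image-generation/image-generation-stream.ts`; it prints step-by-step progress and writes the generated image to `output.png` when the final chunk arrives.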
examples/image-generation/image-generation-stream.ts

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+// Image generation is experimental and currently only available on macOS
+
+import ollama from 'ollama'
+import { writeFileSync } from 'fs'
+
+async function main() {
+  const prompt = 'a sunset over mountains'
+  console.log(`Prompt: ${prompt}`)
+
+  const response = await ollama.generate({
+    model: 'x/z-image-turbo',
+    prompt,
+    stream: true,
+  })
+
+  for await (const part of response) {
+    if (part.image) {
+      // Final response contains the image
+      const imageBuffer = Buffer.from(part.image, 'base64')
+      writeFileSync('output.png', imageBuffer)
+      console.log('\nImage saved to output.png')
+    } else if (part.total) {
+      // Progress update
+      process.stdout.write(`\rProgress: ${part.completed}/${part.total}`)
+    }
+  }
+}
+
+main().catch(console.error)

src/interfaces.ts

Lines changed: 11 additions & 1 deletion
@@ -61,6 +61,11 @@ export interface GenerateRequest {
   logprobs?: boolean
   top_logprobs?: number

+  // Experimental image generation parameters
+  width?: number
+  height?: number
+  steps?: number
+
   options?: Partial<Options>
 }

@@ -191,7 +196,7 @@ export interface Logprob extends TokenLogprob {
 export interface GenerateResponse {
   model: string
   created_at: Date
-  response: string
+  response?: string
   thinking?: string
   done: boolean
   done_reason: string
@@ -203,6 +208,11 @@ export interface GenerateResponse {
   eval_count: number
   eval_duration: number
   logprobs?: Logprob[]
+
+  // Image generation response fields
+  image?: string // Base64-encoded generated image data
+  completed?: number // Number of completed steps (for streaming progress)
+  total?: number // Total number of steps (for streaming progress)
 }

 export interface ChatResponse {
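Since `response` is now optional and image data arrives in the new fields, code that consumes `GenerateResponse` for both text and image models needs a small branch. A sketch of one way to handle the updated shape, assuming `GenerateResponse` is importable from the package entry point as in the tests below; the helper name is hypothetical:

```ts
import { writeFileSync } from 'fs'
import type { GenerateResponse } from 'ollama'

// Hypothetical helper: dispatch on whichever optional field a chunk carries.
function handleChunk(part: GenerateResponse) {
  if (part.image) {
    // Final image chunk: decode the base64 payload and save it.
    writeFileSync('output.png', Buffer.from(part.image, 'base64'))
  } else if (part.completed !== undefined && part.total !== undefined) {
    // Streaming progress chunk from an image generation model.
    console.log(`step ${part.completed}/${part.total}`)
  } else if (part.response) {
    // Ordinary text generation still arrives in `response`.
    process.stdout.write(part.response)
  }
}
```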

test/browser.test.ts

Lines changed: 86 additions & 0 deletions
@@ -56,3 +56,89 @@ describe('Ollama logprob request fields', () => {
     )
   })
 })
+
+describe('Ollama image generation request fields', () => {
+  it('forwards image generation parameters in generate requests', async () => {
+    const client = new Ollama()
+    const spy = vi
+      .spyOn(client as any, 'processStreamableRequest')
+      .mockResolvedValue({} as GenerateResponse)
+
+    await client.generate({
+      model: 'dummy-image',
+      prompt: 'a sunset over mountains',
+      width: 1024,
+      height: 768,
+      steps: 20,
+    })
+
+    expect(spy).toHaveBeenCalledWith(
+      'generate',
+      expect.objectContaining({
+        model: 'dummy-image',
+        prompt: 'a sunset over mountains',
+        width: 1024,
+        height: 768,
+        steps: 20,
+      }),
+    )
+  })
+
+  it('handles image generation response with image field', async () => {
+    const mockResponse: GenerateResponse = {
+      model: 'dummy-image',
+      created_at: new Date(),
+      done: true,
+      done_reason: 'stop',
+      context: [],
+      total_duration: 1000,
+      load_duration: 100,
+      prompt_eval_count: 10,
+      prompt_eval_duration: 50,
+      eval_count: 0,
+      eval_duration: 0,
+      image: 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==',
+    }
+
+    const client = new Ollama()
+    vi.spyOn(client as any, 'processStreamableRequest').mockResolvedValue(mockResponse)
+
+    const response = await client.generate({
+      model: 'dummy-image',
+      prompt: 'a sunset',
+    })
+
+    expect(response.image).toBeDefined()
+    expect(response.done).toBe(true)
+  })
+
+  it('handles streaming progress fields for image generation', async () => {
+    const mockResponse: GenerateResponse = {
+      model: 'dummy-image',
+      created_at: new Date(),
+      done: false,
+      done_reason: '',
+      context: [],
+      total_duration: 0,
+      load_duration: 0,
+      prompt_eval_count: 0,
+      prompt_eval_duration: 0,
+      eval_count: 0,
+      eval_duration: 0,
+      completed: 5,
+      total: 20,
+    }
+
+    const client = new Ollama()
+    vi.spyOn(client as any, 'processStreamableRequest').mockResolvedValue(mockResponse)
+
+    const response = await client.generate({
+      model: 'dummy-image',
+      prompt: 'a sunset',
+    })
+
+    expect(response.completed).toBe(5)
+    expect(response.total).toBe(20)
+    expect(response.done).toBe(false)
+  })
+})
