Commit 7190508

Update compose to only spin up frontend and backend, update backend logic to handle DMR, update tests to remove model container
1 parent 36c1bed commit 7190508

5 files changed: +191 -76 lines

.env.compose

Lines changed: 5 additions & 2 deletions
@@ -1,4 +1,7 @@
+DMR=true
 REACT_APP_NODE_ENV=development
 REACT_APP_LOCAL=localhost
-REACT_APP_MODEL_SERVICE=model
-REACT_APP_SERVER_PORT=5002
+REACT_APP_MODEL_SERVICE=host.docker.internal
+REACT_APP_MODEL_PORT=12434
+REACT_APP_MODEL_PATH=/engines/llama.cpp/v1/chat/completions
+REACT_APP_SERVER_PORT=5002
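
For reference, the updated backend (see the server.js diff below) resolves these variables into a model endpoint and falls back to the previous Ollama defaults when they are unset. A condensed sketch of that lookup, with variable names taken from .env.compose and everything else illustrative:

// Sketch only: mirrors the endpoint resolution in the updated handleStreamRequest.
const host = ("REACT_APP_MODEL_SERVICE" in process.env) ? process.env.REACT_APP_MODEL_SERVICE : "model-published";
const port = ("REACT_APP_MODEL_PORT" in process.env) ? process.env.REACT_APP_MODEL_PORT : 11434;
const path = ("REACT_APP_MODEL_PATH" in process.env) ? process.env.REACT_APP_MODEL_PATH : "/api/generate";
const isDMR = "DMR" in process.env;

// With the values above this points at the llama.cpp OpenAI-style endpoint on the Docker host;
// without DMR set, it falls back to Ollama's /api/generate on port 11434.
console.log(`${isDMR ? "DMR" : "Ollama"} endpoint: http://${host}:${port}${path}`);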

README.md

Lines changed: 2 additions & 0 deletions
@@ -21,6 +21,8 @@ It is a NodeJS app that uses the llama3.2 model to service prompt requests. It u
 ### Locally with Docker Compose
 I used compose to develop this locally.
 
+- Install model on Docker Model Runner: `docker model pull ai/llama3.2`
+- Note: This uses the `DMR` flag in the `env.compose` file to interact with the Open AI API call and llama.cpp server
 - `docker compose up --build`
 - When done, `docker compose down`
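
Before bringing the stack up, it can help to confirm the model answers through Docker Model Runner. A minimal check, assuming DMR's TCP endpoint is enabled on the host at port 12434 (the same endpoint the backend reaches via host.docker.internal per .env.compose); the script name is hypothetical:

// check-dmr.js (sketch only; assumes DMR is reachable at localhost:12434 on the host)
const axios = require('axios');

async function checkDMR() {
  const res = await axios.post(
    'http://localhost:12434/engines/llama.cpp/v1/chat/completions',
    {
      model: 'ai/llama3.2', // pulled with `docker model pull ai/llama3.2`
      messages: [{ role: 'user', content: 'Say hello in one word.' }]
      // no stream flag, so the reply comes back as a single JSON body
    },
    { headers: { 'Content-Type': 'application/json' } }
  );
  console.log(res.data.choices[0].message.content);
}

checkDMR().catch(err => {
  console.error('DMR not reachable:', err.message);
  process.exit(1);
});

If this prints a reply, `docker compose up --build` should be able to stream through the backend with the .env.compose defaults.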

compose.yaml

Lines changed: 0 additions & 8 deletions
@@ -23,8 +23,6 @@ services:
       - 3000:3000
   backend:
     container_name: backend
-    depends_on:
-      - model
    build:
      context: .
      dockerfile: Dockerfile.backend
@@ -34,9 +32,3 @@ services:
      - .env.compose
    ports:
      - 5002:5002
-  model:
-    container_name: model
-    build:
-      context: ./ollama
-    ports:
-      - 11434:11434

server.js

Lines changed: 148 additions & 24 deletions
@@ -44,45 +44,169 @@ async function handleStreamRequest(req, res) {
 
   try {
     const host = ("REACT_APP_MODEL_SERVICE" in process.env) ? process.env.REACT_APP_MODEL_SERVICE : "model-published";
-
-    // Make a streaming request to Ollama
-    const response = await axios({
-      method: 'post',
-      url: `http://${host}:11434/api/generate`,
-      data: {
-        model: model,
-        prompt: prompt,
-        stream: true
-      },
-      responseType: 'stream'
-    });
+    const port = ("REACT_APP_MODEL_PORT" in process.env) ? process.env.REACT_APP_MODEL_PORT : 11434;
+    const path = ("REACT_APP_MODEL_PATH" in process.env) ? process.env.REACT_APP_MODEL_PATH : "/api/generate";
+
+    const isDMR = "DMR" in process.env ? true : false;
 
+    // Add debug logging
+    console.log(`Making request to ${isDMR ? 'DMR' : 'Ollama'} model service at host: ${host}`);
+
+    let response;
+
+    if (isDMR) {
+      // Docker Model Runner (OpenAI format)
+      console.log(`DMR endpoint: http://${host}:${port}${path}`);
+      console.log(`Model: ${model}`)
+      response = await axios({
+        method: 'post',
+        url: `http://${host}:${port}${path}`,
+        data: {
+          model: 'ai/' + model,
+          messages: [{ role: "user", content: prompt }],
+          stream: true
+        },
+        headers: {
+          'Content-Type': 'application/json',
+          'Accept': 'text/event-stream'
+        },
+        responseType: 'stream'
+      });
+    } else {
+      // Ollama format
+      console.log(`Ollama endpoint: http://${host}:11434/api/generate`);
+      response = await axios({
+        method: 'post',
+        url: `http://${host}:11434/api/generate`,
+        data: {
+          model: model,
+          prompt: prompt,
+          stream: true
+        },
+        responseType: 'stream'
+      });
+    }
+
+    console.log("Connection established, processing stream...");
+
     // Forward the stream to the client
     response.data.on('data', (chunk) => {
       try {
-        const data = JSON.parse(chunk.toString());
-        // Send each chunk as an SSE event
-        res.write(`data: ${JSON.stringify(data)}\n\n`);
+        const chunkStr = chunk.toString();
+        console.log("Received chunk:", chunkStr.substring(0, 50) + (chunkStr.length > 50 ? '...' : ''));
 
-        // If this is the final response, end the connection
-        if (data.done) {
-          res.end();
+        // Handle DMR (OpenAI) format - may contain multiple SSE events
+        if (isDMR) {
+          // Split by double newlines to handle multiple SSE events in one chunk
+          const events = chunkStr.split('\n\n').filter(event => event.trim());
+          console.log(`Found ${events.length} events in chunk`);
+
+          for (const event of events) {
+            if (event.startsWith('data: ')) {
+              const dataContent = event.replace('data: ', '');
+
+              // Check for "[DONE]" signal
+              if (dataContent.trim() === '[DONE]') {
+                console.log("Received [DONE] signal");
+                res.end();
+                return;
+              }
+
+              try {
+                const data = JSON.parse(dataContent);
+
+                // Debug the received data structure
+                console.log("Parsed DMR data:", JSON.stringify(data).substring(0, 100));
+
+                // Extract content based on what's available
+                let content = '';
+                if (data.choices && data.choices.length > 0) {
+                  // For chat completions delta format
+                  if (data.choices[0].delta && data.choices[0].delta.content) {
+                    content = data.choices[0].delta.content;
+                  }
+                  // For text completions format
+                  else if (data.choices[0].text) {
+                    content = data.choices[0].text;
+                  }
+                }
+
+                // Format to match Ollama response structure that the client expects
+                const responseData = {
+                  response: content, // Use 'response' field to match Ollama format
+                  done: false
+                };
+
+                if (content) {
+                  console.log(`Sending content: ${content.substring(0, 20)}${content.length > 20 ? '...' : ''}`);
+                  // Send to client
+                  res.write(`data: ${JSON.stringify(responseData)}\n\n`);
+                }
+
+                // Check if it's the final chunk
+                if (data.choices && data.choices[0] && data.choices[0].finish_reason === 'stop') {
+                  console.log("Detected finish_reason=stop, ending stream");
+                  res.write(`data: ${JSON.stringify({ done: true })}\n\n`);
+                  res.end();
+                }
+              } catch (err) {
+                console.error("Error parsing DMR chunk:", err, "Raw data:", dataContent);
+                // Don't end the connection on parse error, just log it
+              }
+            }
+          }
+        }
+        // Handle Ollama format
+        else {
+          try {
+            const data = JSON.parse(chunkStr);
+            console.log(`Ollama response: ${data.response ? data.response.substring(0, 20) + '...' : '[no response field]'}, done=${data.done}`);
+
+            // Send each chunk as an SSE event
+            res.write(`data: ${JSON.stringify(data)}\n\n`);
+
+            // If this is the final response, end the connection
+            if (data.done) {
+              console.log("Ollama stream complete");
+              res.end();
+            }
+          } catch (err) {
+            console.error("Error parsing Ollama chunk:", err);
+            // Try to continue processing even if one chunk fails
+          }
         }
       } catch (err) {
-        console.error("Error parsing chunk:", err);
-        res.write(`data: ${JSON.stringify({ error: "Parse error" })}\n\n`);
+        console.error("Error processing chunk:", err);
+        res.write(`data: ${JSON.stringify({ error: "Parse error", message: err.message })}\n\n`);
+        // Don't end the stream on parse error unless it's critical
      }
    });
-
+
     // Handle errors in the stream
     response.data.on('error', (err) => {
       console.error("Stream error:", err);
-      res.write(`data: ${JSON.stringify({ error: "Stream error" })}\n\n`);
+      res.write(`data: ${JSON.stringify({ error: "Stream error", message: err.message, stack: err.stack })}\n\n`);
       res.end();
     });
+
+    // Make sure we handle the end of the stream properly
+    response.data.on('end', () => {
+      console.log("Stream ended naturally");
+      // Only end the response if it hasn't been ended already
+      if (!res.writableEnded) {
+        res.write(`data: ${JSON.stringify({ done: true })}\n\n`);
+        res.end();
+      }
+    });
   } catch (err) {
-    console.error("Streaming error: ", err);
-    res.write(`data: ${JSON.stringify({ error: "Server error", message: err.message })}\n\n`);
+    console.error("Connection error: ", err.message, err.stack);
+    res.write(`data: ${JSON.stringify({
+      error: "Server error",
+      message: err.message,
+      url: err.config?.url || 'unknown',
+      status: err.response?.status || 'unknown',
+      statusText: err.response?.statusText || 'unknown'
+    })}\n\n`);
     res.end();
   }
 }
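
Because the handler normalizes both backends into Ollama-style `{ response, done }` SSE events, a client can consume either path the same way. A minimal consumer sketch, assuming the handler is mounted at POST /api/stream (as the tests below exercise) and the backend is published on port 5002 per compose.yaml; the file name is hypothetical:

// stream-client.js (sketch only; requires Node 18+ for global fetch)
async function streamPrompt(model, prompt) {
  const res = await fetch('http://localhost:5002/api/stream', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model, prompt })
  });

  const decoder = new TextDecoder();
  let buffer = '';

  // The server writes `data: {...}\n\n` events; each payload carries
  // { response, done } whether DMR or Ollama produced it.
  for await (const chunk of res.body) {
    buffer += decoder.decode(chunk, { stream: true });
    let sep;
    while ((sep = buffer.indexOf('\n\n')) !== -1) {
      const event = buffer.slice(0, sep).trim();
      buffer = buffer.slice(sep + 2);
      if (!event.startsWith('data: ')) continue;
      const data = JSON.parse(event.slice(6));
      if (data.response) process.stdout.write(data.response);
      if (data.done || data.error) return;
    }
  }
}

streamPrompt('llama3.2', 'Why do cats purr?').catch(console.error);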

tests/server.test.js

Lines changed: 36 additions & 42 deletions
@@ -1,54 +1,48 @@
 const { GenericContainer } = require('testcontainers');
-const { getResponse } = require('../server');
-const SECONDS = 1000;
+const axios = require('axios');
 
-describe('Ollama Container Tests', () => {
+jest.setTimeout(60000);
+
+describe("Server Endpoints", () => {
   let container;
+  let serverUrl;
 
-  // Use beforeAll instead of beforeEach if you want to reuse the container
   beforeAll(async () => {
-    // Initialize container before tests run
-    container = await new GenericContainer("samanthamorris684/ollama@sha256:78a199fa9652a16429037726943a82bd4916975fecf2b105d06e140ae70a1420")
-      .withExposedPorts(11434)
+    // Start your container
+    container = await new GenericContainer("samanthamorris684/catbot-backend@sha256:6e2bf0ca7fa13fee68e5aa5a85f3c179c5c44bf3a50e3c271c04d64f7cd9e063")
+      .withExposedPorts(5001) // Your server inside listens on 5001
       .start();
 
-    // Get logs
-    (await container.logs())
-      .on("data", line => console.log(line))
-      .on("err", line => console.error(line))
-      .on("end", () => console.log("Stream closed"));
-
-    // Set environment variables after container is started
-    process.env.REACT_APP_MODEL_SERVICE = container.getHost();
-    process.env.REACT_APP_MODEL_PORT = container.getMappedPort(11434);
-
-    console.log(`Container running at ${process.env.REACT_APP_MODEL_SERVICE}:${process.env.REACT_APP_MODEL_PORT}`);
+    const host = container.getHost();
+    const port = container.getMappedPort(5001);
+    serverUrl = `http://${host}:${port}`;
+  });
 
-    console.log()
+  afterAll(async () => {
+    await container.stop();
+  });
 
-  }, 180 * SECONDS); // Timeout increased for startup and model pull
+  test("POST /execute should return 400 if missing model or prompt", async () => {
+    const response = await axios.post(`${serverUrl}/execute`, { model: "test-model" }, { validateStatus: () => true });
+    expect(response.status).toBe(400);
+    expect(response.data).toEqual({ error: "Model name and prompt are required" });
+  });
 
-  afterAll(async () => {
-    // Clean up container after tests
-    try {
-      await container.stop({
-        force: true,
-        timeout: 0
-      });
-      console.log("Container forcefully stopped");
-    }
-    catch (error) {
-      console.error("Error stopping container: ", error);
-    }
+  test("POST /execute should return 500 if backend model is unreachable", async () => {
+    const response = await axios.post(`${serverUrl}/execute`, { model: "nonexistent", prompt: "hello" }, { validateStatus: () => true });
+    expect(response.status).toBe(500);
+    expect(response.data.error).toBeDefined();
+  });
+
+  test("POST /api/stream should return 400 if missing model or prompt", async () => {
+    const response = await axios.post(`${serverUrl}/api/stream`, { model: "test-model" }, { validateStatus: () => true });
+    expect(response.status).toBe(400);
+    expect(response.data).toEqual({ error: "Model name and prompt are required" });
   });
 
-  test('test nonstreaming response', async () => {
-    const model = "llama3.2";
-    const prompt = "How are you?";
-
-    const result = await getResponse(model, prompt);
-    console.log('Response from Ollama:', result);
-
-    expect(result["done"]).toBe(true)
-  }, 60 * SECONDS);
-});
+  test("GET /api/stream should return 400 if missing model or prompt", async () => {
+    const response = await axios.get(`${serverUrl}/api/stream?model=test-model`, { validateStatus: () => true });
+    expect(response.status).toBe(400);
+    expect(response.data).toEqual({ error: "Model name and prompt are required" });
+  });
+});
