Commit 7190508

Update compose to only spin up frontend and backend, update backend logic to handle DMR, update tests to remove model container
1 parent 36c1bed commit 7190508

5 files changed: +191 -76 lines

.env.compose

Lines changed: 5 additions & 2 deletions
@@ -1,4 +1,7 @@
+DMR=true
 REACT_APP_NODE_ENV=development
 REACT_APP_LOCAL=localhost
-REACT_APP_MODEL_SERVICE=model
-REACT_APP_SERVER_PORT=5002
+REACT_APP_MODEL_SERVICE=host.docker.internal
+REACT_APP_MODEL_PORT=12434
+REACT_APP_MODEL_PATH=/engines/llama.cpp/v1/chat/completions
+REACT_APP_SERVER_PORT=5002
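
For reference, the updated backend (see the server.js diff below) resolves these variables into a model endpoint and falls back to the previous Ollama defaults when they are unset. A condensed sketch of that lookup, with variable names taken from .env.compose and everything else illustrative:

// Sketch only: mirrors the endpoint resolution in the updated handleStreamRequest.
const host = ("REACT_APP_MODEL_SERVICE" in process.env) ? process.env.REACT_APP_MODEL_SERVICE : "model-published";
const port = ("REACT_APP_MODEL_PORT" in process.env) ? process.env.REACT_APP_MODEL_PORT : 11434;
const path = ("REACT_APP_MODEL_PATH" in process.env) ? process.env.REACT_APP_MODEL_PATH : "/api/generate";
const isDMR = "DMR" in process.env;

// With the values above this points at the llama.cpp OpenAI-style endpoint on the Docker host;
// without DMR set, it falls back to Ollama's /api/generate on port 11434.
console.log(`${isDMR ? "DMR" : "Ollama"} endpoint: http://${host}:${port}${path}`);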

README.md

Lines changed: 2 additions & 0 deletions
@@ -21,6 +21,8 @@ It is a NodeJS app that uses the llama3.2 model to service prompt requests. It u
 ### Locally with Docker Compose
 I used compose to develop this locally.
 
+- Install model on Docker Model Runner: `docker model pull ai/llama3.2`
+- Note: This uses the `DMR` flag in the `env.compose` file to interact with the Open AI API call and llama.cpp server
 - `docker compose up --build`
 - When done, `docker compose down`
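
Before bringing the stack up, it can help to confirm the model answers through Docker Model Runner. A minimal check, assuming DMR's TCP endpoint is enabled on the host at port 12434 (the same endpoint the backend reaches via host.docker.internal per .env.compose); the script name is hypothetical:

// check-dmr.js (sketch only; assumes DMR is reachable at localhost:12434 on the host)
const axios = require('axios');

async function checkDMR() {
  const res = await axios.post(
    'http://localhost:12434/engines/llama.cpp/v1/chat/completions',
    {
      model: 'ai/llama3.2', // pulled with `docker model pull ai/llama3.2`
      messages: [{ role: 'user', content: 'Say hello in one word.' }]
      // no stream flag, so the reply comes back as a single JSON body
    },
    { headers: { 'Content-Type': 'application/json' } }
  );
  console.log(res.data.choices[0].message.content);
}

checkDMR().catch(err => {
  console.error('DMR not reachable:', err.message);
  process.exit(1);
});

If this prints a reply, `docker compose up --build` should be able to stream through the backend with the .env.compose defaults.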

compose.yaml

Lines changed: 0 additions & 8 deletions
@@ -23,8 +23,6 @@ services:
       - 3000:3000
   backend:
     container_name: backend
-    depends_on:
-      - model
    build:
      context: .
      dockerfile: Dockerfile.backend
@@ -34,9 +32,3 @@ services:
      - .env.compose
    ports:
      - 5002:5002
-  model:
-    container_name: model
-    build:
-      context: ./ollama
-    ports:
-      - 11434:11434

server.js

Lines changed: 148 additions & 24 deletions
@@ -44,45 +44,169 @@ async function handleStreamRequest(req, res) {
 
   try {
     const host = ("REACT_APP_MODEL_SERVICE" in process.env) ? process.env.REACT_APP_MODEL_SERVICE : "model-published";
-
-    // Make a streaming request to Ollama
-    const response = await axios({
-      method: 'post',
-      url: `http://${host}:11434/api/generate`,
-      data: {
-        model: model,
-        prompt: prompt,
-        stream: true
-      },
-      responseType: 'stream'
-    });
+    const port = ("REACT_APP_MODEL_PORT" in process.env) ? process.env.REACT_APP_MODEL_PORT : 11434;
+    const path = ("REACT_APP_MODEL_PATH" in process.env) ? process.env.REACT_APP_MODEL_PATH : "/api/generate";
+
+    const isDMR = "DMR" in process.env ? true : false;
 
+    // Add debug logging
+    console.log(`Making request to ${isDMR ? 'DMR' : 'Ollama'} model service at host: ${host}`);
+
+    let response;
+
+    if (isDMR) {
+      // Docker Model Runner (OpenAI format)
+      console.log(`DMR endpoint: http://${host}:${port}${path}`);
+      console.log(`Model: ${model}`)
+      response = await axios({
+        method: 'post',
+        url: `http://${host}:${port}${path}`,
+        data: {
+          model: 'ai/' + model,
+          messages: [{ role: "user", content: prompt }],
+          stream: true
+        },
+        headers: {
+          'Content-Type': 'application/json',
+          'Accept': 'text/event-stream'
+        },
+        responseType: 'stream'
+      });
+    } else {
+      // Ollama format
+      console.log(`Ollama endpoint: http://${host}:11434/api/generate`);
+      response = await axios({
+        method: 'post',
+        url: `http://${host}:11434/api/generate`,
+        data: {
+          model: model,
+          prompt: prompt,
+          stream: true
+        },
+        responseType: 'stream'
+      });
+    }
+
+    console.log("Connection established, processing stream...");
+
     // Forward the stream to the client
     response.data.on('data', (chunk) => {
       try {
-        const data = JSON.parse(chunk.toString());
-        // Send each chunk as an SSE event
-        res.write(`data: ${JSON.stringify(data)}\n\n`);
+        const chunkStr = chunk.toString();
+        console.log("Received chunk:", chunkStr.substring(0, 50) + (chunkStr.length > 50 ? '...' : ''));
 
-        // If this is the final response, end the connection
-        if (data.done) {
-          res.end();
+        // Handle DMR (OpenAI) format - may contain multiple SSE events
+        if (isDMR) {
+          // Split by double newlines to handle multiple SSE events in one chunk
+          const events = chunkStr.split('\n\n').filter(event => event.trim());
+          console.log(`Found ${events.length} events in chunk`);
+
+          for (const event of events) {
+            if (event.startsWith('data: ')) {
+              const dataContent = event.replace('data: ', '');
+
+              // Check for "[DONE]" signal
+              if (dataContent.trim() === '[DONE]') {
+                console.log("Received [DONE] signal");
+                res.end();
+                return;
+              }
+
+              try {
+                const data = JSON.parse(dataContent);
+
+                // Debug the received data structure
+                console.log("Parsed DMR data:", JSON.stringify(data).substring(0, 100));
+
+                // Extract content based on what's available
+                let content = '';
+                if (data.choices && data.choices.length > 0) {
+                  // For chat completions delta format
+                  if (data.choices[0].delta && data.choices[0].delta.content) {
+                    content = data.choices[0].delta.content;
+                  }
+                  // For text completions format
+                  else if (data.choices[0].text) {
+                    content = data.choices[0].text;
+                  }
+                }
+
+                // Format to match Ollama response structure that the client expects
+                const responseData = {
+                  response: content, // Use 'response' field to match Ollama format
+                  done: false
+                };
+
+                if (content) {
+                  console.log(`Sending content: ${content.substring(0, 20)}${content.length > 20 ? '...' : ''}`);
+                  // Send to client
+                  res.write(`data: ${JSON.stringify(responseData)}\n\n`);
+                }
+
+                // Check if it's the final chunk
+                if (data.choices && data.choices[0] && data.choices[0].finish_reason === 'stop') {
+                  console.log("Detected finish_reason=stop, ending stream");
+                  res.write(`data: ${JSON.stringify({ done: true })}\n\n`);
+                  res.end();
+                }
+              } catch (err) {
+                console.error("Error parsing DMR chunk:", err, "Raw data:", dataContent);
+                // Don't end the connection on parse error, just log it
+              }
+            }
+          }
+        }
+        // Handle Ollama format
+        else {
+          try {
+            const data = JSON.parse(chunkStr);
+            console.log(`Ollama response: ${data.response ? data.response.substring(0, 20) + '...' : '[no response field]'}, done=${data.done}`);
+
+            // Send each chunk as an SSE event
+            res.write(`data: ${JSON.stringify(data)}\n\n`);
+
+            // If this is the final response, end the connection
+            if (data.done) {
+              console.log("Ollama stream complete");
+              res.end();
+            }
+          } catch (err) {
+            console.error("Error parsing Ollama chunk:", err);
+            // Try to continue processing even if one chunk fails
+          }
         }
       } catch (err) {
-        console.error("Error parsing chunk:", err);
-        res.write(`data: ${JSON.stringify({ error: "Parse error" })}\n\n`);
+        console.error("Error processing chunk:", err);
+        res.write(`data: ${JSON.stringify({ error: "Parse error", message: err.message })}\n\n`);
+        // Don't end the stream on parse error unless it's critical
      }
    });
-
+
     // Handle errors in the stream
     response.data.on('error', (err) => {
       console.error("Stream error:", err);
-      res.write(`data: ${JSON.stringify({ error: "Stream error" })}\n\n`);
+      res.write(`data: ${JSON.stringify({ error: "Stream error", message: err.message, stack: err.stack })}\n\n`);
       res.end();
     });
+
+    // Make sure we handle the end of the stream properly
+    response.data.on('end', () => {
+      console.log("Stream ended naturally");
+      // Only end the response if it hasn't been ended already
+      if (!res.writableEnded) {
+        res.write(`data: ${JSON.stringify({ done: true })}\n\n`);
+        res.end();
+      }
+    });
   } catch (err) {
-    console.error("Streaming error: ", err);
-    res.write(`data: ${JSON.stringify({ error: "Server error", message: err.message })}\n\n`);
+    console.error("Connection error: ", err.message, err.stack);
+    res.write(`data: ${JSON.stringify({
+      error: "Server error",
+      message: err.message,
+      url: err.config?.url || 'unknown',
+      status: err.response?.status || 'unknown',
+      statusText: err.response?.statusText || 'unknown'
+    })}\n\n`);
     res.end();
   }
 }
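
Because the handler normalizes both backends into Ollama-style `{ response, done }` SSE events, a client can consume either path the same way. A minimal consumer sketch, assuming the handler is mounted at POST /api/stream (as the tests below exercise) and the backend is published on port 5002 per compose.yaml; the file name is hypothetical:

// stream-client.js (sketch only; requires Node 18+ for global fetch)
async function streamPrompt(model, prompt) {
  const res = await fetch('http://localhost:5002/api/stream', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model, prompt })
  });

  const decoder = new TextDecoder();
  let buffer = '';

  // The server writes `data: {...}\n\n` events; each payload carries
  // { response, done } whether DMR or Ollama produced it.
  for await (const chunk of res.body) {
    buffer += decoder.decode(chunk, { stream: true });
    let sep;
    while ((sep = buffer.indexOf('\n\n')) !== -1) {
      const event = buffer.slice(0, sep).trim();
      buffer = buffer.slice(sep + 2);
      if (!event.startsWith('data: ')) continue;
      const data = JSON.parse(event.slice(6));
      if (data.response) process.stdout.write(data.response);
      if (data.done || data.error) return;
    }
  }
}

streamPrompt('llama3.2', 'Why do cats purr?').catch(console.error);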

tests/server.test.js

Lines changed: 36 additions & 42 deletions
@@ -1,54 +1,48 @@
 const { GenericContainer } = require('testcontainers');
-const { getResponse } = require('../server');
-const SECONDS = 1000;
+const axios = require('axios');
 
-describe('Ollama Container Tests', () => {
+jest.setTimeout(60000);
+
+describe("Server Endpoints", () => {
   let container;
+  let serverUrl;
 
-  // Use beforeAll instead of beforeEach if you want to reuse the container
   beforeAll(async () => {
-    // Initialize container before tests run
-    container = await new GenericContainer("samanthamorris684/ollama@sha256:78a199fa9652a16429037726943a82bd4916975fecf2b105d06e140ae70a1420")
-      .withExposedPorts(11434)
+    // Start your container
+    container = await new GenericContainer("samanthamorris684/catbot-backend@sha256:6e2bf0ca7fa13fee68e5aa5a85f3c179c5c44bf3a50e3c271c04d64f7cd9e063")
+      .withExposedPorts(5001) // Your server inside listens on 5001
       .start();
 
-    // Get logs
-    (await container.logs())
-      .on("data", line => console.log(line))
-      .on("err", line => console.error(line))
-      .on("end", () => console.log("Stream closed"));
-
-    // Set environment variables after container is started
-    process.env.REACT_APP_MODEL_SERVICE = container.getHost();
-    process.env.REACT_APP_MODEL_PORT = container.getMappedPort(11434);
-
-    console.log(`Container running at ${process.env.REACT_APP_MODEL_SERVICE}:${process.env.REACT_APP_MODEL_PORT}`);
+    const host = container.getHost();
+    const port = container.getMappedPort(5001);
+    serverUrl = `http://${host}:${port}`;
+  });
 
-    console.log()
+  afterAll(async () => {
+    await container.stop();
+  });
 
-  }, 180 * SECONDS); // Timeout increased for startup and model pull
+  test("POST /execute should return 400 if missing model or prompt", async () => {
+    const response = await axios.post(`${serverUrl}/execute`, { model: "test-model" }, { validateStatus: () => true });
+    expect(response.status).toBe(400);
+    expect(response.data).toEqual({ error: "Model name and prompt are required" });
+  });
 
-  afterAll(async () => {
-    // Clean up container after tests
-    try {
-      await container.stop({
-        force: true,
-        timeout: 0
-      });
-      console.log("Container forcefully stopped");
-    }
-    catch (error) {
-      console.error("Error stopping container: ", error);
-    }
+  test("POST /execute should return 500 if backend model is unreachable", async () => {
+    const response = await axios.post(`${serverUrl}/execute`, { model: "nonexistent", prompt: "hello" }, { validateStatus: () => true });
+    expect(response.status).toBe(500);
+    expect(response.data.error).toBeDefined();
+  });
+
+  test("POST /api/stream should return 400 if missing model or prompt", async () => {
+    const response = await axios.post(`${serverUrl}/api/stream`, { model: "test-model" }, { validateStatus: () => true });
+    expect(response.status).toBe(400);
+    expect(response.data).toEqual({ error: "Model name and prompt are required" });
   });
 
-  test('test nonstreaming response', async () => {
-    const model = "llama3.2";
-    const prompt = "How are you?";
-
-    const result = await getResponse(model, prompt);
-    console.log('Response from Ollama:', result);
-
-    expect(result["done"]).toBe(true)
-  }, 60 * SECONDS);
-});
+  test("GET /api/stream should return 400 if missing model or prompt", async () => {
+    const response = await axios.get(`${serverUrl}/api/stream?model=test-model`, { validateStatus: () => true });
+    expect(response.status).toBe(400);
+    expect(response.data).toEqual({ error: "Model name and prompt are required" });
+  });
+});
