@@ -3,10 +3,6 @@ const cors = require("cors");
 const axios = require("axios");
 const app = express();
 
-// Stream timeout settings
-const STREAM_TIMEOUT = 60000; // 60 seconds max for streaming responses
-const HTTP_TIMEOUT = 30000; // 30 seconds for HTTP requests
-
 app.use(cors());
 app.use(express.json());
 
@@ -31,17 +27,12 @@ app.post("/execute", async (req, res) => {
 app.post("/stream", handleStreamRequest);
 app.get("/stream", handleStreamRequest);
 
-// Handler function for stream requests with performance optimizations
+// Handler function for stream requests
 async function handleStreamRequest(req, res) {
   // Get parameters from either query (GET) or body (POST)
   const model = req.method === 'GET' ? req.query.model : req.body.model;
   const prompt = req.method === 'GET' ? req.query.prompt : req.body.prompt;
-
-  // Get optional performance parameters
-  const temperature = parseFloat((req.method === 'GET' ? req.query.temperature : req.body.temperature) || "0.7");
-  const maxTokens = parseInt((req.method === 'GET' ? req.query.max_tokens : req.body.max_tokens) || "2048");
-  const topP = parseFloat((req.method === 'GET' ? req.query.top_p : req.body.top_p) || "0.9");
-
+
   if (!model || !prompt) {
     return res.status(400).json({ error: "Model name and prompt are required" });
   }
@@ -50,82 +41,31 @@ async function handleStreamRequest(req, res) {
   res.setHeader('Content-Type', 'text/event-stream');
   res.setHeader('Cache-Control', 'no-cache');
   res.setHeader('Connection', 'keep-alive');
-
-  // Setup the timeout for the stream
-  let streamTimeout = setTimeout(() => {
-    console.warn("Stream timeout reached");
-    res.write(`data: ${JSON.stringify({ error: "Stream timeout", done: true })}\n\n`);
-    res.end();
-  }, STREAM_TIMEOUT);
 
   try {
     const host = ("REACT_APP_MODEL_SERVICE" in process.env) ? process.env.REACT_APP_MODEL_SERVICE : "model-published";
-    const startTime = Date.now();
 
-    // Make a streaming request to Ollama with optimized parameters
+    // Make a streaming request to Ollama
     const response = await axios({
       method: 'post',
       url: `http://${host}:11434/api/generate`,
       data: {
         model: model,
         prompt: prompt,
-        stream: true,
-        options: {
-          temperature: temperature,
-          top_p: topP,
-          max_tokens: maxTokens,
-          num_ctx: 2048, // Context window size
-          num_gpu: 1, // Use 1 GPU if available
-          num_thread: 4 // Use 4 CPU threads
-        }
+        stream: true
       },
-      responseType: 'stream',
-      timeout: HTTP_TIMEOUT // HTTP request timeout
+      responseType: 'stream'
     });
 
-    console.log(`Stream started for model ${model} at ${new Date().toISOString()}`);
-
-    // Forward the stream to the client with performance monitoring
-    let tokensGenerated = 0;
-    let firstTokenTime = null;
-
+    // Forward the stream to the client
     response.data.on('data', (chunk) => {
       try {
-        // Clear the timeout on each received chunk
-        clearTimeout(streamTimeout);
-        // Reset timeout
-        streamTimeout = setTimeout(() => {
-          console.warn("Stream timeout between chunks");
-          res.write(`data: ${JSON.stringify({ error: "Stream timeout between responses", done: true })}\n\n`);
-          res.end();
-        }, STREAM_TIMEOUT);
-
         const data = JSON.parse(chunk.toString());
-
-        // Track first token time
-        if (data.response && tokensGenerated === 0) {
-          firstTokenTime = Date.now();
-          console.log(`First token after ${firstTokenTime - startTime} ms`);
-        }
-
-        // Count tokens
-        if (data.response) {
-          tokensGenerated += 1;
-        }
-
         // Send each chunk as an SSE event
         res.write(`data: ${JSON.stringify(data)}\n\n`);
 
-        // If this is the final response, end the connection and log performance
+        // If this is the final response, end the connection
         if (data.done) {
-          const endTime = Date.now();
-          const totalTime = endTime - startTime;
-          const tokensPerSecond = tokensGenerated / (totalTime / 1000);
-
-          console.log(`Stream completed: ${tokensGenerated} tokens in ${totalTime} ms (${tokensPerSecond.toFixed(2)} tokens/sec)`);
-
-          // Clean up the timeout
-          clearTimeout(streamTimeout);
           res.end();
         }
       } catch (err) {
@@ -138,13 +78,11 @@ async function handleStreamRequest(req, res) {
     response.data.on('error', (err) => {
       console.error("Stream error:", err);
       res.write(`data: ${JSON.stringify({ error: "Stream error" })}\n\n`);
-      clearTimeout(streamTimeout);
       res.end();
     });
   } catch (err) {
     console.error("Streaming error: ", err);
     res.write(`data: ${JSON.stringify({ error: "Server error", message: err.message })}\n\n`);
-    clearTimeout(streamTimeout);
     res.end();
   }
 }
@@ -154,17 +92,7 @@ function getResponse(model, prompt) {
   return axios.post(`http://${host}:11434/api/generate`, {
     model: model,
     prompt: prompt,
-    stream: false,
-    options: {
-      temperature: 0.7,
-      top_p: 0.9,
-      max_tokens: 2048,
-      num_ctx: 2048, // Context window size
-      num_gpu: 1, // Use 1 GPU if available
-      num_thread: 4 // Use 4 CPU threads
-    }
-  }, {
-    timeout: HTTP_TIMEOUT
+    stream: false
   })
   .then(res => {
     console.log("Output received");
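
Note: the handler above emits Server-Sent Events, so a client reads "data: {...}" frames until a frame with done: true arrives. Below is a minimal consumption sketch for the POST variant, not part of this commit; the port 3000 and the model name are assumptions (the diff never shows where the app listens), and for brevity it assumes each network chunk carries whole SSE frames.

    // Sketch: consume the POST /stream endpoint (Node 18+, where fetch is
    // built in and res.body is an async-iterable web stream).
    // Assumptions: server listens on port 3000, model "llama2" is pulled.
    async function main() {
      const res = await fetch("http://localhost:3000/stream", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ model: "llama2", prompt: "Hello" })
      });
      const decoder = new TextDecoder();
      for await (const chunk of res.body) {
        // For brevity, assume each chunk holds whole "data: {...}\n\n" frames;
        // a robust parser would buffer partial frames across chunks.
        for (const line of decoder.decode(chunk).split("\n")) {
          if (!line.startsWith("data: ")) continue;
          const data = JSON.parse(line.slice("data: ".length));
          if (data.response) process.stdout.write(data.response);
          if (data.done) console.log("\n[done]");
        }
      }
    }
    main();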
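With the timeout plumbing removed, nothing tears down the upstream Ollama stream if the client disconnects mid-generation. One possible follow-up, sketched here as an assumption rather than anything in this commit, is to destroy the upstream stream when the response's connection closes:

    // Sketch, inside handleStreamRequest after the axios call: with
    // responseType 'stream', response.data is a Node Readable, so
    // destroying it drops the connection to Ollama. 'close' also fires
    // on normal completion, where destroying an ended stream is harmless.
    res.on('close', () => {
      response.data.destroy();
    });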