@@ -68,13 +68,44 @@ function wrapMessageElement(messageElement, role) {
6868 if ( role == "assistant" ) {
6969 const controlContainer = document . createElement ( "div" ) ;
7070 controlContainer . appendChild ( createCopyButton ( ( ) => messageElement . textContent , ( ) => messageElement . innerHTML ) ) ;
71+ controlContainer . appendChild ( infoButton ( ) ) ;
7172 controlContainer . classList . add ( "message-controls" ) ;
7273 wrapper . appendChild ( controlContainer ) ;
7374 }
7475 wrapper . classList . add ( "message-wrapper" , role ) ;
7576 return wrapper ;
7677}
7778
/**
 * Milliseconds elapsed between two timestamps.
 *
 * @param {?number} from - Start timestamp in ms.
 * @param {?number} to - End timestamp in ms.
 * @returns {number} `to - from`, or NaN when either timestamp is not a finite
 *   number. Guarding here keeps a `null` timestamp from being coerced to 0
 *   and yielding an epoch-sized duration.
 */
function statsElapsedMs(from, to) {
  return Number.isFinite(from) && Number.isFinite(to) ? to - from : NaN;
}

/**
 * Build one tooltip line such as
 * "Generated 7 tokens in 700 ms (10.00 tokens/s)".
 *
 * @param {string} verb - Leading word, e.g. "Processed" or "Generated".
 * @param {string} noun - What was counted, e.g. "input tokens" or "tokens".
 * @param {number} tokenCount - Number of tokens handled in the phase.
 * @param {number} durationMs - Phase duration in milliseconds.
 * @returns {?string} The formatted line, or null when the count or the
 *   duration is not a positive number (nothing meaningful to report).
 */
function statsThroughputLine(verb, noun, tokenCount, durationMs) {
  if (!(durationMs > 0 && tokenCount > 0)) {
    return null;
  }
  const seconds = durationMs / 1000;
  const tokensPerSecond = (tokenCount / seconds).toFixed(2);
  // Durations of one second or more are shown in seconds, shorter ones in ms.
  const durationString = durationMs >= 1000 ? `${seconds.toFixed(2)} s` : `${durationMs} ms`;
  return `${verb} ${tokenCount} ${noun} in ${durationString} (${tokensPerSecond} tokens/s)`;
}

/**
 * Create or update the "i" statistics button of a chat message.
 *
 * Looks for an element matching "#stats" inside `container`; when none is
 * found (or no container is given) a new button element is created. A newly
 * created button is NOT attached to `container` - the caller is expected to
 * append the returned element itself.
 *
 * @param {?Element} [container] - Element searched for an existing button.
 * @param {?Object} [stats] - Timing data; when omitted the button is hidden
 *   and its tooltip is left untouched.
 * @param {number} stats.startTime - Timestamp (ms) when the request started.
 * @param {?number} stats.firstContentTime - Timestamp (ms) of the first content.
 * @param {?number} stats.endTime - Timestamp (ms) when the response finished.
 * @param {number} stats.promptTokenCount - Number of prompt tokens.
 * @param {number} stats.reponseTokenCount - Number of generated tokens. The
 *   correctly spelled `responseTokenCount` is also accepted and wins if set.
 * @returns {Element} The existing or newly created button.
 */
function infoButton(container, stats) {
  let button = container?.querySelector("#stats");
  if (!button) {
    button = document.createElement("button");
    button.id = "stats";
    button.innerText = "i";
    button.style.fontFamily = "monospace";
  }
  button.style.display = stats ? "" : "none";
  if (!stats) {
    return button;
  }

  // Existing callers fill in the misspelt key, so it must keep working.
  const responseTokenCount = stats.responseTokenCount ?? stats.reponseTokenCount;

  const promptLine = statsThroughputLine(
    "Processed",
    "input tokens",
    stats.promptTokenCount,
    statsElapsedMs(stats.startTime, stats.firstContentTime),
  );
  const responseLine = statsThroughputLine(
    "Generated",
    "tokens",
    responseTokenCount,
    statsElapsedMs(stats.firstContentTime, stats.endTime),
  );

  const parts = [];
  if (promptLine !== null) {
    parts.push(promptLine);
  }
  // Without usable generation figures the response is reported as incomplete.
  parts.push(responseLine ?? "Incomplete");
  button.title = parts.join("\n");
  return button;
}
108+
78109function createMessageElement ( content ) {
79110 const messageDiv = document . createElement ( "div" ) ;
80111 messageDiv . classList . add ( "message" ) ;
@@ -126,6 +157,13 @@ async function handleChatStream(response) {
126157 streamingMessageContent = [ ] ;
127158 const prefillStatus = document . getElementById ( 'prefill-status' ) ;
128159 const progressBar = prefillStatus . querySelector ( '.progress-bar' ) ;
160+ const stats = {
161+ startTime : Date . now ( ) , // Timestamp when the request started
162+ firstContentTime : null , // Timestamp when the first content was received
163+ endTime : null , // Timestamp when the response was fully received
164+ promptTokenCount : 0 , // Number of tokens in the prompt
165+ reponseTokenCount : 0 // Number of tokens in the response
166+ } ;
129167
130168 try {
131169 while ( true ) {
@@ -154,7 +192,11 @@ async function handleChatStream(response) {
154192 prefillStatus . style . display = "flex" ;
155193 progressBar . style . width = `${ parsed . x_prefill_progress * 100 } %` ;
156194 } else {
157- prefillStatus . style . display = "none" ;
195+ if ( content && ! stats . firstContentTime ) {
196+ // Finished parsing the prompt
197+ stats . firstContentTime = Date . now ( ) ;
198+ prefillStatus . style . display = "none" ;
199+ }
158200 }
159201
160202 if ( content && ! messageAppended ) {
@@ -171,6 +213,11 @@ async function handleChatStream(response) {
171213 high . feed ( content ) ;
172214 scrollToBottom ( ) ;
173215 }
216+ if ( parsed . usage ) {
217+ stats . endTime = Date . now ( )
218+ stats . promptTokenCount = parsed . usage . prompt_tokens
219+ stats . reponseTokenCount = parsed . usage . completion_tokens
220+ }
174221 } catch ( e ) {
175222 console . error ( "Error parsing JSON:" , e ) ;
176223 }
@@ -186,6 +233,9 @@ async function handleChatStream(response) {
186233 }
187234 } finally {
188235 if ( messageAppended ) {
236+ stats . firstContentTime = stats . firstContentTime ?? Date . now ( ) ;
237+ stats . endTime = stats . endTime ?? Date . now ( ) ;
238+ infoButton ( currentMessageWrapper , stats ) ;
189239 high . flush ( ) ;
190240 // we don't supply max_tokens, so "length" can
191241 // only mean that we ran out of context window
@@ -257,7 +307,10 @@ async function sendMessage() {
257307 top_p : settings . top_p ,
258308 presence_penalty : settings . presence_penalty ,
259309 frequency_penalty : settings . frequency_penalty ,
260- stream : true
310+ stream : true ,
311+ stream_options : {
312+ include_usage : true
313+ }
261314 } ) ,
262315 signal : abortController . signal
263316 } ) ;
0 commit comments