@@ -14,6 +14,10 @@ import (
1414 "github.com/docker/model-runner/pkg/logging"
1515)
1616
17+ // maximumRecordsPerModel is the maximum number of records that will be stored
18+ // per model.
19+ const maximumRecordsPerModel = 10
20+
1721type responseRecorder struct {
1822 http.ResponseWriter
1923 body * bytes.Buffer
@@ -107,17 +111,28 @@ func (r *OpenAIRecorder) RecordRequest(model string, req *http.Request, body []b
107111 UserAgent : req .UserAgent (),
108112 }
109113
110- if r .records [modelID ] == nil {
111- r .records [modelID ] = & ModelData {
112- Records : make ([]* RequestResponsePair , 0 , 10 ),
114+ modelData := r .records [modelID ]
115+ if modelData == nil {
116+ modelData = & ModelData {
117+ Records : make ([]* RequestResponsePair , 0 , maximumRecordsPerModel ),
113118 Config : inference.BackendConfiguration {},
114119 }
120+ r .records [modelID ] = modelData
115121 }
116122
117- r .records [modelID ].Records = append (r .records [modelID ].Records , record )
118-
119- if len (r .records [modelID ].Records ) > 10 {
120- r .records [modelID ].Records = r .records [modelID ].Records [1 :]
123+ // Ideally we would use a ring buffer or a linked list for storing records,
124+ // but we want this data returnable as JSON, so we have to live with this
125+ // slightly inefficieny memory shuffle. Note that truncating the front of
126+ // the slice and continually appending would cause the slice's capacity to
127+ // grow unbounded.
128+ if len (modelData .Records ) == maximumRecordsPerModel {
129+ copy (
130+ modelData .Records [:maximumRecordsPerModel - 1 ],
131+ modelData .Records [1 :],
132+ )
133+ modelData .Records [maximumRecordsPerModel - 1 ] = record
134+ } else {
135+ modelData .Records = append (modelData .Records , record )
121136 }
122137
123138 return recordID
0 commit comments