@@ -161,15 +161,39 @@ func TestEvaluate(t *testing.T) {
161161
162162 {
163163 languageGolang := & golang.Language {}
164- mockedModel := modeltesting .NewMockCapabilityWriteTestsNamed (t , "empty-response-model" )
164+ mockedModelID := "testing-provider/empty-response-model"
165+ mockedQuery := providertesting .NewMockQuery (t )
166+ mockedModel := llm .NewModel (mockedQuery , mockedModelID )
165167 repositoryPath := filepath .Join ("golang" , "plain" )
166168
167169 validate (t , & testCase {
168- Name : "Empty model responses are errors " ,
170+ Name : "Empty model response " ,
169171
170172 Before : func (t * testing.T , logger * log.Logger , resultPath string ) {
173+ queryResult1 := & provider.QueryResult {
174+ Message : "" ,
175+ GenerationInfo : & provider.GenerationInfo {
176+ TotalCost : 0.111111111 ,
177+ NativeTokensPrompt : 111 ,
178+ NativeTokensCompletion : 222 ,
179+ },
180+ }
181+ // Set up mocks, when test is running.
182+ mockedQuery .On ("Query" , mock .Anything , mock .Anything , mock .Anything ).Return (queryResult1 , nil ).Once ().After (10 * time .Millisecond ) // Simulate a model response delay because our internal safety measures trigger when a query is done in 0 milliseconds.
183+
184+ queryResult2 := & provider.QueryResult {
185+ Message : "" ,
186+ GenerationInfo : & provider.GenerationInfo {
187+ TotalCost : 0.222222222 ,
188+ NativeTokensPrompt : 333 ,
189+ NativeTokensCompletion : 444 ,
190+ },
191+ }
171192 // Set up mocks, when test is running.
172- mockedModel .MockCapabilityWriteTests .On ("WriteTests" , mock .Anything ).Return (nil , ErrEmptyResponseFromModel )
193+ mockedQuery .On ("Query" , mock .Anything , mock .Anything , mock .Anything ).Return (queryResult2 , nil ).Once ().After (10 * time .Millisecond ) // Simulate a model response delay because our internal safety measures trigger when a query is done in 0 milliseconds.
194+ },
195+ After : func (t * testing.T , logger * log.Logger , resultPath string ) {
196+ mockedQuery .AssertNumberOfCalls (t , "Query" , 2 )
173197 },
174198
175199 Context : & Context {
@@ -180,6 +204,11 @@ func TestEvaluate(t *testing.T) {
180204 Models : []evalmodel.Model {
181205 mockedModel ,
182206 },
207+ QueryAttempts : 3 ,
208+
209+ RepositoryPaths : []string {
210+ repositoryPath ,
211+ },
183212 },
184213
185214 ExpectedAssessments : []* metricstesting.AssessmentTuple {
@@ -189,8 +218,12 @@ func TestEvaluate(t *testing.T) {
189218 RepositoryPath : repositoryPath ,
190219 Case : "plain.go" ,
191220 Task : evaluatetask .IdentifierWriteTests ,
192- Assessment : metrics.Assessments {
221+ Assessment : map [ metrics.AssessmentKey ] float64 {
193222 metrics .AssessmentKeyFilesExecutedMaximumReachable : 1 ,
223+ metrics .AssessmentKeyResponseNoError : 1 ,
224+ metrics .AssessmentKeyCostsTokenActual : 0.111111111 ,
225+ metrics .AssessmentKeyNativeTokenInput : 111 ,
226+ metrics .AssessmentKeyNativeTokenOutput : 222 ,
194227 },
195228 },
196229 & metricstesting.AssessmentTuple {
@@ -199,8 +232,12 @@ func TestEvaluate(t *testing.T) {
199232 RepositoryPath : repositoryPath ,
200233 Case : "plain.go" ,
201234 Task : evaluatetask .IdentifierWriteTestsSymflowerFix ,
202- Assessment : metrics.Assessments {
235+ Assessment : map [ metrics.AssessmentKey ] float64 {
203236 metrics .AssessmentKeyFilesExecutedMaximumReachable : 1 ,
237+ metrics .AssessmentKeyResponseNoError : 1 ,
238+ metrics .AssessmentKeyCostsTokenActual : 0.111111111 ,
239+ metrics .AssessmentKeyNativeTokenInput : 111 ,
240+ metrics .AssessmentKeyNativeTokenOutput : 222 ,
204241 },
205242 },
206243 & metricstesting.AssessmentTuple {
@@ -209,8 +246,12 @@ func TestEvaluate(t *testing.T) {
209246 RepositoryPath : repositoryPath ,
210247 Case : "plain.go" ,
211248 Task : evaluatetask .IdentifierWriteTestsSymflowerTemplate ,
212- Assessment : metrics.Assessments {
249+ Assessment : map [ metrics.AssessmentKey ] float64 {
213250 metrics .AssessmentKeyFilesExecutedMaximumReachable : 1 ,
251+ metrics .AssessmentKeyResponseNoError : 1 ,
252+ metrics .AssessmentKeyCostsTokenActual : 0.222222222 ,
253+ metrics .AssessmentKeyNativeTokenInput : 333 ,
254+ metrics .AssessmentKeyNativeTokenOutput : 444 ,
214255 },
215256 },
216257 & metricstesting.AssessmentTuple {
@@ -219,15 +260,23 @@ func TestEvaluate(t *testing.T) {
219260 RepositoryPath : repositoryPath ,
220261 Case : "plain.go" ,
221262 Task : evaluatetask .IdentifierWriteTestsSymflowerTemplateSymflowerFix ,
222- Assessment : metrics.Assessments {
263+ Assessment : map [ metrics.AssessmentKey ] float64 {
223264 metrics .AssessmentKeyFilesExecutedMaximumReachable : 1 ,
265+ metrics .AssessmentKeyResponseNoError : 1 ,
266+ metrics .AssessmentKeyCostsTokenActual : 0.222222222 ,
267+ metrics .AssessmentKeyNativeTokenInput : 333 ,
268+ metrics .AssessmentKeyNativeTokenOutput : 444 ,
224269 },
225270 },
226271 },
227272 ExpectedResultFiles : map [string ]func (t * testing.T , filePath string , data string ){
228273 "evaluation.log" : nil ,
229- filepath .Join (string (evaluatetask .IdentifierWriteTests ), mockedModel .ID (), "golang" , "golang" , "plain" , "evaluation.log" ): nil ,
230- "evaluation.csv" : nil ,
274+ filepath .Join (string (evaluatetask .IdentifierWriteTests ), log .CleanModelNameForFileSystem (mockedModelID ), "golang" , "golang" , "plain" , "evaluation.log" ): func (t * testing.T , filePath , data string ) {
275+ assert .Equal (t , 4 , strings .Count (data , "no test files found" ), "number of ocurrences of \" no test files found\" not matched" )
276+ },
277+ "evaluation.csv" : func (t * testing.T , filePath , data string ) {
278+ assert .Lenf (t , strings .Split (data , "\n " ), 6 , "expected 6 lines: header, 4x entries and final new line:\n %s" , data )
279+ },
231280 },
232281 })
233282 }
0 commit comments