Skip to content

Commit 9cd59b9

Browse files
committed
Generalize assessments to be of type float64 instead of uint64
1 parent d9b0914 commit 9cd59b9

File tree

9 files changed

+138
-138
lines changed

9 files changed

+138
-138
lines changed

cmd/eval-dev-quality/cmd/evaluate_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ func validateMetrics(t *testing.T, csvData string, expectedAssessments []metrics
3434
actualAssessmentTuples := reporttesting.ParseMetrics(t, csvData)
3535
actual = make([]metrics.Assessments, len(actualAssessmentTuples))
3636
for i, tuple := range actualAssessmentTuples {
37-
assert.Greater(t, tuple.Assessment[metrics.AssessmentKeyProcessingTime], uint64(0))
37+
assert.Greater(t, tuple.Assessment[metrics.AssessmentKeyProcessingTime], float64(0))
3838
actual[i] = tuple.Assessment
3939
}
4040

evaluate/evaluate_test.go

Lines changed: 108 additions & 108 deletions
Large diffs are not rendered by default.

evaluate/metrics/assessment.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,11 @@ var (
6262
)
6363

6464
// Assessments holds a collection of numerical assessment metrics.
65-
type Assessments map[AssessmentKey]uint64
65+
type Assessments map[AssessmentKey]float64
6666

6767
// NewAssessments creates a new assessment collection.
6868
func NewAssessments() Assessments {
69-
return map[AssessmentKey]uint64{}
69+
return map[AssessmentKey]float64{}
7070
}
7171

7272
// Add adds the given assessment collection to the current one.
@@ -98,7 +98,7 @@ func (a Assessments) Award(key AssessmentKey) {
9898

9999
// AwardMultiple yields multiple score points.
100100
func (a Assessments) AwardMultiple(key AssessmentKey, count uint64) {
101-
a[key] += count
101+
a[key] += float64(count)
102102
}
103103

104104
// String returns a string representation of the metrics.
@@ -109,7 +109,7 @@ func (a Assessments) String() string {
109109
entries := make([]string, len(AllAssessmentKeys))
110110

111111
for i, key := range AllAssessmentKeys {
112-
entries[i] = fmt.Sprintf("%s=%d", key, a[key])
112+
entries[i] = fmt.Sprintf("%s=%v", key, a[key])
113113
}
114114

115115
return strings.Join(entries, ", ")
@@ -123,7 +123,7 @@ func (a Assessments) StringCSV() (row []string) {
123123

124124
row = make([]string, len(AllAssessmentKeys))
125125
for i, key := range AllAssessmentKeys {
126-
row[i] = fmt.Sprintf("%d", a[key])
126+
row[i] = fmt.Sprintf("%v", a[key])
127127
}
128128

129129
return row

evaluate/metrics/assessment_test.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,26 +37,26 @@ func TestAssessmentsAdd(t *testing.T) {
3737
Name: "Non existing key",
3838

3939
Assessments: NewAssessments(),
40-
X: map[AssessmentKey]uint64{
40+
X: map[AssessmentKey]float64{
4141
AssessmentKeyResponseNoExcess: 1,
4242
},
4343

44-
ExpectedAssessments: map[AssessmentKey]uint64{
44+
ExpectedAssessments: map[AssessmentKey]float64{
4545
AssessmentKeyResponseNoExcess: 1,
4646
},
4747
})
4848

4949
validate(t, &testCase{
5050
Name: "Existing key",
5151

52-
Assessments: map[AssessmentKey]uint64{
52+
Assessments: map[AssessmentKey]float64{
5353
AssessmentKeyResponseNoExcess: 1,
5454
},
55-
X: map[AssessmentKey]uint64{
55+
X: map[AssessmentKey]float64{
5656
AssessmentKeyResponseNoExcess: 1,
5757
},
5858

59-
ExpectedAssessments: map[AssessmentKey]uint64{
59+
ExpectedAssessments: map[AssessmentKey]float64{
6060
AssessmentKeyResponseNoExcess: 2,
6161
},
6262
})
@@ -209,7 +209,7 @@ func TestCombineModelAndSymflowerFixAssessments(t *testing.T) {
209209

210210
ModelAssessment: Assessments{
211211
AssessmentKeyFilesExecuted: 1,
212-
AssessmentKeyProcessingTime: uint64(200),
212+
AssessmentKeyProcessingTime: float64(200),
213213
AssessmentKeyCoverage: 0,
214214
AssessmentKeyResponseCharacterCount: 100,
215215
AssessmentKeyGenerateTestsForFileCharacterCount: 50,
@@ -219,15 +219,15 @@ func TestCombineModelAndSymflowerFixAssessments(t *testing.T) {
219219
},
220220
SymflowerFixAssessments: Assessments{
221221
AssessmentKeyFilesExecuted: 1,
222-
AssessmentKeyProcessingTime: uint64(100),
222+
AssessmentKeyProcessingTime: float64(100),
223223
AssessmentKeyCoverage: 1,
224224
AssessmentKeyResponseNoError: 1,
225225
AssessmentKeyTestsPassing: 10,
226226
},
227227

228228
ExpectedAssessments: Assessments{
229229
AssessmentKeyFilesExecuted: 1,
230-
AssessmentKeyProcessingTime: uint64(300),
230+
AssessmentKeyProcessingTime: float64(300),
231231
AssessmentKeyCoverage: 1,
232232
AssessmentKeyResponseCharacterCount: 100,
233233
AssessmentKeyGenerateTestsForFileCharacterCount: 50,

evaluate/report/testing/csv.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@ import (
1111
"github.com/symflower/eval-dev-quality/task"
1212
)
1313

14-
func atoiUint64(t *testing.T, s string) uint64 {
15-
value, err := strconv.ParseUint(s, 10, 64)
14+
func parseFloat64(t *testing.T, s string) float64 {
15+
value, err := strconv.ParseFloat(s, 64)
1616
assert.NoErrorf(t, err, "parsing float64 from: %q", s)
1717

18-
return uint64(value)
18+
return value
1919
}
2020

2121
// ParseMetrics extracts multiple assessment metrics from the given string.
@@ -37,7 +37,7 @@ func ParseMetrics(t *testing.T, data string) (assessments metricstesting.Assessm
3737
Assessment: metrics.Assessments{},
3838
}
3939
for i, key := range metrics.AllAssessmentKeys {
40-
tuple.Assessment[key] = atoiUint64(t, cells[i+6])
40+
tuple.Assessment[key] = parseFloat64(t, cells[i+6])
4141
}
4242

4343
assessments = append(assessments, tuple)

evaluate/task/symflower.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ func runModelAndSymflowerFix(ctx evaltask.Context, modelCtx model.Context, runMo
109109

110110
// Symflower was able to fix a failure so now update the assessment with the improved results.
111111
withSymflowerFix := metrics.NewAssessments()
112-
withSymflowerFix[metrics.AssessmentKeyProcessingTime] = processingTime
112+
withSymflowerFix[metrics.AssessmentKeyProcessingTime] = float64(processingTime)
113113
withSymflowerFix.Award(metrics.AssessmentKeyFilesExecuted)
114114
withSymflowerFix.AwardMultiple(metrics.AssessmentKeyCoverage, withSymflowerFixTestResult.Coverage)
115115

evaluate/task/transpile.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ func (t *Transpile) Run(ctx evaltask.Context) (repositoryAssessment map[string]m
6969
modelAssessments := metrics.NewAssessments()
7070
withSymflowerAssessments := metrics.NewAssessments()
7171
maximumReachableFiles := uint64(len(language.Languages) - 1) // Transpile repositories contain sub-tasks to transpile from every other supported language minus the one we are transpiling to.
72-
modelAssessments[metrics.AssessmentKeyFilesExecutedMaximumReachable] = maximumReachableFiles
73-
withSymflowerAssessments[metrics.AssessmentKeyFilesExecutedMaximumReachable] = maximumReachableFiles
72+
modelAssessments[metrics.AssessmentKeyFilesExecutedMaximumReachable] = float64(maximumReachableFiles)
73+
withSymflowerAssessments[metrics.AssessmentKeyFilesExecutedMaximumReachable] = float64(maximumReachableFiles)
7474
repositoryAssessment[packagePath] = map[evaltask.Identifier]metrics.Assessments{
7575
IdentifierTranspile: modelAssessments,
7676
IdentifierTranspileSymflowerFix: withSymflowerAssessments,
@@ -131,7 +131,7 @@ func (t *Transpile) Run(ctx evaltask.Context) (repositoryAssessment map[string]m
131131

132132
// Symflower was able to fix a failure so now update the assessment with the improved results.
133133
withSymflowerFixAssessments := metrics.NewAssessments()
134-
withSymflowerFixAssessments[metrics.AssessmentKeyProcessingTime] = processingTime
134+
withSymflowerFixAssessments[metrics.AssessmentKeyProcessingTime] = float64(processingTime)
135135
withSymflowerFixAssessments.Award(metrics.AssessmentKeyFilesExecuted)
136136
withSymflowerFixAssessments.AwardMultiple(metrics.AssessmentKeyTestsPassing, uint64(testsPassing))
137137

model/llm/llm.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -491,11 +491,11 @@ func handleQueryResult(queryResult *provider.QueryResult, filePathAbsolute strin
491491
if err != nil {
492492
return nil, pkgerrors.WithStack(err)
493493
}
494-
assessment[metrics.AssessmentKeyProcessingTime] = uint64(queryResult.Duration.Milliseconds())
495-
assessment[metrics.AssessmentKeyResponseCharacterCount] = uint64(len(queryResult.Message))
496-
assessment[metrics.AssessmentKeyGenerateTestsForFileCharacterCount] = uint64(len(sourceFileContent))
497-
assessment[metrics.AssessmentKeyTokenInput] = uint64(queryResult.Usage.PromptTokens)
498-
assessment[metrics.AssessmentKeyTokenOutput] = uint64(queryResult.Usage.CompletionTokens)
494+
assessment[metrics.AssessmentKeyProcessingTime] = float64(queryResult.Duration.Milliseconds())
495+
assessment[metrics.AssessmentKeyResponseCharacterCount] = float64(len(queryResult.Message))
496+
assessment[metrics.AssessmentKeyGenerateTestsForFileCharacterCount] = float64(len(sourceFileContent))
497+
assessment[metrics.AssessmentKeyTokenInput] = float64(queryResult.Usage.PromptTokens)
498+
assessment[metrics.AssessmentKeyTokenOutput] = float64(queryResult.Usage.CompletionTokens)
499499

500500
if err := os.MkdirAll(filepath.Dir(filePathAbsolute), 0755); err != nil {
501501
return nil, pkgerrors.WithStack(err)

model/symflower/symflower.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ func (m *Model) WriteTests(ctx model.Context) (assessment metrics.Assessments, e
121121
return nil, pkgerrors.WithStack(err)
122122
}
123123

124-
processingTime := uint64(time.Since(start).Milliseconds())
124+
processingTime := float64(time.Since(start).Milliseconds())
125125

126126
characterCount, err := countCharactersOfGeneratedFiles(ctx.RepositoryPath, extractGeneratedFilePaths(output))
127127
if err != nil {
@@ -130,8 +130,8 @@ func (m *Model) WriteTests(ctx model.Context) (assessment metrics.Assessments, e
130130

131131
return metrics.Assessments{ // Symflower always generates just source code when it does not fail, so no need to check the assessment properties.
132132
metrics.AssessmentKeyProcessingTime: processingTime,
133-
metrics.AssessmentKeyGenerateTestsForFileCharacterCount: characterCount,
134-
metrics.AssessmentKeyResponseCharacterCount: characterCount,
133+
metrics.AssessmentKeyGenerateTestsForFileCharacterCount: float64(characterCount),
134+
metrics.AssessmentKeyResponseCharacterCount: float64(characterCount),
135135
metrics.AssessmentKeyResponseNoExcess: 1,
136136
metrics.AssessmentKeyResponseWithCode: 1,
137137
}, nil

0 commit comments

Comments
 (0)