Skip to content

Commit 62a8300

Browse files
authored
pkg/query: Modify source report to return matched filenames (#6157)
* pkg/query/sources.go: Allow empty buildID in source report builder. Updated the record filtering logic in sourceReportBuilder to treat an empty buildID as matching all records. The new behaviour is: when buildID is empty, buildID filtering is skipped and metrics are aggregated from all builds matching the filename; when buildID is provided, results are filtered to that specific build. * Add matched_filenames to Source report. Introduces a matched_filenames field to the Source proto and related code, allowing clients to see all filenames that matched a query. * Refactor Source report to use filename column and remove matchedFilenames. The Source report Arrow record now includes a dictionary-encoded filename column, along with line_number, cumulative, and flat columns. * Refactor sorting in sourceReportBuilder using the cmp package. * Refactor the source report line data structure.
1 parent 9cbbf0a commit 62a8300

File tree

3 files changed

+191
-41
lines changed

3 files changed

+191
-41
lines changed

pkg/query/sources.go

Lines changed: 85 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ package query
1515

1616
import (
1717
"bytes"
18+
"cmp"
1819
"context"
1920
"fmt"
2021
"slices"
@@ -89,6 +90,7 @@ func generateSourceReportRecord(
8990
}
9091

9192
type lineMetrics struct {
93+
lineNumber int64
9294
cumulative int64
9395
flat int64
9496
}
@@ -99,10 +101,27 @@ type sourceReportBuilder struct {
99101
filename []byte
100102
buildID []byte
101103

102-
lineData map[int64]*lineMetrics
104+
lineData map[string][]lineMetrics
103105
cumulative int64
104106
}
105107

108+
// filenameMatches checks if profileFilename matches queryFilename using suffix matching.
109+
// It returns true if:
110+
// - They are exactly equal, OR
111+
// - profileFilename ends with queryFilename AND is preceded by a '/' (path boundary).
112+
func filenameMatches(profileFilename, queryFilename []byte) bool {
113+
if bytes.Equal(profileFilename, queryFilename) {
114+
return true
115+
}
116+
if len(queryFilename) > 0 && bytes.HasSuffix(profileFilename, queryFilename) {
117+
idx := len(profileFilename) - len(queryFilename) - 1
118+
if idx >= 0 && profileFilename[idx] == '/' {
119+
return true
120+
}
121+
}
122+
return false
123+
}
124+
106125
func newSourceReportBuilder(
107126
pool memory.Allocator,
108127
ref *pb.SourceReference,
@@ -111,35 +130,52 @@ func newSourceReportBuilder(
111130
pool: pool,
112131
filename: []byte(ref.Filename),
113132
buildID: []byte(ref.BuildId),
114-
lineData: make(map[int64]*lineMetrics),
133+
lineData: make(map[string][]lineMetrics),
115134
}
116135
}
117136

118137
func (b *sourceReportBuilder) finish() (arrow.RecordBatch, int64) {
119-
lineNumbers := make([]int64, 0, len(b.lineData))
120-
for ln := range b.lineData {
121-
lineNumbers = append(lineNumbers, ln)
138+
filenames := make([]string, 0, len(b.lineData))
139+
for filename := range b.lineData {
140+
filenames = append(filenames, filename)
122141
}
123-
slices.Sort(lineNumbers)
142+
slices.Sort(filenames)
124143

144+
totalRows := 0
145+
for _, filename := range filenames {
146+
metrics := b.lineData[filename]
147+
slices.SortFunc(metrics, func(a, b lineMetrics) int {
148+
return cmp.Compare(a.lineNumber, b.lineNumber)
149+
})
150+
totalRows += len(metrics)
151+
}
152+
153+
filenameDictType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: arrow.BinaryTypes.String}
154+
filenameBuilder := array.NewBuilder(b.pool, filenameDictType).(*array.BinaryDictionaryBuilder)
155+
defer filenameBuilder.Release()
125156
lineNumBuilder := array.NewInt64Builder(b.pool)
126157
defer lineNumBuilder.Release()
127158
cumuBuilder := array.NewInt64Builder(b.pool)
128159
defer cumuBuilder.Release()
129160
flatBuilder := array.NewInt64Builder(b.pool)
130161
defer flatBuilder.Release()
131162

132-
lineNumBuilder.Reserve(len(lineNumbers))
133-
cumuBuilder.Reserve(len(lineNumbers))
134-
flatBuilder.Reserve(len(lineNumbers))
163+
filenameBuilder.Reserve(totalRows)
164+
lineNumBuilder.Reserve(totalRows)
165+
cumuBuilder.Reserve(totalRows)
166+
flatBuilder.Reserve(totalRows)
135167

136-
for _, ln := range lineNumbers {
137-
metrics := b.lineData[ln]
138-
lineNumBuilder.Append(ln)
139-
cumuBuilder.Append(metrics.cumulative)
140-
flatBuilder.Append(metrics.flat)
168+
for _, filename := range filenames {
169+
for _, metrics := range b.lineData[filename] {
170+
_ = filenameBuilder.AppendString(filename)
171+
lineNumBuilder.Append(metrics.lineNumber)
172+
cumuBuilder.Append(metrics.cumulative)
173+
flatBuilder.Append(metrics.flat)
174+
}
141175
}
142176

177+
filenameArr := filenameBuilder.NewDictionaryArray()
178+
defer filenameArr.Release()
143179
lineNumArr := lineNumBuilder.NewInt64Array()
144180
defer lineNumArr.Release()
145181
cumuArr := cumuBuilder.NewInt64Array()
@@ -150,18 +186,20 @@ func (b *sourceReportBuilder) finish() (arrow.RecordBatch, int64) {
150186
return array.NewRecordBatch(
151187
arrow.NewSchema(
152188
[]arrow.Field{
189+
{Name: "filename", Type: filenameDictType},
153190
{Name: "line_number", Type: arrow.PrimitiveTypes.Int64},
154191
{Name: "cumulative", Type: arrow.PrimitiveTypes.Int64},
155192
{Name: "flat", Type: arrow.PrimitiveTypes.Int64},
156193
},
157194
nil,
158195
),
159196
[]arrow.Array{
197+
filenameArr,
160198
lineNumArr,
161199
cumuArr,
162200
flatArr,
163201
},
164-
int64(len(lineNumbers)),
202+
int64(totalRows),
165203
), b.cumulative
166204
}
167205

@@ -178,25 +216,43 @@ func (b *sourceReportBuilder) addRecord(rec arrow.RecordBatch) error {
178216
if !r.Locations.ListValues().IsValid(j) {
179217
continue // Skip null locations; they have been filtered out
180218
}
181-
if r.MappingStart.IsValid(j) && bytes.Equal(r.MappingBuildIDDict.Value(int(r.MappingBuildIDIndices.Value(j))), b.buildID) {
219+
buildIDMatches := len(b.buildID) == 0 || bytes.Equal(r.MappingBuildIDDict.Value(int(r.MappingBuildIDIndices.Value(j))), b.buildID)
220+
if r.MappingStart.IsValid(j) && buildIDMatches {
182221
llOffsetStart, llOffsetEnd := r.Lines.ValueOffsets(j)
183222

184223
for k := int(llOffsetStart); k < int(llOffsetEnd); k++ {
185-
if r.Line.IsValid(k) &&
186-
r.LineFunctionNameIndices.IsValid(k) &&
187-
bytes.Equal(r.LineFunctionFilenameDict.Value(int(r.LineFunctionFilenameIndices.Value(k))), b.filename) {
188-
lineNum := r.LineNumber.Value(k)
189-
metrics, exists := b.lineData[lineNum]
190-
if !exists {
191-
metrics = &lineMetrics{}
192-
b.lineData[lineNum] = metrics
193-
}
224+
if r.Line.IsValid(k) && r.LineFunctionNameIndices.IsValid(k) {
225+
profileFilename := r.LineFunctionFilenameDict.Value(int(r.LineFunctionFilenameIndices.Value(k)))
226+
if filenameMatches(profileFilename, b.filename) {
227+
lineNum := r.LineNumber.Value(k)
228+
filename := string(profileFilename)
229+
value := r.Value.Value(i)
194230

195-
metrics.cumulative += r.Value.Value(i)
231+
isLeaf := isFirstNonNil(i, j, r.Locations) && isFirstNonNil(j, k, r.Lines)
196232

197-
isLeaf := isFirstNonNil(i, j, r.Locations) && isFirstNonNil(j, k, r.Lines)
198-
if isLeaf {
199-
metrics.flat += r.Value.Value(i)
233+
metrics := b.lineData[filename]
234+
found := false
235+
for idx := range metrics {
236+
if metrics[idx].lineNumber == lineNum {
237+
metrics[idx].cumulative += value
238+
if isLeaf {
239+
metrics[idx].flat += value
240+
}
241+
found = true
242+
break
243+
}
244+
}
245+
if !found {
246+
flat := int64(0)
247+
if isLeaf {
248+
flat = value
249+
}
250+
b.lineData[filename] = append(metrics, lineMetrics{
251+
lineNumber: lineNum,
252+
cumulative: value,
253+
flat: flat,
254+
})
255+
}
200256
}
201257
}
202258
}

pkg/query/sources_test.go

Lines changed: 104 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -131,36 +131,130 @@ func TestSourceReportArrowSchema(t *testing.T) {
131131
Filename: "test.go",
132132
})
133133

134-
builder.lineData[10] = &lineMetrics{cumulative: 100, flat: 50}
135-
builder.lineData[25] = &lineMetrics{cumulative: 200, flat: 75}
136-
builder.lineData[5] = &lineMetrics{cumulative: 50, flat: 25}
134+
builder.lineData["/app/test.go"] = []lineMetrics{
135+
{lineNumber: 10, cumulative: 100, flat: 50},
136+
{lineNumber: 25, cumulative: 200, flat: 75},
137+
{lineNumber: 5, cumulative: 50, flat: 25},
138+
}
137139

138140
record, cumulative := builder.finish()
139141
defer record.Release()
140142

141143
require.Equal(t, int64(0), cumulative)
142144

143145
schema := record.Schema()
144-
require.Equal(t, 3, schema.NumFields())
146+
require.Equal(t, 4, schema.NumFields())
145147

146-
require.Equal(t, "line_number", schema.Field(0).Name)
147-
require.Equal(t, "cumulative", schema.Field(1).Name)
148-
require.Equal(t, "flat", schema.Field(2).Name)
148+
require.Equal(t, "filename", schema.Field(0).Name)
149+
require.Equal(t, "line_number", schema.Field(1).Name)
150+
require.Equal(t, "cumulative", schema.Field(2).Name)
151+
require.Equal(t, "flat", schema.Field(3).Name)
149152

150153
require.Equal(t, int64(3), record.NumRows())
151154

152-
lineNumbers := record.Column(0)
155+
// Filename column (dictionary encoded)
156+
filenameCol := record.Column(0).(*array.Dictionary)
157+
require.Equal(t, "/app/test.go", filenameCol.Dictionary().(*array.String).Value(int(filenameCol.GetValueIndex(0))))
158+
require.Equal(t, "/app/test.go", filenameCol.Dictionary().(*array.String).Value(int(filenameCol.GetValueIndex(1))))
159+
require.Equal(t, "/app/test.go", filenameCol.Dictionary().(*array.String).Value(int(filenameCol.GetValueIndex(2))))
160+
161+
lineNumbers := record.Column(1)
153162
require.Equal(t, int64(5), lineNumbers.(*array.Int64).Value(0))
154163
require.Equal(t, int64(10), lineNumbers.(*array.Int64).Value(1))
155164
require.Equal(t, int64(25), lineNumbers.(*array.Int64).Value(2))
156165

157-
cumulativeCol := record.Column(1)
166+
cumulativeCol := record.Column(2)
158167
require.Equal(t, int64(50), cumulativeCol.(*array.Int64).Value(0))
159168
require.Equal(t, int64(100), cumulativeCol.(*array.Int64).Value(1))
160169
require.Equal(t, int64(200), cumulativeCol.(*array.Int64).Value(2))
161170

162-
flatCol := record.Column(2)
171+
flatCol := record.Column(3)
163172
require.Equal(t, int64(25), flatCol.(*array.Int64).Value(0))
164173
require.Equal(t, int64(50), flatCol.(*array.Int64).Value(1))
165174
require.Equal(t, int64(75), flatCol.(*array.Int64).Value(2))
166175
}
176+
177+
func TestFilenameMatches(t *testing.T) {
178+
tests := []struct {
179+
name string
180+
profileFilename string
181+
queryFilename string
182+
want bool
183+
}{
184+
{"exact match", "pkg/query/sources.go", "pkg/query/sources.go", true},
185+
{"exact match full path", "/home/ci/build/main.go", "/home/ci/build/main.go", true},
186+
187+
{"suffix match with /", "/home/ci/build/pkg/query/sources.go", "pkg/query/sources.go", true},
188+
{"suffix match single dir", "/app/main.go", "main.go", true},
189+
{"suffix match deep path", "github.com/parca-dev/parca/pkg/query/sources.go", "pkg/query/sources.go", true},
190+
191+
{"no match - no path boundary", "/home/ci/buildpkg/query/sources.go", "pkg/query/sources.go", false},
192+
{"no match - partial filename", "xsources.go", "sources.go", false},
193+
194+
{"no match - different file", "pkg/query/other.go", "sources.go", false},
195+
{"no match - different path", "pkg/other/sources.go", "pkg/query/sources.go", false},
196+
197+
{"empty query", "pkg/query/sources.go", "", false},
198+
{"empty profile", "", "sources.go", false},
199+
{"both empty", "", "", true},
200+
}
201+
202+
for _, tt := range tests {
203+
t.Run(tt.name, func(t *testing.T) {
204+
got := filenameMatches([]byte(tt.profileFilename), []byte(tt.queryFilename))
205+
require.Equal(t, tt.want, got)
206+
})
207+
}
208+
}
209+
210+
func TestSourceReportMultipleFilenames(t *testing.T) {
211+
allocator := memory.NewCheckedAllocator(memory.DefaultAllocator)
212+
defer allocator.AssertSize(t, 0)
213+
214+
builder := newSourceReportBuilder(allocator, &pb.SourceReference{
215+
BuildId: "",
216+
Filename: "sources.go",
217+
})
218+
219+
// Simulate lines from multiple files matching the suffix
220+
builder.lineData["/home/ci/build/pkg/query/sources.go"] = []lineMetrics{
221+
{lineNumber: 42, cumulative: 100, flat: 50},
222+
{lineNumber: 100, cumulative: 50, flat: 25},
223+
}
224+
builder.lineData["github.com/parca/pkg/query/sources.go"] = []lineMetrics{
225+
{lineNumber: 42, cumulative: 200, flat: 75},
226+
}
227+
228+
record, cumulative := builder.finish()
229+
defer record.Release()
230+
231+
require.Equal(t, int64(0), cumulative)
232+
require.Equal(t, int64(3), record.NumRows())
233+
234+
// Verify filename column contains both filenames
235+
filenameCol := record.Column(0).(*array.Dictionary)
236+
filenames := make(map[string]bool)
237+
for i := 0; i < int(record.NumRows()); i++ {
238+
idx := filenameCol.GetValueIndex(i)
239+
filename := filenameCol.Dictionary().(*array.String).Value(int(idx))
240+
filenames[filename] = true
241+
}
242+
require.Contains(t, filenames, "/home/ci/build/pkg/query/sources.go")
243+
require.Contains(t, filenames, "github.com/parca/pkg/query/sources.go")
244+
}
245+
246+
func TestSourceReportBuilderEmptyBuildID(t *testing.T) {
247+
allocator := memory.NewCheckedAllocator(memory.DefaultAllocator)
248+
defer allocator.AssertSize(t, 0)
249+
250+
// When buildId is empty, the builder should be configured to match all buildIds
251+
builder := newSourceReportBuilder(allocator, &pb.SourceReference{
252+
BuildId: "",
253+
Filename: "test.go",
254+
})
255+
256+
require.Equal(t, 0, len(builder.buildID))
257+
require.Equal(t, []byte("test.go"), builder.filename)
258+
require.NotNil(t, builder.lineData)
259+
require.Len(t, builder.lineData, 0)
260+
}

proto/buf.lock

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,5 @@ deps:
99
- remote: buf.build
1010
owner: grpc-ecosystem
1111
repository: grpc-gateway
12-
commit: 4c5ba75caaf84e928b7137ae5c18c26a
13-
digest: shake256:e174ad9408f3e608f6157907153ffec8d310783ee354f821f57178ffbeeb8faa6bb70b41b61099c1783c82fe16210ebd1279bc9c9ee6da5cffba9f0e675b8b99
12+
commit: 6467306b4f624747aaf6266762ee7a1c
13+
digest: shake256:833d648b99b9d2c18b6882ef41aaeb113e76fc38de20dda810c588d133846e6593b4da71b388bcd921b1c7ab41c7acf8f106663d7301ae9e82ceab22cf64b1b7

0 commit comments

Comments (0)