Skip to content

Commit 6641d4b

Browse files
authored
refactor: use HandleFile for Jenkins build log processing to improve chunking (#4225)
1 parent 00b3060 commit 6641d4b

File tree

2 files changed

+304
-10
lines changed

2 files changed

+304
-10
lines changed

pkg/sources/jenkins/jenkins.go

Lines changed: 5 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -11,12 +11,13 @@ import (
1111
"time"
1212

1313
"github.com/go-errors/errors"
14-
"github.com/trufflesecurity/trufflehog/v3/pkg/log"
1514
"google.golang.org/protobuf/proto"
1615
"google.golang.org/protobuf/types/known/anypb"
1716

1817
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
1918
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
19+
"github.com/trufflesecurity/trufflehog/v3/pkg/handlers"
20+
"github.com/trufflesecurity/trufflehog/v3/pkg/log"
2021
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
2122
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
2223
"github.com/trufflesecurity/trufflehog/v3/pkg/roundtripper"
@@ -411,13 +412,7 @@ func (s *Source) chunkBuild(
411412
buildLogURL.String())
412413
}
413414

414-
buildLog, err := io.ReadAll(resp.Body)
415-
if err != nil {
416-
return fmt.Errorf("error reading build log response body from %q: %w", buildLogURL.String(), err)
417-
}
418-
419-
ctx.Logger().V(4).Info("scanning build log")
420-
chunksChan <- &sources.Chunk{
415+
chunkSkel := &sources.Chunk{
421416
SourceName: s.name,
422417
SourceID: s.SourceID(),
423418
SourceType: s.Type(),
@@ -431,11 +426,11 @@ func (s *Source) chunkBuild(
431426
},
432427
},
433428
},
434-
Data: buildLog,
435429
Verify: s.verify,
436430
}
437431

438-
return nil
432+
ctx.Logger().V(4).Info("scanning build log")
433+
return handlers.HandleFile(ctx, resp.Body, chunkSkel, sources.ChanReporter{Ch: chunksChan})
439434
}
440435

441436
type JenkinsJobResponse struct {

pkg/sources/jenkins/jenkins_test.go

Lines changed: 299 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,299 @@
1+
package jenkins
2+
3+
import (
4+
"fmt"
5+
"net/http"
6+
"net/http/httptest"
7+
"runtime"
8+
"strings"
9+
"testing"
10+
"time"
11+
12+
"github.com/stretchr/testify/assert"
13+
"github.com/stretchr/testify/require"
14+
"google.golang.org/protobuf/types/known/anypb"
15+
16+
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
17+
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/credentialspb"
18+
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
19+
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
20+
)
21+
22+
// Binary byte-size units used to express test payload sizes.
const (
	KB = 1 << 10
	MB = KB << 10
)
26+
27+
// generateTestData returns a string of exactly size bytes built by repeating
// pattern, truncating the final repetition when size is not a multiple of
// len(pattern). An empty pattern falls back to "X". A size of zero or less
// yields the empty string; without that guard a negative size would panic when
// pre-sizing the builder.
func generateTestData(size int, pattern string) string {
	if size <= 0 {
		return ""
	}
	if pattern == "" {
		pattern = "X" // fallback pattern
	}

	// Whole repetitions first, then a truncated tail so the result lands on
	// exactly size bytes.
	full, tail := size/len(pattern), size%len(pattern)

	var builder strings.Builder
	builder.Grow(size)
	for i := 0; i < full; i++ {
		builder.WriteString(pattern)
	}
	builder.WriteString(pattern[:tail])

	return builder.String()
}
48+
49+
// createMockJenkinsServer starts an httptest server that imitates the three
// Jenkins endpoints these tests rely on: the root job listing, the build listing
// for jobName, and the consoleText of build buildNumber, which returns
// logContent verbatim. The caller must Close the returned server.
func createMockJenkinsServer(jobName string, buildNumber int, logContent string) *httptest.Server {
	router := http.NewServeMux()
	// The server is started before the handlers are registered because the
	// handlers embed srv.URL in the JSON they return.
	srv := httptest.NewServer(router)

	// Root API endpoint: lists the single job when the query asks for jobs,
	// otherwise reports an empty job list.
	router.HandleFunc("/api/json", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		if !strings.Contains(r.URL.RawQuery, "tree=jobs") {
			fmt.Fprint(w, `{"jobs":[]}`)
			return
		}
		fmt.Fprintf(w,
			`{"jobs":[{"_class":"org.jenkinsci.plugins.workflow.job.WorkflowJob",`+
				`"name":"%s","url":"%s/job/%s/"}]}`, jobName, srv.URL, jobName)
	})

	// Per-job API endpoint: lists the single build when the query asks for
	// builds, otherwise reports an empty build list.
	buildsPath := fmt.Sprintf("/job/%s/api/json", jobName)
	router.HandleFunc(buildsPath, func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		if !strings.Contains(r.URL.RawQuery, "tree=builds") {
			fmt.Fprint(w, `{"builds":[]}`)
			return
		}
		fmt.Fprintf(w,
			`{"builds":[{"number":%d,"url":"%s/job/%s/%d/"}]}`, buildNumber, srv.URL, jobName, buildNumber)
	})

	// Console text endpoint: serves the build log payload whose chunking the
	// tests inspect.
	consolePath := fmt.Sprintf("/job/%s/%d/consoleText", jobName, buildNumber)
	router.HandleFunc(consolePath, func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/plain")
		fmt.Fprint(w, logContent)
	})

	return srv
}
90+
91+
// TestJenkinsVariousSizes verifies that Jenkins build logs are properly chunked
92+
// across different data sizes that represent real-world scenarios from small
93+
// logs to large CI/CD outputs.
94+
func TestJenkinsVariousSizes(t *testing.T) {
95+
testCases := []struct {
96+
name string
97+
dataSize int
98+
pattern string
99+
jobName string
100+
buildNumber int
101+
}{
102+
{
103+
name: "small_60KB",
104+
dataSize: 60 * KB,
105+
pattern: "This is a line in the build log with some sensitive data\n",
106+
jobName: "test-job",
107+
buildNumber: 42,
108+
},
109+
{
110+
name: "large_1MB",
111+
dataSize: 1 * MB,
112+
pattern: "Line with potential secrets like api_key=abc123def456\n",
113+
jobName: "large-job",
114+
buildNumber: 1,
115+
},
116+
{
117+
name: "medium_80KB",
118+
dataSize: 80 * KB,
119+
pattern: "Line with secret: api_key=sk-123abc456def\n",
120+
jobName: "medium-job",
121+
buildNumber: 123,
122+
},
123+
}
124+
125+
for _, tc := range testCases {
126+
t.Run(tc.name, func(t *testing.T) {
127+
t.Parallel()
128+
129+
logContent := generateTestData(tc.dataSize, tc.pattern)
130+
t.Logf("Generated %d bytes (%.2f KB) of test data", tc.dataSize, float64(tc.dataSize)/float64(KB))
131+
132+
server := createMockJenkinsServer(tc.jobName, tc.buildNumber, logContent)
133+
defer server.Close()
134+
135+
s := new(Source)
136+
conn, err := anypb.New(&sourcespb.Jenkins{
137+
Endpoint: server.URL,
138+
Credential: &sourcespb.Jenkins_BasicAuth{
139+
BasicAuth: &credentialspb.BasicAuth{
140+
Username: "testuser",
141+
Password: "testpass",
142+
},
143+
},
144+
})
145+
require.NoError(t, err)
146+
147+
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
148+
defer cancel()
149+
150+
err = s.Init(ctx, "test-jenkins-"+tc.name, 0, 1, false, conn, runtime.NumCPU())
151+
require.NoError(t, err)
152+
153+
jobs, err := s.GetJenkinsJobs(ctx)
154+
require.NoError(t, err)
155+
require.NotEmpty(t, jobs.Jobs, "No jobs found. This indicates a mock server setup issue.")
156+
157+
chunksChan := make(chan *sources.Chunk, 200)
158+
done := make(chan error, 1)
159+
160+
go func() {
161+
defer close(chunksChan)
162+
done <- s.Chunks(ctx, chunksChan)
163+
}()
164+
165+
var chunks []*sources.Chunk
166+
var totalDataSize int
167+
maxChunkSize := 0
168+
for chunk := range chunksChan {
169+
chunks = append(chunks, chunk)
170+
totalDataSize += len(chunk.Data)
171+
if len(chunk.Data) > maxChunkSize {
172+
maxChunkSize = len(chunk.Data)
173+
}
174+
}
175+
176+
require.NoError(t, <-done)
177+
require.NotEmpty(t, chunks, "No chunks were received.")
178+
179+
// Verify that large logs are actually being split into multiple chunks.
180+
// This catches regressions where chunking logic might not be working.
181+
// Data larger than a single chunk should result in multiple chunks.
182+
if tc.dataSize > sources.ChunkSize && len(chunks) <= 1 {
183+
t.Logf("Got only %d chunk for data size %d bytes (chunk size: %d bytes), may indicate chunking not working as expected",
184+
len(chunks), tc.dataSize, sources.ChunkSize)
185+
}
186+
187+
// Ensure no individual chunk exceeds the maximum allowed size.
188+
// This validates that the chunking mechanism respects size limits.
189+
assert.LessOrEqual(t, maxChunkSize, sources.TotalChunkSize,
190+
"Found chunk larger than expected: %d bytes (max expected %d bytes)",
191+
maxChunkSize, sources.TotalChunkSize)
192+
193+
// Validate data integrity by checking that total output matches input size.
194+
// Lower bound ensures no data loss; upper bound catches excessive duplication
195+
// from overlapping peek data between adjacent chunks.
196+
assert.GreaterOrEqual(t, totalDataSize, tc.dataSize,
197+
"Total data size %d is less than original %d - suggests data loss",
198+
totalDataSize, tc.dataSize)
199+
assert.LessOrEqual(t, totalDataSize, tc.dataSize*3,
200+
"Total data size %d is much larger than original %d - suggests excessive duplication",
201+
totalDataSize, tc.dataSize)
202+
203+
chunk := chunks[0]
204+
assert.Equal(t, "test-jenkins-"+tc.name, chunk.SourceName)
205+
206+
jenkinsMetadata := chunk.SourceMetadata.GetJenkins()
207+
require.NotNil(t, jenkinsMetadata, "Missing Jenkins metadata")
208+
assert.Equal(t, tc.jobName, jenkinsMetadata.ProjectName)
209+
assert.Equal(t, int64(tc.buildNumber), jenkinsMetadata.BuildNumber)
210+
211+
expectedLink := fmt.Sprintf("%s/job/%s/%d/consoleText", server.URL, tc.jobName, tc.buildNumber)
212+
assert.Equal(t, expectedLink, jenkinsMetadata.Link)
213+
})
214+
}
215+
}
216+
217+
// TestJenkinsChunkBuildDirect tests the chunkBuild method in isolation to verify
218+
// that build log chunking works correctly without the overhead of the full source
219+
// initialization and job discovery process.
220+
func TestJenkinsChunkBuildDirect(t *testing.T) {
221+
// Use a size that will definitely require chunking to test the splitting logic.
222+
largeLogContent := generateTestData(500*KB, "Line with secret: api_key=sk-123abc456def\n")
223+
224+
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
225+
if strings.HasSuffix(r.URL.Path, "/consoleText") {
226+
w.Header().Set("Content-Type", "text/plain")
227+
fmt.Fprint(w, largeLogContent)
228+
} else {
229+
http.NotFound(w, r)
230+
}
231+
}))
232+
defer server.Close()
233+
234+
s := new(Source)
235+
conn, err := anypb.New(&sourcespb.Jenkins{
236+
Endpoint: server.URL,
237+
Credential: &sourcespb.Jenkins_BasicAuth{
238+
BasicAuth: &credentialspb.BasicAuth{Username: "test", Password: "test"},
239+
},
240+
})
241+
require.NoError(t, err)
242+
243+
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
244+
defer cancel()
245+
246+
err = s.Init(ctx, "test-chunk-build", 0, 1, false, conn, runtime.NumCPU())
247+
require.NoError(t, err)
248+
249+
mockBuild := JenkinsBuild{
250+
Number: 123,
251+
Url: server.URL + "/job/test-project/123/",
252+
}
253+
254+
chunksChan := make(chan *sources.Chunk, 200)
255+
256+
go func() {
257+
defer close(chunksChan)
258+
err := s.chunkBuild(ctx, mockBuild, "test-project", chunksChan)
259+
assert.NoError(t, err)
260+
}()
261+
262+
var chunks []*sources.Chunk
263+
var totalDataSize int
264+
maxChunkSize := 0
265+
for chunk := range chunksChan {
266+
chunks = append(chunks, chunk)
267+
totalDataSize += len(chunk.Data)
268+
if len(chunk.Data) > maxChunkSize {
269+
maxChunkSize = len(chunk.Data)
270+
}
271+
}
272+
273+
require.NotEmpty(t, chunks, "No chunks were received from chunkBuild.")
274+
275+
assert.LessOrEqual(t, maxChunkSize, sources.TotalChunkSize,
276+
"Found chunk larger than expected: %d bytes (max expected %d bytes)",
277+
maxChunkSize, sources.TotalChunkSize)
278+
279+
// Ensure that direct chunking maintains data integrity with the same
280+
// bounds checking as the full integration test.
281+
originalSize := len(largeLogContent)
282+
assert.GreaterOrEqual(t, totalDataSize, originalSize,
283+
"Total data size %d is less than original %d - suggests data loss",
284+
totalDataSize, originalSize)
285+
assert.LessOrEqual(t, totalDataSize, originalSize*3,
286+
"Total data size %d is much larger than original %d - suggests excessive duplication",
287+
totalDataSize, originalSize)
288+
289+
chunk := chunks[0]
290+
assert.Equal(t, "test-chunk-build", chunk.SourceName)
291+
292+
jenkinsMetadata := chunk.SourceMetadata.GetJenkins()
293+
require.NotNil(t, jenkinsMetadata, "Missing Jenkins metadata")
294+
assert.Equal(t, "test-project", jenkinsMetadata.ProjectName)
295+
assert.Equal(t, int64(123), jenkinsMetadata.BuildNumber)
296+
297+
expectedLink := server.URL + "/job/test-project/123/consoleText"
298+
assert.Equal(t, expectedLink, jenkinsMetadata.Link)
299+
}

0 commit comments

Comments
 (0)