Skip to content

Commit 4ad4b1e

Browse files
authored
[receiver/github] add job queue spans (#39511)
#### Description The current implementation of the GitHub receiver doesn't capture directly the time a job is stuck in queue. The queue duration is a solid indicator of issues in the build system and necessary for good diagnosis. This pull request adds queue spans for the workflows jobs (workflow runs have to be handled separately) and a new `cicd.pipeline.run.queue.duration` attribute associated with each queue span. It predates finalization of the semantic conventions, but is in line with current WIP. #### Link to tracking issue Relates to #39081 #### Testing Updated golden tests. Screenshot of actual traces below. ![image](https://github.com/user-attachments/assets/e1d8dc12-4b9e-4171-ab66-606592bb9f4e) #### Documentation n/a
1 parent 81a426a commit 4ad4b1e

File tree

4 files changed

+105
-29
lines changed

4 files changed

+105
-29
lines changed

.chloggen/gh-queue-spans.yaml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: enhancement
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
7+
component: githubreceiver
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: add dedicated job queue spans with cicd.pipeline.run.queue.duration attribute
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [39081]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext:
19+
20+
# If your change doesn't affect end users or the exported elements of any package,
21+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
22+
# Optional: The change log or logs in which this entry should be included.
23+
# e.g. '[user]' or '[user, api]'
24+
# Include 'user' if the change is relevant to end users.
25+
# Include 'api' if there is a change to a library API.
26+
# Default: '[user]'
27+
change_logs: []

receiver/githubreceiver/model.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -94,12 +94,13 @@ const (
9494
AttributeCICDPipelineTaskRunSenderLogin = "cicd.pipeline.task.run.sender.login" // GitHub's Task Sender Login
9595
AttributeCICDPipelineFilePath = "cicd.pipeline.file.path" // GitHub's Path in workflow_run
9696
AttributeCICDPipelinePreviousAttemptURLFull = "cicd.pipeline.run.previous_attempt.url.full"
97-
AttributeCICDPipelineWorkerID = "cicd.pipeline.worker.id" // GitHub's Runner ID
98-
AttributeCICDPipelineWorkerGroupID = "cicd.pipeline.worker.group.id" // GitHub's Runner Group ID
99-
AttributeCICDPipelineWorkerName = "cicd.pipeline.worker.name" // GitHub's Runner Name
100-
AttributeCICDPipelineWorkerGroupName = "cicd.pipeline.worker.group.name" // GitHub's Runner Group Name
101-
AttributeCICDPipelineWorkerNodeID = "cicd.pipeline.worker.node.id" // GitHub's Runner Node ID
102-
AttributeCICDPipelineWorkerLabels = "cicd.pipeline.worker.labels" // GitHub's Runner Labels
97+
AttributeCICDPipelineWorkerID = "cicd.pipeline.worker.id" // GitHub's Runner ID
98+
AttributeCICDPipelineWorkerGroupID = "cicd.pipeline.worker.group.id" // GitHub's Runner Group ID
99+
AttributeCICDPipelineWorkerName = "cicd.pipeline.worker.name" // GitHub's Runner Name
100+
AttributeCICDPipelineWorkerGroupName = "cicd.pipeline.worker.group.name" // GitHub's Runner Group Name
101+
AttributeCICDPipelineWorkerNodeID = "cicd.pipeline.worker.node.id" // GitHub's Runner Node ID
102+
AttributeCICDPipelineWorkerLabels = "cicd.pipeline.worker.labels" // GitHub's Runner Labels
103+
AttributeCICDPipelineRunQueueDuration = "cicd.pipeline.run.queue.duration" // GitHub's Queue Duration
103104

104105
// The following attributes are exclusive to GitHub but not listed under
105106
// Vendor Extensions within Semantic Conventions yet.

receiver/githubreceiver/testdata/workflow-job-expected.yaml

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -57,16 +57,30 @@ resourceSpans:
5757
scopeSpans:
5858
- scope: {}
5959
spans:
60-
- endTimeUnixNano: "1744837823000000000"
60+
- endTimeUnixNano: "1744837825000000000"
6161
kind: 2
6262
name: test (1.23)
6363
parentSpanId: aba151af7cfbcf0f
6464
spanId: e68a36b3514c8634
65-
startTimeUnixNano: "1744837742000000000"
65+
startTimeUnixNano: "1744837738000000000"
6666
status:
6767
code: 1
6868
message: success
6969
traceId: 731ec8a47fd7450f753a812a4a8aa5a0
70+
- scope: {}
71+
spans:
72+
- attributes:
73+
- key: cicd.pipeline.run.queue.duration
74+
value:
75+
doubleValue: 5e+09
76+
endTimeUnixNano: "1744837743000000000"
77+
kind: 2
78+
name: queue-test (1.23)
79+
parentSpanId: e68a36b3514c8634
80+
spanId: d328504edfc4f5dc
81+
startTimeUnixNano: "1744837738000000000"
82+
status: {}
83+
traceId: 731ec8a47fd7450f753a812a4a8aa5a0
7084
- scope: {}
7185
spans:
7286
- attributes:
@@ -79,7 +93,7 @@ resourceSpans:
7993
endTimeUnixNano: "1744837744000000000"
8094
kind: 2
8195
name: Set up job
82-
parentSpanId: e68a36b3514c8634
96+
parentSpanId: d328504edfc4f5dc
8397
spanId: 103b52e85d34ae1c
8498
startTimeUnixNano: "1744837742000000000"
8599
status:
@@ -98,7 +112,7 @@ resourceSpans:
98112
endTimeUnixNano: "1744837745000000000"
99113
kind: 2
100114
name: Run actions/checkout@v4
101-
parentSpanId: e68a36b3514c8634
115+
parentSpanId: d328504edfc4f5dc
102116
spanId: b0a2cdd89199b9a5
103117
startTimeUnixNano: "1744837744000000000"
104118
status:
@@ -117,7 +131,7 @@ resourceSpans:
117131
endTimeUnixNano: "1744837745000000000"
118132
kind: 2
119133
name: Set up Go
120-
parentSpanId: e68a36b3514c8634
134+
parentSpanId: d328504edfc4f5dc
121135
spanId: c04460ffd6bf3917
122136
startTimeUnixNano: "1744837745000000000"
123137
status:
@@ -136,7 +150,7 @@ resourceSpans:
136150
endTimeUnixNano: "1744837820000000000"
137151
kind: 2
138152
name: Make test-all
139-
parentSpanId: e68a36b3514c8634
153+
parentSpanId: d328504edfc4f5dc
140154
spanId: 1684627ec86a096c
141155
startTimeUnixNano: "1744837746000000000"
142156
status:
@@ -155,7 +169,7 @@ resourceSpans:
155169
endTimeUnixNano: "1744837823000000000"
156170
kind: 2
157171
name: Upload coverage to Codecov
158-
parentSpanId: e68a36b3514c8634
172+
parentSpanId: d328504edfc4f5dc
159173
spanId: 25aeeca484f9f7e5
160174
startTimeUnixNano: "1744837820000000000"
161175
status:
@@ -174,7 +188,7 @@ resourceSpans:
174188
endTimeUnixNano: "1744837823000000000"
175189
kind: 2
176190
name: Post Set up Go
177-
parentSpanId: e68a36b3514c8634
191+
parentSpanId: d328504edfc4f5dc
178192
spanId: 0715a13bb338e46e
179193
startTimeUnixNano: "1744837823000000000"
180194
status:
@@ -193,7 +207,7 @@ resourceSpans:
193207
endTimeUnixNano: "1744837825000000000"
194208
kind: 2
195209
name: Post Run actions/checkout@v4
196-
parentSpanId: e68a36b3514c8634
210+
parentSpanId: d328504edfc4f5dc
197211
spanId: e231dce10420723b
198212
startTimeUnixNano: "1744837825000000000"
199213
status:
@@ -212,7 +226,7 @@ resourceSpans:
212226
endTimeUnixNano: "1744837823000000000"
213227
kind: 2
214228
name: Complete job
215-
parentSpanId: e68a36b3514c8634
229+
parentSpanId: d328504edfc4f5dc
216230
spanId: bb94c07ba3717632
217231
startTimeUnixNano: "1744837823000000000"
218232
status:

receiver/githubreceiver/trace_event_handling.go

Lines changed: 47 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,13 @@ func (gtr *githubTracesReceiver) handleWorkflowJob(e *github.WorkflowJobEvent) (
6767
return ptrace.Traces{}, errors.New("failed to create parent span")
6868
}
6969

70-
err = gtr.createStepSpans(r, e, traceID, parentID)
70+
queueSpanID, err := gtr.createJobQueueSpan(r, e, traceID, parentID)
71+
if err != nil {
72+
gtr.logger.Sugar().Error("failed to create job queue span", zap.Error(err))
73+
return ptrace.Traces{}, errors.New("failed to create job queue span")
74+
}
75+
76+
err = gtr.createStepSpans(r, e, traceID, queueSpanID)
7177
if err != nil {
7278
gtr.logger.Sugar().Error("failed to create step spans", zap.Error(err))
7379
return ptrace.Traces{}, errors.New("failed to create step spans")
@@ -190,18 +196,8 @@ func (gtr *githubTracesReceiver) createParentSpan(
190196
span.SetName(event.GetWorkflowJob().GetName())
191197
span.SetKind(ptrace.SpanKindServer)
192198

193-
// Workflow Job event start times provided by GitHub do not always match the
194-
// start time of the actual job. Generally they are reported a second after
195-
// the actual step start time. Thus, we use the first step start time as the
196-
// span start time while using the normal completedAt time for the end time.
197-
steps := event.GetWorkflowJob().Steps
198-
if len(steps) > 0 {
199-
span.SetStartTimestamp(pcommon.NewTimestampFromTime(steps[0].GetStartedAt().Time))
200-
span.SetEndTimestamp(pcommon.NewTimestampFromTime(steps[len(steps)-1].GetCompletedAt().Time))
201-
} else {
202-
span.SetStartTimestamp(pcommon.NewTimestampFromTime(event.GetWorkflowJob().GetStartedAt().Time))
203-
span.SetEndTimestamp(pcommon.NewTimestampFromTime(event.GetWorkflowJob().GetCompletedAt().Time))
204-
}
199+
span.SetStartTimestamp(pcommon.NewTimestampFromTime(event.GetWorkflowJob().GetCreatedAt().Time))
200+
span.SetEndTimestamp(pcommon.NewTimestampFromTime(event.GetWorkflowJob().GetCompletedAt().Time))
205201

206202
switch strings.ToLower(event.WorkflowJob.GetConclusion()) {
207203
case "success":
@@ -361,3 +357,41 @@ func newStepSpanID(runID int64, runAttempt int, jobName string, stepName string,
361357

362358
return spanID, nil
363359
}
360+
361+
// createJobQueueSpan creates a span for the job queue based on the provided
362+
// event by using the delta between the job created and completed times.
363+
func (gtr *githubTracesReceiver) createJobQueueSpan(
364+
resourceSpans ptrace.ResourceSpans,
365+
event *github.WorkflowJobEvent,
366+
traceID pcommon.TraceID,
367+
parentSpanID pcommon.SpanID,
368+
) (pcommon.SpanID, error) {
369+
scopeSpans := resourceSpans.ScopeSpans().AppendEmpty()
370+
span := scopeSpans.Spans().AppendEmpty()
371+
jobName := event.GetWorkflowJob().GetName()
372+
spanName := fmt.Sprintf("queue-%s", jobName)
373+
374+
span.SetName(spanName)
375+
span.SetKind(ptrace.SpanKindServer)
376+
span.SetTraceID(traceID)
377+
span.SetParentSpanID(parentSpanID)
378+
379+
runID := event.GetWorkflowJob().GetRunID()
380+
runAttempt := int(event.GetWorkflowJob().GetRunAttempt())
381+
spanID, err := newStepSpanID(runID, runAttempt, jobName, spanName, 1)
382+
if err != nil {
383+
return pcommon.SpanID{}, fmt.Errorf("failed to generate step span ID: %w", err)
384+
}
385+
386+
span.SetSpanID(spanID)
387+
388+
time := event.GetWorkflowJob().GetStartedAt().Sub(event.GetWorkflowJob().GetCreatedAt().Time)
389+
390+
attrs := span.Attributes()
391+
attrs.PutDouble(AttributeCICDPipelineRunQueueDuration, float64(time.Nanoseconds()))
392+
393+
span.SetStartTimestamp(pcommon.NewTimestampFromTime(event.GetWorkflowJob().GetCreatedAt().Time))
394+
span.SetEndTimestamp(pcommon.NewTimestampFromTime(event.GetWorkflowJob().GetStartedAt().Time))
395+
396+
return spanID, nil
397+
}

0 commit comments

Comments
 (0)