Skip to content

Commit 317eee0

Browse files
committed
feat: distributed tracing span error
Signed-off-by: Jaeyeon Park <[email protected]>
1 parent d37e04c commit 317eee0

File tree

47 files changed

+1000
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+1000
-0
lines changed

experiments/aws-ssm/aws-ssm-chaos-by-id/experiment/aws-ssm-chaos-by-id.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,13 @@ import (
1818
"github.com/litmuschaos/litmus-go/pkg/types"
1919
"github.com/litmuschaos/litmus-go/pkg/utils/common"
2020
"github.com/sirupsen/logrus"
21+
"go.opentelemetry.io/otel/codes"
22+
"go.opentelemetry.io/otel/trace"
2123
)
2224

2325
// AWSSSMChaosByID injects the SSM chaos on an EC2 instance
2426
func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) {
27+
span := trace.SpanFromContext(ctx)
2528

2629
experimentsDetails := experimentTypes.ExperimentDetails{}
2730
resultDetails := types.ResultDetails{}
@@ -42,6 +45,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) {
4245
// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
4346
if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
4447
log.Errorf("Unable to initialize the probes: %v", err)
48+
span.SetStatus(codes.Error, "Unable to initialize the probes")
49+
span.RecordError(err)
4550
return
4651
}
4752
}
@@ -51,6 +56,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) {
5156
if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
5257
log.Errorf("Unable to create the chaosresult: %v", err)
5358
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
59+
span.SetStatus(codes.Error, "Unable to create the chaosresult")
60+
span.RecordError(err)
5461
return
5562
}
5663

@@ -89,6 +96,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) {
8996
log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck)
9097
}
9198
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
99+
span.SetStatus(codes.Error, "Probe Failed")
100+
span.RecordError(err)
92101
return
93102
}
94103
msg = "AUT: Running, Probes: Successful"
@@ -104,6 +113,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) {
104113
if err := ssm.CheckInstanceInformation(&experimentsDetails); err != nil {
105114
log.Errorf("Failed perform ssm api calls: %v", err)
106115
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
116+
span.SetStatus(codes.Error, "Failed to perform ssm api calls")
117+
span.RecordError(err)
107118
return
108119
}
109120

@@ -112,6 +123,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) {
112123
if err := ec2.InstanceStatusCheckByID(experimentsDetails.EC2InstanceID, experimentsDetails.Region); err != nil {
113124
log.Errorf("Failed to get the ec2 instance status: %v", err)
114125
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
126+
span.SetStatus(codes.Error, "Failed to get the ec2 instance status")
127+
span.RecordError(err)
115128
return
116129
}
117130
log.Info("[Status]: EC2 instance is in running state")
@@ -129,6 +142,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) {
129142
log.Errorf("Failed to delete ssm doc: %v", err)
130143
}
131144
}
145+
span.SetStatus(codes.Error, "Chaos injection failed")
146+
span.RecordError(err)
132147
return
133148
}
134149

@@ -142,6 +157,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) {
142157
if err := ec2.InstanceStatusCheckByID(experimentsDetails.EC2InstanceID, experimentsDetails.Region); err != nil {
143158
log.Errorf("Failed to get the ec2 instance status: %v", err)
144159
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
160+
span.SetStatus(codes.Error, "Failed to get the ec2 instance status")
161+
span.RecordError(err)
145162
return
146163
}
147164
log.Info("[Status]: EC2 instance is in running state (post chaos)")
@@ -161,6 +178,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) {
161178
log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck)
162179
}
163180
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
181+
span.SetStatus(codes.Error, "Probes Failed")
182+
span.RecordError(err)
164183
return
165184
}
166185
msg = "AUT: Running, Probes: Successful"
@@ -177,6 +196,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) {
177196
log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName)
178197
if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
179198
log.Errorf("Unable to update the chaosresult: %v", err)
199+
span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
200+
span.RecordError(err)
180201
return
181202
}
182203

experiments/aws-ssm/aws-ssm-chaos-by-tag/experiment/aws-ssm-chaos-by-tag.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,13 @@ import (
1818
"github.com/litmuschaos/litmus-go/pkg/types"
1919
"github.com/litmuschaos/litmus-go/pkg/utils/common"
2020
"github.com/sirupsen/logrus"
21+
"go.opentelemetry.io/otel/codes"
22+
"go.opentelemetry.io/otel/trace"
2123
)
2224

2325
// AWSSSMChaosByTag injects the SSM chaos on an EC2 instance
2426
func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) {
27+
span := trace.SpanFromContext(ctx)
2528

2629
experimentsDetails := experimentTypes.ExperimentDetails{}
2730
resultDetails := types.ResultDetails{}
@@ -42,6 +45,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) {
4245
// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
4346
if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
4447
log.Errorf("Unable to initialize the probes: %v", err)
48+
span.SetStatus(codes.Error, "Unable to initialize the probes")
49+
span.RecordError(err)
4550
return
4651
}
4752
}
@@ -51,6 +56,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) {
5156
if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
5257
log.Errorf("Unable to create the chaosresult: %v", err)
5358
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
59+
span.SetStatus(codes.Error, "Unable to create the chaosresult")
60+
span.RecordError(err)
5461
return
5562
}
5663

@@ -79,6 +86,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) {
7986
if err := ssm.CheckInstanceInformation(&experimentsDetails); err != nil {
8087
log.Errorf("Target instance status check failed: %v", err)
8188
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
89+
span.SetStatus(codes.Error, "Target instance status check failed")
90+
span.RecordError(err)
8291
return
8392
}
8493

@@ -97,6 +106,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) {
97106
log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck)
98107
}
99108
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
109+
span.SetStatus(codes.Error, "Probe Failed")
110+
span.RecordError(err)
100111
return
101112
}
102113
msg = "AUT: Running, Probes: Successful"
@@ -120,6 +131,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) {
120131
log.Errorf("Failed to delete ssm document: %v", err)
121132
}
122133
}
134+
span.SetStatus(codes.Error, "Chaos injection failed")
135+
span.RecordError(err)
123136
return
124137
}
125138

@@ -133,6 +146,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) {
133146
if err := ec2.InstanceStatusCheck(experimentsDetails.TargetInstanceIDList, experimentsDetails.Region); err != nil {
134147
log.Errorf("Failed to get the ec2 instance status: %v", err)
135148
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
149+
span.SetStatus(codes.Error, "Failed to get the ec2 instance status")
150+
span.RecordError(err)
136151
return
137152
}
138153
log.Info("[Status]: EC2 instance is in running state (post chaos)")
@@ -152,6 +167,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) {
152167
log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck)
153168
}
154169
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
170+
span.SetStatus(codes.Error, "Probes Failed")
171+
span.RecordError(err)
155172
return
156173
}
157174
msg = "AUT: Running, Probes: Successful"
@@ -167,6 +184,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) {
167184
log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName)
168185
if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
169186
log.Errorf("Unable to update the chaosresult: %v", err)
187+
span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
188+
span.RecordError(err)
170189
return
171190
}
172191

experiments/azure/azure-disk-loss/experiment/azure-disk-loss.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,13 @@ import (
1818
"github.com/litmuschaos/litmus-go/pkg/types"
1919
"github.com/litmuschaos/litmus-go/pkg/utils/common"
2020
"github.com/sirupsen/logrus"
21+
"go.opentelemetry.io/otel/codes"
22+
"go.opentelemetry.io/otel/trace"
2123
)
2224

2325
// AzureDiskLoss contains steps to inject chaos
2426
func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) {
27+
span := trace.SpanFromContext(ctx)
2528

2629
var err error
2730
experimentsDetails := experimentTypes.ExperimentDetails{}
@@ -43,6 +46,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) {
4346
// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
4447
if err = types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
4548
log.Errorf("Unable to initialize the probes: %v", err)
49+
span.SetStatus(codes.Error, "Unable to initialize the probes")
50+
span.RecordError(err)
4651
return
4752
}
4853
}
@@ -52,6 +57,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) {
5257
if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
5358
log.Errorf("Unable to create the chaosresult: %v", err)
5459
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
60+
span.SetStatus(codes.Error, "Unable to create the chaosresult")
61+
span.RecordError(err)
5562
return
5663
}
5764

@@ -80,6 +87,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) {
8087
if experimentsDetails.SubscriptionID, err = azureCommon.GetSubscriptionID(); err != nil {
8188
log.Errorf("fail to get the subscription id: %v", err)
8289
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
90+
span.SetStatus(codes.Error, "fail to get the subscription id")
91+
span.RecordError(err)
8392
return
8493
}
8594

@@ -89,6 +98,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) {
8998
if err = azureStatus.CheckVirtualDiskWithInstance(experimentsDetails.SubscriptionID, experimentsDetails.VirtualDiskNames, experimentsDetails.ResourceGroup); err != nil {
9099
log.Errorf("Virtual disk status check failed: %v", err)
91100
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
101+
span.SetStatus(codes.Error, "Virtual disk status check failed")
102+
span.RecordError(err)
92103
return
93104
}
94105
}
@@ -108,6 +119,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) {
108119
log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck)
109120
}
110121
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
122+
span.SetStatus(codes.Error, "Probe Failed")
123+
span.RecordError(err)
111124
return
112125
}
113126
msg = "AUT: Running, Probes: Successful"
@@ -124,6 +137,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) {
124137
if err = litmusLIB.PrepareChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
125138
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
126139
log.Errorf("Chaos injection failed: %v", err)
140+
span.SetStatus(codes.Error, "Chaos injection failed")
141+
span.RecordError(err)
127142
return
128143
}
129144

@@ -138,6 +153,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) {
138153
if err = azureStatus.CheckVirtualDiskWithInstance(experimentsDetails.SubscriptionID, experimentsDetails.VirtualDiskNames, experimentsDetails.ResourceGroup); err != nil {
139154
log.Errorf("Virtual disk status check failed: %v", err)
140155
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
156+
span.SetStatus(codes.Error, "Virtual disk status check failed")
157+
span.RecordError(err)
141158
return
142159
}
143160
}
@@ -156,6 +173,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) {
156173
log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck)
157174
}
158175
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
176+
span.SetStatus(codes.Error, "Probes Failed")
177+
span.RecordError(err)
159178
return
160179
}
161180
msg = "AUT: Running, Probes: Successful"
@@ -172,6 +191,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) {
172191
log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName)
173192
if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
174193
log.Errorf("Unable to update the chaosresult: %v", err)
194+
span.SetStatus(codes.Error, "Unable to Update the chaosresult")
195+
span.RecordError(err)
175196
return
176197
}
177198

experiments/azure/instance-stop/experiment/azure-instance-stop.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ import (
1111
"github.com/litmuschaos/litmus-go/pkg/clients"
1212
azureCommon "github.com/litmuschaos/litmus-go/pkg/cloud/azure/common"
1313
azureStatus "github.com/litmuschaos/litmus-go/pkg/cloud/azure/instance"
14+
"go.opentelemetry.io/otel/codes"
15+
"go.opentelemetry.io/otel/trace"
1416

1517
"github.com/litmuschaos/litmus-go/pkg/events"
1618
"github.com/litmuschaos/litmus-go/pkg/log"
@@ -23,6 +25,7 @@ import (
2325

2426
// AzureInstanceStop injects the Azure instance stop chaos
2527
func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) {
28+
span := trace.SpanFromContext(ctx)
2629

2730
var err error
2831
experimentsDetails := experimentTypes.ExperimentDetails{}
@@ -44,6 +47,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) {
4447
// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
4548
if err = types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
4649
log.Errorf("Unable to initialize the probes: %v", err)
50+
span.SetStatus(codes.Error, "Unable to initialize the probes")
51+
span.RecordError(err)
4752
}
4853
}
4954

@@ -53,6 +58,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) {
5358
if err != nil {
5459
log.Errorf("Unable to create the chaosresult: %v", err)
5560
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
61+
span.SetStatus(codes.Error, "Unable to create the chaosresult")
62+
span.RecordError(err)
5663
return
5764
}
5865

@@ -74,6 +81,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) {
7481
if experimentsDetails.SubscriptionID, err = azureCommon.GetSubscriptionID(); err != nil {
7582
log.Errorf("Failed to get the subscription id: %v", err)
7683
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
84+
span.SetStatus(codes.Error, "fail to get the subscription id")
85+
span.RecordError(err)
7786
return
7887
}
7988

@@ -100,6 +109,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) {
100109
log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck)
101110
}
102111
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
112+
span.SetStatus(codes.Error, "Probe Failed")
113+
span.RecordError(err)
103114
return
104115
}
105116
msg = "AUT: Running, Probes: Successful"
@@ -116,6 +127,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) {
116127
if err = azureStatus.InstanceStatusCheckByName(experimentsDetails.AzureInstanceNames, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup); err != nil {
117128
log.Errorf("Azure instance status check failed: %v", err)
118129
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
130+
span.SetStatus(codes.Error, "Azure instance status check failed")
131+
span.RecordError(err)
119132
return
120133
}
121134
log.Info("[Status]: Azure instance(s) is in running state (pre-chaos)")
@@ -126,6 +139,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) {
126139
if err = litmusLIB.PrepareAzureStop(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
127140
log.Errorf("Chaos injection failed: %v", err)
128141
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
142+
span.SetStatus(codes.Error, "Chaos injection failed")
143+
span.RecordError(err)
129144
return
130145
}
131146

@@ -139,6 +154,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) {
139154
if err = azureStatus.InstanceStatusCheckByName(experimentsDetails.AzureInstanceNames, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup); err != nil {
140155
log.Errorf("Azure instance status check failed: %v", err)
141156
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
157+
span.SetStatus(codes.Error, "Azure instance status check failed")
158+
span.RecordError(err)
142159
return
143160
}
144161
log.Info("[Status]: Azure instance is in running state (post chaos)")
@@ -159,6 +176,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) {
159176
log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck)
160177
}
161178
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
179+
span.SetStatus(codes.Error, "Probes Failed")
180+
span.RecordError(err)
162181
return
163182
}
164183
msg = "AUT: Running, Probes: Successful"

0 commit comments

Comments
 (0)