From 86ab69f36c78849a2707c7b1ea8f58f211358fb3 Mon Sep 17 00:00:00 2001 From: Suhyen Im Date: Fri, 20 Dec 2024 17:46:24 +0900 Subject: [PATCH 1/2] feat: add span attributes for pod-delete Signed-off-by: Suhyen Im --- bin/experiment/experiment.go | 3 +++ chaoslib/litmus/pod-delete/lib/pod-delete.go | 9 +++++++++ pkg/probe/probe.go | 21 +++++++++++++++----- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/bin/experiment/experiment.go b/bin/experiment/experiment.go index ef01e1f2f..ca09cc370 100755 --- a/bin/experiment/experiment.go +++ b/bin/experiment/experiment.go @@ -4,6 +4,7 @@ import ( "context" "errors" "flag" + "go.opentelemetry.io/otel/attribute" "os" // Uncomment to load all auth plugins @@ -109,6 +110,8 @@ func main() { return } + span.SetAttributes(attribute.String("experiment.name", *experimentName)) + log.Infof("Experiment Name: %v", *experimentName) // invoke the corresponding experiment based on the (-name) flag diff --git a/chaoslib/litmus/pod-delete/lib/pod-delete.go b/chaoslib/litmus/pod-delete/lib/pod-delete.go index aa4fec6e8..127e53b1f 100644 --- a/chaoslib/litmus/pod-delete/lib/pod-delete.go +++ b/chaoslib/litmus/pod-delete/lib/pod-delete.go @@ -3,6 +3,7 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" "strconv" "strings" "time" @@ -113,6 +114,10 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Deleting the application pod for _, pod := range targetPodList.Items { + span.SetAttributes( + attribute.String("pod.name", pod.Name), + attribute.String("pod.namespace", pod.Namespace), + ) log.InfoWithValues("[Info]: Killing the following pods", logrus.Fields{ "PodName": pod.Name}) @@ -211,6 +216,10 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Deleting the application pod for _, pod := range targetPodList.Items { + span.SetAttributes( + attribute.String("pod.name", pod.Name), + attribute.String("pod.namespace", pod.Namespace), + ) log.InfoWithValues("[Info]: Killing the following pods", logrus.Fields{ "PodName": pod.Name}) diff --git a/pkg/probe/probe.go b/pkg/probe/probe.go index fe6e1a271..3800b6c6e 100644 --- a/pkg/probe/probe.go +++ b/pkg/probe/probe.go @@ -4,6 +4,8 @@ import ( "bytes" "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "html/template" "strings" "time" @@ -35,13 +37,15 @@ func RunProbes(ctx context.Context, chaosDetails *types.ChaosDetails, clients cl return err } + span.SetAttributes(attribute.String("probe.phase", phase)) + switch strings.ToLower(phase) { //execute probes for the prechaos phase case "prechaos": for _, probe := range probes { switch strings.ToLower(probe.Mode) { case "sot", "edge", "continuous": - if err := execute(probe, chaosDetails, clients, resultDetails, phase); err != nil { + if err := execute(ctx, probe, chaosDetails, clients, resultDetails, phase); err != nil { return err } } @@ -50,7 +54,7 @@ func RunProbes(ctx context.Context, chaosDetails *types.ChaosDetails, clients cl case "duringchaos": for _, probe := range probes { if strings.ToLower(probe.Mode) == "onchaos" { - if err := execute(probe, chaosDetails, clients, resultDetails, phase); err != nil { + if err := execute(ctx, probe, chaosDetails, clients, resultDetails, phase); err != nil { return err } } @@ -66,7 +70,7 @@ func RunProbes(ctx context.Context, chaosDetails *types.ChaosDetails, clients cl // evaluate continuous and onchaos probes switch strings.ToLower(probe.Mode) { case "onchaos", "continuous": - if err := execute(probe, chaosDetails, clients, resultDetails, phase); err != nil { + if err := execute(ctx, probe, chaosDetails, clients, resultDetails, phase); err != nil { probeError = append(probeError, stacktrace.RootCause(err).Error()) } } @@ -78,7 +82,7 @@ func RunProbes(ctx context.Context, chaosDetails *types.ChaosDetails, clients cl for _, probe := range probes { switch strings.ToLower(probe.Mode) { case "eot", "edge": - if err := execute(probe, chaosDetails, clients, resultDetails, phase); err != nil { + if err := execute(ctx, probe, chaosDetails, clients, resultDetails, phase); err != nil { return err } } @@ -330,7 +334,14 @@ func stopChaosEngine(probe v1alpha1.ProbeAttributes, clients clients.ClientSets, } // execute contains steps to execute & evaluate probes in different modes at different phases -func execute(probe v1alpha1.ProbeAttributes, chaosDetails *types.ChaosDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, phase string) error { +func execute(ctx context.Context, probe v1alpha1.ProbeAttributes, chaosDetails *types.ChaosDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, phase string) error { + span := trace.SpanFromContext(ctx) + span.SetAttributes( + attribute.String("probe.name", probe.Name), + attribute.String("probe.mode", probe.Mode), + attribute.String("probe.type", probe.Type), + ) + switch strings.ToLower(probe.Type) { case "k8sprobe": // it contains steps to prepare the k8s probe From 307a08437394cfe2c31cee1d40668fe5c9b13191 Mon Sep 17 00:00:00 2001 From: Suhyen Im Date: Sun, 23 Mar 2025 21:25:08 +0900 Subject: [PATCH 2/2] feat: add span attributes Signed-off-by: Suhyen Im --- bin/experiment/experiment.go | 3 +- bin/helper/helper.go | 2 + .../litmus/aws-ssm-chaos/lib/ssm-chaos.go | 26 +++++++++- .../lib/ssm/aws-ssm-chaos-by-id.go | 6 ++- .../lib/ssm/aws-ssm-chaos-by-tag.go | 6 ++- .../azure-disk-loss/lib/azure-disk-loss.go | 26 ++++++++-- .../lib/azure-instance-stop.go | 26 ++++++++-- .../container-kill/helper/container-kill.go | 6 +++ .../container-kill/lib/container-kill.go | 29 +++++++++-- chaoslib/litmus/disk-fill/helper/disk-fill.go | 3 ++ chaoslib/litmus/disk-fill/lib/disk-fill.go | 27 +++++++++-- .../lib/docker-service-kill.go | 20 +++++++- .../lib/ebs-loss-by-id/lib/ebs-loss-by-id.go | 6 ++- .../ebs-loss-by-tag/lib/ebs-loss-by-tag.go | 6 ++- chaoslib/litmus/ebs-loss/lib/ebs-loss.go | 24 +++++++++- .../lib/ec2-terminate-by-id.go | 26 ++++++++-- .../lib/ec2-terminate-by-tag.go | 26 ++++++++-- .../lib/gcp-vm-disk-loss-by-label.go | 28 +++++++++-- .../gcp-vm-disk-loss/lib/gcp-vm-disk-loss.go | 28 +++++++++-- .../lib/gcp-vm-instance-stop-by-label.go | 28 +++++++++-- .../lib/gcp-vm-instance-stop.go | 28 +++++++++-- .../litmus/http-chaos/helper/http-helper.go | 7 +++ .../litmus/http-chaos/lib/header/header.go | 6 ++- chaoslib/litmus/http-chaos/lib/http-chaos.go | 23 +++++++-- .../litmus/http-chaos/lib/latency/latency.go | 6 ++- .../http-chaos/lib/modify-body/modify-body.go | 6 ++- chaoslib/litmus/http-chaos/lib/reset/reset.go | 6 ++- .../http-chaos/lib/statuscode/status-code.go | 6 ++- chaoslib/litmus/k6-loadgen/lib/k6-loadgen.go | 22 +++++++-- .../lib/pod-delete.go | 20 ++++++-- .../lib/kubelet-service-kill.go | 20 +++++++- chaoslib/litmus/network-chaos/helper/netem.go | 7 +++ .../lib/corruption/corruption.go | 6 ++- .../lib/duplication/duplication.go | 6 ++- .../network-chaos/lib/latency/latency.go | 6 ++- .../litmus/network-chaos/lib/loss/loss.go | 6 ++- .../litmus/network-chaos/lib/network-chaos.go | 23 +++++++-- .../litmus/node-cpu-hog/lib/node-cpu-hog.go | 31 ++++++++++-- chaoslib/litmus/node-drain/lib/node-drain.go | 15 +++++- .../node-io-stress/lib/node-io-stress.go | 35 ++++++++++++-- .../node-memory-hog/lib/node-memory-hog.go | 33 +++++++++++-- .../litmus/node-restart/lib/node-restart.go | 21 +++++++- chaoslib/litmus/node-taint/lib/node-taint.go | 16 ++++++- .../pod-autoscaler/lib/pod-autoscaler.go | 6 ++- .../pod-cpu-hog-exec/lib/pod-cpu-hog-exec.go | 48 ++++++++++++++----- chaoslib/litmus/pod-delete/lib/pod-delete.go | 31 +++++++----- .../litmus/pod-dns-chaos/helper/dnschaos.go | 7 +++ .../litmus/pod-dns-chaos/lib/pod-dns-chaos.go | 27 +++++++++-- .../pod-fio-stress/lib/pod-fio-stress.go | 47 +++++++++++++----- .../lib/pod-memory-hog-exec.go | 47 +++++++++++++----- .../lib/pod-network-partition.go | 15 +++++- .../lib/redfish-node-restart.go | 13 ++++- .../lib/spring-boot-chaos.go | 26 ++++++++-- .../stress-chaos/helper/stress-helper.go | 4 +- .../litmus/stress-chaos/lib/stress-chaos.go | 27 +++++++++-- .../litmus/vm-poweroff/lib/vm-poweroff.go | 24 ++++++++-- pkg/probe/probe.go | 6 +-- 57 files changed, 876 insertions(+), 159 deletions(-) diff --git a/bin/experiment/experiment.go b/bin/experiment/experiment.go index ca09cc370..40dced230 100755 --- a/bin/experiment/experiment.go +++ b/bin/experiment/experiment.go @@ -103,6 +103,7 @@ func main() { // parse the experiment name experimentName := flag.String("name", "pod-delete", "name of the chaos experiment") + span.SetAttributes(attribute.String("experiment.name", *experimentName)) //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { @@ -110,8 +111,6 @@ func main() { return } - span.SetAttributes(attribute.String("experiment.name", *experimentName)) - log.Infof("Experiment Name: %v", *experimentName) // invoke the corresponding experiment based on the (-name) flag diff --git a/bin/helper/helper.go b/bin/helper/helper.go index c2774a131..01775a965 100644 --- a/bin/helper/helper.go +++ b/bin/helper/helper.go @@ -4,6 +4,7 @@ import ( "context" "errors" "flag" + "go.opentelemetry.io/otel/attribute" "os" // Uncomment to load all auth plugins @@ -59,6 +60,7 @@ func main() { // parse the helper name helperName := flag.String("name", "", "name of the helper pod") + span.SetAttributes(attribute.String("helper.name", *helperName)) //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { diff --git a/chaoslib/litmus/aws-ssm-chaos/lib/ssm-chaos.go b/chaoslib/litmus/aws-ssm-chaos/lib/ssm-chaos.go index 0a54489fd..334f43b19 100644 --- a/chaoslib/litmus/aws-ssm-chaos/lib/ssm-chaos.go +++ b/chaoslib/litmus/aws-ssm-chaos/lib/ssm-chaos.go @@ -2,6 +2,8 @@ package lib import ( "context" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "strings" "time" @@ -21,7 +23,17 @@ import ( // InjectChaosInSerialMode will inject the aws ssm chaos in serial mode that is one after other func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, instanceIDList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails, inject chan os.Signal) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAWSSSMFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAWSSSMFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("aws.ec2.id", experimentsDetails.EC2InstanceID), + attribute.String("aws.ec2.tag", experimentsDetails.EC2InstanceTag), + attribute.String("aws.ssm.document", experimentsDetails.DocumentName), + attribute.String("aws.region", experimentsDetails.Region), + ), + ) defer span.End() select { @@ -91,7 +103,17 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // InjectChaosInParallelMode will inject the aws ssm chaos in parallel mode that is all at once func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, instanceIDList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails, inject chan os.Signal) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAWSSSMFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAWSSSMFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("aws.ec2.id", experimentsDetails.EC2InstanceID), + attribute.String("aws.ec2.tag", experimentsDetails.EC2InstanceTag), + attribute.String("aws.ssm.document", experimentsDetails.DocumentName), + attribute.String("aws.region", experimentsDetails.Region), + ), + ) defer span.End() select { diff --git a/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-id.go b/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-id.go index e4bb5a50b..aedeb1016 100644 --- a/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-id.go +++ b/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-id.go @@ -3,6 +3,8 @@ package ssm import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "os/signal" "strings" @@ -28,7 +30,9 @@ var ( // PrepareAWSSSMChaosByID contains the prepration and injection steps for the experiment func PrepareAWSSSMChaosByID(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareAWSSSMFaultByID") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareAWSSSMFaultByID", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // inject channel is used to transmit signal notifications. diff --git a/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-tag.go b/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-tag.go index c7e872c7b..20d6eb37a 100644 --- a/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-tag.go +++ b/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-tag.go @@ -3,6 +3,8 @@ package ssm import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "os/signal" "strings" @@ -23,7 +25,9 @@ import ( // PrepareAWSSSMChaosByTag contains the prepration and injection steps for the experiment func PrepareAWSSSMChaosByTag(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAWSSSMFaultByTag") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAWSSSMFaultByTag", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // inject channel is used to transmit signal notifications. diff --git a/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go b/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go index 210377809..ba1d994e5 100644 --- a/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go +++ b/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "os/signal" "strings" @@ -33,7 +35,9 @@ var ( // PrepareChaos contains the prepration and injection steps for the experiment func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareAzureDiskLossFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareAzureDiskLossFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // inject channel is used to transmit signal notifications. @@ -106,7 +110,15 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper // injectChaosInParallelMode will inject the Azure disk loss chaos in parallel mode that is all at once func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, instanceNamesWithDiskNames map[string][]string, attachedDisksWithInstance map[string]*[]compute.DataDisk, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAzureDiskLossFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAzureDiskLossFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("azure.disk.name", experimentsDetails.VirtualDiskNames), + attribute.String("azure.resource.group", experimentsDetails.ResourceGroup), + ), + ) defer span.End() //ChaosStartTimeStamp contains the start timestamp, when the chaos injection begin @@ -186,7 +198,15 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // injectChaosInSerialMode will inject the Azure disk loss chaos in serial mode that is one after other func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, instanceNamesWithDiskNames map[string][]string, attachedDisksWithInstance map[string]*[]compute.DataDisk, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAzureDiskLossFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAzureDiskLossFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("azure.disk.name", experimentsDetails.VirtualDiskNames), + attribute.String("azure.resource.group", experimentsDetails.ResourceGroup), + ), + ) defer span.End() //ChaosStartTimeStamp contains the start timestamp, when the chaos injection begin diff --git a/chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go b/chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go index eefd1c54a..fb2a6144b 100644 --- a/chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go +++ b/chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "os/signal" "strings" @@ -31,7 +33,9 @@ var ( // PrepareAzureStop will initialize instanceNameList and start chaos injection based on sequence method selected func PrepareAzureStop(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareAzureInstanceStopFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareAzureInstanceStopFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // inject channel is used to transmit signal notifications @@ -81,7 +85,15 @@ func PrepareAzureStop(ctx context.Context, experimentsDetails *experimentTypes.E // injectChaosInSerialMode will inject the Azure instance termination in serial mode that is one after the other func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, instanceNameList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAzureInstanceStopFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAzureInstanceStopFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("azure.vm.name", experimentsDetails.AzureInstanceNames), + attribute.String("azure.resource.group", experimentsDetails.ResourceGroup), + ), + ) defer span.End() select { @@ -162,7 +174,15 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // injectChaosInParallelMode will inject the Azure instance termination in parallel mode that is all at once func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, instanceNameList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAzureInstanceStopFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAzureInstanceStopFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("azure.vm.name", experimentsDetails.AzureInstanceNames), + attribute.String("azure.resource.group", experimentsDetails.ResourceGroup), + ), + ) defer span.End() select { diff --git a/chaoslib/litmus/container-kill/helper/container-kill.go b/chaoslib/litmus/container-kill/helper/container-kill.go index 81e6b1a67..801d95bff 100644 --- a/chaoslib/litmus/container-kill/helper/container-kill.go +++ b/chaoslib/litmus/container-kill/helper/container-kill.go @@ -6,6 +6,7 @@ import ( "fmt" "github.com/litmuschaos/litmus-go/pkg/telemetry" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" "os/exec" "strconv" "time" @@ -49,6 +50,11 @@ func Helper(ctx context.Context, clients clients.ClientSets) { // Initialise Chaos Result Parameters types.SetResultAttributes(&resultDetails, chaosDetails) + span.SetAttributes( + attribute.String("container.runtime", experimentsDetails.ContainerRuntime), + attribute.String("kill.signal", experimentsDetails.Signal), + ) + if err := killContainer(&experimentsDetails, clients, &eventsDetails, &chaosDetails, &resultDetails); err != nil { // update failstep inside chaosresult if resultErr := result.UpdateFailedStepFromHelper(&resultDetails, &chaosDetails, clients, err); resultErr != nil { diff --git a/chaoslib/litmus/container-kill/lib/container-kill.go b/chaoslib/litmus/container-kill/lib/container-kill.go index 95d81bf96..003498732 100644 --- a/chaoslib/litmus/container-kill/lib/container-kill.go +++ b/chaoslib/litmus/container-kill/lib/container-kill.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "strconv" "strings" @@ -27,7 +29,9 @@ import ( // PrepareContainerKill contains the preparation steps before chaos injection func PrepareContainerKill(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareContainerKillFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareContainerKillFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() var err error @@ -93,7 +97,13 @@ func PrepareContainerKill(ctx context.Context, experimentsDetails *experimentTyp // injectChaosInSerialMode kill the container of all target application serially (one by one) func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetPodList apiv1.PodList, clients clients.ClientSets, chaosDetails *types.ChaosDetails, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectContainerKillFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectContainerKillFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + ), + ) defer span.End() // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { @@ -145,7 +155,13 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // injectChaosInParallelMode kill the container of all target application in parallel mode (all at once) func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetPodList apiv1.PodList, clients clients.ClientSets, chaosDetails *types.ChaosDetails, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectContainerKillFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectContainerKillFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + ), + ) defer span.End() // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { @@ -262,10 +278,15 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex helperPod.Spec.Volumes = append(helperPod.Spec.Volumes, common.GetSidecarVolumes(chaosDetails)...) } - _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) + createdHelperPod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} } + span.SetAttributes( + attribute.String("helper.pod.name", createdHelperPod.Name), + attribute.String("helper.image.name", createdHelperPod.Spec.Containers[0].Image), + ) + return nil } diff --git a/chaoslib/litmus/disk-fill/helper/disk-fill.go b/chaoslib/litmus/disk-fill/helper/disk-fill.go index c851ba26f..75e1181f0 100644 --- a/chaoslib/litmus/disk-fill/helper/disk-fill.go +++ b/chaoslib/litmus/disk-fill/helper/disk-fill.go @@ -7,6 +7,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" "os" "os/exec" "os/signal" @@ -64,6 +65,8 @@ func Helper(ctx context.Context, clients clients.ClientSets) { // Set the chaos result uid result.SetResultUID(&resultDetails, clients, &chaosDetails) + span.SetAttributes(attribute.String("container.runtime", experimentsDetails.ContainerRuntime)) + if err := diskFill(&experimentsDetails, clients, &eventsDetails, &chaosDetails, &resultDetails); err != nil { // update failstep inside chaosresult if resultErr := result.UpdateFailedStepFromHelper(&resultDetails, &chaosDetails, clients, err); resultErr != nil { diff --git a/chaoslib/litmus/disk-fill/lib/disk-fill.go b/chaoslib/litmus/disk-fill/lib/disk-fill.go index 0c63f84b2..2d6065298 100644 --- a/chaoslib/litmus/disk-fill/lib/disk-fill.go +++ b/chaoslib/litmus/disk-fill/lib/disk-fill.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "strconv" "strings" @@ -28,7 +30,9 @@ import ( // PrepareDiskFill contains the preparation steps before chaos injection func PrepareDiskFill(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareDiskFillFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareDiskFillFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() var err error @@ -98,7 +102,12 @@ func PrepareDiskFill(ctx context.Context, experimentsDetails *experimentTypes.Ex // injectChaosInSerialMode fill the ephemeral storage of all target application serially (one by one) func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetPodList apiv1.PodList, clients clients.ClientSets, chaosDetails *types.ChaosDetails, execCommandDetails exec.PodDetails, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectDiskFillFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectDiskFillFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + ), + ) defer span.End() // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { @@ -151,7 +160,12 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // injectChaosInParallelMode fill the ephemeral storage of of all target application in parallel mode (all at once) func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetPodList apiv1.PodList, clients clients.ClientSets, chaosDetails *types.ChaosDetails, execCommandDetails exec.PodDetails, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectDiskFillFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectDiskFillFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + ), + ) defer span.End() var err error // run the probes during chaos @@ -268,10 +282,15 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex helperPod.Spec.Volumes = append(helperPod.Spec.Volumes, common.GetSidecarVolumes(chaosDetails)...) } - _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) + createdHelperPod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} } + span.SetAttributes( + attribute.String("helper.pod.name", createdHelperPod.Name), + attribute.String("helper.image.name", createdHelperPod.Spec.Containers[0].Image), + ) + return nil } diff --git a/chaoslib/litmus/docker-service-kill/lib/docker-service-kill.go b/chaoslib/litmus/docker-service-kill/lib/docker-service-kill.go index f1fef9c9e..6a690f0ef 100644 --- a/chaoslib/litmus/docker-service-kill/lib/docker-service-kill.go +++ b/chaoslib/litmus/docker-service-kill/lib/docker-service-kill.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "strconv" "github.com/litmuschaos/litmus-go/pkg/cerrors" @@ -26,7 +28,9 @@ import ( // PrepareDockerServiceKill contains prepration steps before chaos injection func PrepareDockerServiceKill(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareDockerServiceKillFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareDockerServiceKillFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() var err error @@ -50,6 +54,13 @@ func PrepareDockerServiceKill(ctx context.Context, experimentsDetails *experimen common.WaitForDuration(experimentsDetails.RampTime) } + span.SetAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("node.name", experimentsDetails.TargetNode), + attribute.String("node.label", experimentsDetails.NodeLabel), + ) + if experimentsDetails.EngineName != "" { msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + experimentsDetails.TargetNode + " node" types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) @@ -202,10 +213,15 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex helperPod.Spec.Volumes = append(helperPod.Spec.Volumes, common.GetSidecarVolumes(chaosDetails)...) } - _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) + createdHelperPod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} } + span.SetAttributes( + attribute.String("helper.pod.name", createdHelperPod.Name), + attribute.String("helper.image.name", createdHelperPod.Spec.Containers[0].Image), + ) + return nil } diff --git a/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-id/lib/ebs-loss-by-id.go b/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-id/lib/ebs-loss-by-id.go index dbc504628..82028ad22 100644 --- a/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-id/lib/ebs-loss-by-id.go +++ b/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-id/lib/ebs-loss-by-id.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "os/signal" "strings" @@ -27,7 +29,9 @@ var ( // PrepareEBSLossByID contains the prepration and injection steps for the experiment func PrepareEBSLossByID(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareAWSEBSLossFaultByID") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareAWSEBSLossFaultByID", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // inject channel is used to transmit signal notifications. diff --git a/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-tag/lib/ebs-loss-by-tag.go b/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-tag/lib/ebs-loss-by-tag.go index 6e8589129..936e2654d 100644 --- a/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-tag/lib/ebs-loss-by-tag.go +++ b/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-tag/lib/ebs-loss-by-tag.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "os/signal" "strings" @@ -27,7 +29,9 @@ var ( // PrepareEBSLossByTag contains the prepration and injection steps for the experiment func PrepareEBSLossByTag(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareAWSEBSLossFaultByTag") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareAWSEBSLossFaultByTag", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // inject channel is used to transmit signal notifications. diff --git a/chaoslib/litmus/ebs-loss/lib/ebs-loss.go b/chaoslib/litmus/ebs-loss/lib/ebs-loss.go index 8fa9bb0e4..0f92dff9d 100644 --- a/chaoslib/litmus/ebs-loss/lib/ebs-loss.go +++ b/chaoslib/litmus/ebs-loss/lib/ebs-loss.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "time" @@ -22,7 +24,16 @@ import ( // InjectChaosInSerialMode will inject the ebs loss chaos in serial mode which means one after other func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetEBSVolumeIDList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAWSEBSLossFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAWSEBSLossFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("aws.ebs.id", experimentsDetails.EBSVolumeID), + attribute.String("aws.ebs.tag", experimentsDetails.VolumeTag), + attribute.String("aws.region", experimentsDetails.Region), + ), + ) defer span.End() //ChaosStartTimeStamp contains the start timestamp, when the chaos injection begin @@ -101,7 +112,16 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // InjectChaosInParallelMode will inject the chaos in parallel mode that means all at once func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetEBSVolumeIDList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAWSEBSLossFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAWSEBSLossFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("aws.ebs.id", experimentsDetails.EBSVolumeID), + attribute.String("aws.ebs.tag", experimentsDetails.VolumeTag), + attribute.String("aws.region", experimentsDetails.Region), + ), + ) defer span.End() var ec2InstanceIDList, deviceList []string diff --git a/chaoslib/litmus/ec2-terminate-by-id/lib/ec2-terminate-by-id.go b/chaoslib/litmus/ec2-terminate-by-id/lib/ec2-terminate-by-id.go index 5a844099a..aeb2175f7 100644 --- a/chaoslib/litmus/ec2-terminate-by-id/lib/ec2-terminate-by-id.go +++ b/chaoslib/litmus/ec2-terminate-by-id/lib/ec2-terminate-by-id.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "os/signal" "strings" @@ -30,7 +32,9 @@ var ( // PrepareEC2TerminateByID contains the prepration and injection steps for the experiment func PrepareEC2TerminateByID(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareAWSEC2TerminateFaultByID") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareAWSEC2TerminateFaultByID", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // inject channel is used to transmit signal notifications. @@ -81,7 +85,15 @@ func PrepareEC2TerminateByID(ctx context.Context, experimentsDetails *experiment // injectChaosInSerialMode will inject the ec2 instance termination in serial mode that is one after other func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, instanceIDList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAWSEC2TerminateFaultByIDInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAWSEC2TerminateFaultByIDInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("aws.ec2.id", experimentsDetails.Ec2InstanceID), + attribute.String("aws.region", experimentsDetails.Region), + ), + ) defer span.End() select { @@ -155,7 +167,15 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // injectChaosInParallelMode will inject the ec2 instance termination in parallel mode that is all at once func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, instanceIDList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAWSEC2TerminateFaultByIDInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAWSEC2TerminateFaultByIDInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("aws.ec2.id", experimentsDetails.Ec2InstanceID), + attribute.String("aws.region", experimentsDetails.Region), + ), + ) defer span.End() select { diff --git a/chaoslib/litmus/ec2-terminate-by-tag/lib/ec2-terminate-by-tag.go b/chaoslib/litmus/ec2-terminate-by-tag/lib/ec2-terminate-by-tag.go index 2c34b83b4..7f047366d 100644 --- a/chaoslib/litmus/ec2-terminate-by-tag/lib/ec2-terminate-by-tag.go +++ b/chaoslib/litmus/ec2-terminate-by-tag/lib/ec2-terminate-by-tag.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "os/signal" "strings" @@ -28,7 +30,9 @@ var inject, abort chan os.Signal // PrepareEC2TerminateByTag contains the prepration and injection steps for the experiment func PrepareEC2TerminateByTag(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareAWSEC2TerminateFaultByTag") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareAWSEC2TerminateFaultByTag", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // inject channel is used to transmit signal notifications. @@ -76,7 +80,15 @@ func PrepareEC2TerminateByTag(ctx context.Context, experimentsDetails *experimen // injectChaosInSerialMode will inject the ce2 instance termination in serial mode that is one after other func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, instanceIDList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAWSEC2TerminateFaultByTagInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAWSEC2TerminateFaultByTagInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("aws.ec2.tag", experimentsDetails.Ec2InstanceTag), + attribute.String("aws.region", experimentsDetails.Region), + ), + ) defer span.End() select { @@ -150,7 +162,15 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // injectChaosInParallelMode will inject the ce2 instance termination in parallel mode that is all at once func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, instanceIDList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAWSEC2TerminateFaultByTagInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAWSEC2TerminateFaultByTagInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("aws.ec2.tag", experimentsDetails.Ec2InstanceTag), + attribute.String("aws.region", experimentsDetails.Region), + ), + ) defer span.End() select { diff --git a/chaoslib/litmus/gcp-vm-disk-loss-by-label/lib/gcp-vm-disk-loss-by-label.go b/chaoslib/litmus/gcp-vm-disk-loss-by-label/lib/gcp-vm-disk-loss-by-label.go index 42efdf8bd..5ff731dfa 100644 --- a/chaoslib/litmus/gcp-vm-disk-loss-by-label/lib/gcp-vm-disk-loss-by-label.go +++ b/chaoslib/litmus/gcp-vm-disk-loss-by-label/lib/gcp-vm-disk-loss-by-label.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "os/signal" "strings" @@ -31,7 +33,9 @@ var ( // PrepareDiskVolumeLossByLabel contains the prepration and injection steps for the experiment func PrepareDiskVolumeLossByLabel(ctx context.Context, computeService *compute.Service, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareGCPDiskVolumeLossFaultByLabel") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareGCPDiskVolumeLossFaultByLabel", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // inject channel is used to transmit signal notifications. @@ -91,7 +95,16 @@ func PrepareDiskVolumeLossByLabel(ctx context.Context, computeService *compute.S // injectChaosInSerialMode will inject the disk loss chaos in serial mode which means one after the other func injectChaosInSerialMode(ctx context.Context, computeService *compute.Service, experimentsDetails *experimentTypes.ExperimentDetails, targetDiskVolumeNamesList, instanceNamesList []string, zone string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectGCPDiskVolumeLossFaultByLabelInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectGCPDiskVolumeLossFaultByLabelInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("gcp.project.id", experimentsDetails.GCPProjectID), + attribute.String("gcp.disk.tag", experimentsDetails.DiskVolumeLabel), + attribute.String("gcp.zone", experimentsDetails.Zones), + ), + ) defer span.End() //ChaosStartTimeStamp contains the start timestamp, when the chaos injection begin @@ -168,7 +181,16 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic // injectChaosInParallelMode will inject the disk loss chaos in parallel mode that means all at once func injectChaosInParallelMode(ctx context.Context, computeService *compute.Service, experimentsDetails *experimentTypes.ExperimentDetails, targetDiskVolumeNamesList, instanceNamesList []string, zone string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectGCPDiskVolumeLossFaultByLabelInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectGCPDiskVolumeLossFaultByLabelInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("gcp.project.id", experimentsDetails.GCPProjectID), + attribute.String("gcp.disk.tag", experimentsDetails.DiskVolumeLabel), + attribute.String("gcp.zone", experimentsDetails.Zones), + ), + ) defer span.End() //ChaosStartTimeStamp contains the start timestamp, when the chaos injection begin diff --git a/chaoslib/litmus/gcp-vm-disk-loss/lib/gcp-vm-disk-loss.go b/chaoslib/litmus/gcp-vm-disk-loss/lib/gcp-vm-disk-loss.go index 6a99010d9..9b72a3a9c 100644 --- a/chaoslib/litmus/gcp-vm-disk-loss/lib/gcp-vm-disk-loss.go +++ b/chaoslib/litmus/gcp-vm-disk-loss/lib/gcp-vm-disk-loss.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "os/signal" "strings" @@ -32,7 +34,9 @@ var ( // PrepareDiskVolumeLoss contains the prepration and injection steps for the experiment func PrepareDiskVolumeLoss(ctx context.Context, computeService *compute.Service, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareVMDiskLossFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareVMDiskLossFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // inject channel is used to transmit signal notifications. @@ -96,7 +100,16 @@ func PrepareDiskVolumeLoss(ctx context.Context, computeService *compute.Service, // injectChaosInSerialMode will inject the disk loss chaos in serial mode which means one after the other func injectChaosInSerialMode(ctx context.Context, computeService *compute.Service, experimentsDetails *experimentTypes.ExperimentDetails, targetDiskVolumeNamesList, diskZonesList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectVMDiskLossFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectVMDiskLossFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("gcp.project.id", experimentsDetails.GCPProjectID), + attribute.String("gcp.disk.name", experimentsDetails.DiskVolumeNames), + attribute.String("gcp.zone", experimentsDetails.Zones), + ), + ) defer span.End() //ChaosStartTimeStamp contains the start timestamp, when the chaos injection begin ChaosStartTimeStamp := time.Now() @@ -168,7 +181,16 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic // injectChaosInParallelMode will inject the disk loss chaos in parallel mode that means all at once func injectChaosInParallelMode(ctx context.Context, computeService *compute.Service, experimentsDetails *experimentTypes.ExperimentDetails, targetDiskVolumeNamesList, diskZonesList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectVMDiskLossFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectVMDiskLossFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("gcp.project.id", experimentsDetails.GCPProjectID), + attribute.String("gcp.disk.name", experimentsDetails.DiskVolumeNames), + attribute.String("gcp.zone", experimentsDetails.Zones), + ), + ) defer span.End() //ChaosStartTimeStamp contains the start timestamp, when the chaos injection begin diff --git a/chaoslib/litmus/gcp-vm-instance-stop-by-label/lib/gcp-vm-instance-stop-by-label.go b/chaoslib/litmus/gcp-vm-instance-stop-by-label/lib/gcp-vm-instance-stop-by-label.go index 644a02137..5893de4cd 100644 --- a/chaoslib/litmus/gcp-vm-instance-stop-by-label/lib/gcp-vm-instance-stop-by-label.go +++ b/chaoslib/litmus/gcp-vm-instance-stop-by-label/lib/gcp-vm-instance-stop-by-label.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "os/signal" "strings" @@ -28,7 +30,9 @@ var inject, abort chan os.Signal // PrepareVMStopByLabel executes the experiment steps by injecting chaos into target VM instances func PrepareVMStopByLabel(ctx context.Context, computeService *compute.Service, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareGCPVMInstanceStopFaultByLabel") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareGCPVMInstanceStopFaultByLabel", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // inject channel is used to transmit signal notifications. @@ -77,7 +81,16 @@ func PrepareVMStopByLabel(ctx context.Context, computeService *compute.Service, // injectChaosInSerialMode stops VM instances in serial mode i.e. one after the other func injectChaosInSerialMode(ctx context.Context, computeService *compute.Service, experimentsDetails *experimentTypes.ExperimentDetails, instanceNamesList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectGCPVMInstanceStopFaultByLabelInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectGCPVMInstanceStopFaultByLabelInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("gcp.project.id", experimentsDetails.GCPProjectID), + attribute.String("gcp.vm.label", experimentsDetails.InstanceLabel), + attribute.String("gcp.zone", experimentsDetails.Zones), + ), + ) defer span.End() select { @@ -164,7 +177,16 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic // injectChaosInParallelMode will inject the VM instance termination in serial mode that is one after other func injectChaosInParallelMode(ctx context.Context, computeService *compute.Service, experimentsDetails *experimentTypes.ExperimentDetails, instanceNamesList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectGCPVMInstanceStopFaultByLabelInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectGCPVMInstanceStopFaultByLabelInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("gcp.project.id", experimentsDetails.GCPProjectID), + attribute.String("gcp.vm.label", experimentsDetails.InstanceLabel), + attribute.String("gcp.zone", experimentsDetails.Zones), + ), + ) defer span.End() select { case <-inject: diff --git a/chaoslib/litmus/gcp-vm-instance-stop/lib/gcp-vm-instance-stop.go b/chaoslib/litmus/gcp-vm-instance-stop/lib/gcp-vm-instance-stop.go index 281e1c211..b4f3d7c89 100644 --- a/chaoslib/litmus/gcp-vm-instance-stop/lib/gcp-vm-instance-stop.go +++ b/chaoslib/litmus/gcp-vm-instance-stop/lib/gcp-vm-instance-stop.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "os/signal" "strings" @@ -31,7 +33,9 @@ var ( // PrepareVMStop contains the prepration and injection steps for the experiment func PrepareVMStop(ctx context.Context, computeService *compute.Service, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareVMInstanceStopFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareVMInstanceStopFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // inject channel is used to transmit signal notifications. @@ -82,7 +86,16 @@ func PrepareVMStop(ctx context.Context, computeService *compute.Service, experim // injectChaosInSerialMode stops VM instances in serial mode i.e. one after the other func injectChaosInSerialMode(ctx context.Context, computeService *compute.Service, experimentsDetails *experimentTypes.ExperimentDetails, instanceNamesList []string, instanceZonesList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectVMInstanceStopFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectVMInstanceStopFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("gcp.project.id", experimentsDetails.GCPProjectID), + attribute.String("gcp.vm.name", experimentsDetails.VMInstanceName), + attribute.String("gcp.zone", experimentsDetails.Zones), + ), + ) defer span.End() select { @@ -169,7 +182,16 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic // injectChaosInParallelMode stops VM instances in parallel mode i.e. all at once func injectChaosInParallelMode(ctx context.Context, computeService *compute.Service, experimentsDetails *experimentTypes.ExperimentDetails, instanceNamesList []string, instanceZonesList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectVMInstanceStopFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectVMInstanceStopFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("gcp.project.id", experimentsDetails.GCPProjectID), + attribute.String("gcp.vm.name", experimentsDetails.VMInstanceName), + attribute.String("gcp.zone", experimentsDetails.Zones), + ), + ) defer span.End() select { diff --git a/chaoslib/litmus/http-chaos/helper/http-helper.go b/chaoslib/litmus/http-chaos/helper/http-helper.go index b544df448..bb54c1a91 100644 --- a/chaoslib/litmus/http-chaos/helper/http-helper.go +++ b/chaoslib/litmus/http-chaos/helper/http-helper.go @@ -7,6 +7,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" "os" "os/signal" "strconv" @@ -63,6 +64,12 @@ func Helper(ctx context.Context, clients clients.ClientSets) { // Set the chaos result uid result.SetResultUID(&resultDetails, clients, &chaosDetails) + span.SetAttributes( + attribute.String("container.runtime", experimentsDetails.ContainerRuntime), + attribute.Int("http.port", experimentsDetails.TargetServicePort), + attribute.Int("proxy.port", experimentsDetails.ProxyPort), + ) + err := prepareK8sHttpChaos(&experimentsDetails, clients, &eventsDetails, &chaosDetails, &resultDetails) if err != nil { // update failstep inside chaosresult diff --git a/chaoslib/litmus/http-chaos/lib/header/header.go b/chaoslib/litmus/http-chaos/lib/header/header.go index 1c822d505..574c0ee43 100644 --- a/chaoslib/litmus/http-chaos/lib/header/header.go +++ b/chaoslib/litmus/http-chaos/lib/header/header.go @@ -2,6 +2,8 @@ package header import ( "context" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" http_chaos "github.com/litmuschaos/litmus-go/chaoslib/litmus/http-chaos/lib" "github.com/litmuschaos/litmus-go/pkg/clients" @@ -15,7 +17,9 @@ import ( // PodHttpModifyHeaderChaos contains the steps to prepare and inject http modify header chaos func PodHttpModifyHeaderChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodHTTPModifyHeaderFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodHTTPModifyHeaderFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() log.InfoWithValues("[Info]: The chaos tunables are:", logrus.Fields{ diff --git a/chaoslib/litmus/http-chaos/lib/http-chaos.go b/chaoslib/litmus/http-chaos/lib/http-chaos.go index 59323f0b8..498b4186e 100644 --- a/chaoslib/litmus/http-chaos/lib/http-chaos.go +++ b/chaoslib/litmus/http-chaos/lib/http-chaos.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "strconv" "strings" @@ -82,7 +84,12 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy // injectChaosInSerialMode inject the http chaos in all target application serially (one by one) func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetPodList apiv1.PodList, args string, clients clients.ClientSets, chaosDetails *types.ChaosDetails, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodHTTPFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodHTTPFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + ), + ) defer span.End() // run the probes during chaos @@ -141,7 +148,12 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // injectChaosInParallelMode inject the http chaos in all target application in parallel mode (all at once) func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetPodList apiv1.PodList, args string, clients clients.ClientSets, chaosDetails *types.ChaosDetails, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodHTTPFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodHTTPFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + ), + ) defer span.End() // run the probes during chaos @@ -264,10 +276,15 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex helperPod.Spec.Volumes = append(helperPod.Spec.Volumes, common.GetSidecarVolumes(chaosDetails)...) } - _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) + createdHelperPod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} } + span.SetAttributes( + attribute.String("helper.pod.name", createdHelperPod.Name), + attribute.String("helper.image.name", createdHelperPod.Spec.Containers[0].Image), + ) + return nil } diff --git a/chaoslib/litmus/http-chaos/lib/latency/latency.go b/chaoslib/litmus/http-chaos/lib/latency/latency.go index 7cd7f4816..21edb00ee 100644 --- a/chaoslib/litmus/http-chaos/lib/latency/latency.go +++ b/chaoslib/litmus/http-chaos/lib/latency/latency.go @@ -2,6 +2,8 @@ package latency import ( "context" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "strconv" http_chaos "github.com/litmuschaos/litmus-go/chaoslib/litmus/http-chaos/lib" @@ -16,7 +18,9 @@ import ( // PodHttpLatencyChaos contains the steps to prepare and inject http latency chaos func PodHttpLatencyChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodHttpLatencyFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodHttpLatencyFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() log.InfoWithValues("[Info]: The chaos tunables are:", logrus.Fields{ diff --git a/chaoslib/litmus/http-chaos/lib/modify-body/modify-body.go b/chaoslib/litmus/http-chaos/lib/modify-body/modify-body.go index 86c136def..692c571ad 100644 --- a/chaoslib/litmus/http-chaos/lib/modify-body/modify-body.go +++ b/chaoslib/litmus/http-chaos/lib/modify-body/modify-body.go @@ -3,6 +3,8 @@ package modifybody import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "math" "strings" @@ -18,7 +20,9 @@ import ( // PodHttpModifyBodyChaos contains the steps to prepare and inject http modify body chaos func PodHttpModifyBodyChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodHTTPModifyBodyFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodHTTPModifyBodyFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // responseBodyMaxLength defines the max length of response body string to be printed. It is taken as diff --git a/chaoslib/litmus/http-chaos/lib/reset/reset.go b/chaoslib/litmus/http-chaos/lib/reset/reset.go index 9bff4e09d..267695908 100644 --- a/chaoslib/litmus/http-chaos/lib/reset/reset.go +++ b/chaoslib/litmus/http-chaos/lib/reset/reset.go @@ -2,6 +2,8 @@ package reset import ( "context" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "strconv" http_chaos "github.com/litmuschaos/litmus-go/chaoslib/litmus/http-chaos/lib" @@ -16,7 +18,9 @@ import ( // PodHttpResetPeerChaos contains the steps to prepare and inject http reset peer chaos func PodHttpResetPeerChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodHTTPResetPeerFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodHTTPResetPeerFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() log.InfoWithValues("[Info]: The chaos tunables are:", logrus.Fields{ diff --git a/chaoslib/litmus/http-chaos/lib/statuscode/status-code.go b/chaoslib/litmus/http-chaos/lib/statuscode/status-code.go index 228e1072d..baf7d7208 100644 --- a/chaoslib/litmus/http-chaos/lib/statuscode/status-code.go +++ b/chaoslib/litmus/http-chaos/lib/statuscode/status-code.go @@ -3,6 +3,8 @@ package statuscode import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "math" "math/rand" "strconv" @@ -31,7 +33,9 @@ var acceptedStatusCodes = []string{ // PodHttpStatusCodeChaos contains the steps to prepare and inject http status code chaos func PodHttpStatusCodeChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodHttpStatusCodeFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodHttpStatusCodeFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // responseBodyMaxLength defines the max length of response body string to be printed. It is taken as diff --git a/chaoslib/litmus/k6-loadgen/lib/k6-loadgen.go b/chaoslib/litmus/k6-loadgen/lib/k6-loadgen.go index 79ce56b30..8ebbccc5e 100644 --- a/chaoslib/litmus/k6-loadgen/lib/k6-loadgen.go +++ b/chaoslib/litmus/k6-loadgen/lib/k6-loadgen.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "strconv" @@ -24,7 +26,14 @@ import ( ) func experimentExecution(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectK6LoadGenFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectK6LoadGenFault", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("k6.script", experimentsDetails.ScriptSecretName), + ), + ) defer span.End() if experimentsDetails.EngineName != "" { @@ -75,7 +84,9 @@ func experimentExecution(ctx context.Context, experimentsDetails *experimentType // PrepareChaos contains the preparation steps before chaos injection func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareK6LoadGenFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareK6LoadGenFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // Waiting for the ramp time before chaos injection @@ -176,9 +187,14 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex }, } - _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) + createdHelperPod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} } + span.SetAttributes( + attribute.String("helper.pod.name", createdHelperPod.Name), + attribute.String("helper.image.name", createdHelperPod.Spec.Containers[0].Image), + ) + return nil } diff --git a/chaoslib/litmus/kafka-broker-pod-failure/lib/pod-delete.go b/chaoslib/litmus/kafka-broker-pod-failure/lib/pod-delete.go index d828f614d..67e74c332 100644 --- a/chaoslib/litmus/kafka-broker-pod-failure/lib/pod-delete.go +++ b/chaoslib/litmus/kafka-broker-pod-failure/lib/pod-delete.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "strconv" "strings" "time" @@ -27,7 +29,9 @@ import ( // PreparePodDelete contains the prepration steps before chaos injection func PreparePodDelete(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareKafkaPodDeleteFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareKafkaPodDeleteFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.ChaoslibDetail.RampTime)), + ) defer span.End() //Waiting for the ramp time before chaos injection @@ -59,7 +63,12 @@ func PreparePodDelete(ctx context.Context, experimentsDetails *experimentTypes.E // injectChaosInSerialMode delete the kafka broker pods in serial mode(one by one) func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails, eventsDetails *types.EventDetails, resultDetails *types.ResultDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectKafkaPodDeleteFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectKafkaPodDeleteFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaoslibDetail.ChaosDuration), + attribute.String("chaos.interval", experimentsDetails.ChaoslibDetail.ChaosInterval), + ), + ) defer span.End() // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { @@ -155,7 +164,12 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // injectChaosInParallelMode delete the kafka broker pods in parallel mode (all at once) func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails, eventsDetails *types.EventDetails, resultDetails *types.ResultDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectKafkaPodDeleteFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectKafkaPodDeleteFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaoslibDetail.ChaosDuration), + attribute.String("chaos.interval", experimentsDetails.ChaoslibDetail.ChaosInterval), + ), + ) defer span.End() // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { diff --git a/chaoslib/litmus/kubelet-service-kill/lib/kubelet-service-kill.go b/chaoslib/litmus/kubelet-service-kill/lib/kubelet-service-kill.go index 350a8b390..45dba409a 100644 --- a/chaoslib/litmus/kubelet-service-kill/lib/kubelet-service-kill.go +++ b/chaoslib/litmus/kubelet-service-kill/lib/kubelet-service-kill.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "strconv" "github.com/litmuschaos/litmus-go/pkg/cerrors" @@ -26,7 +28,9 @@ import ( // PrepareKubeletKill contains prepration steps before chaos injection func PrepareKubeletKill(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareKubeletServiceKillFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareKubeletServiceKillFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() var err error @@ -50,6 +54,13 @@ func PrepareKubeletKill(ctx context.Context, experimentsDetails *experimentTypes common.WaitForDuration(experimentsDetails.RampTime) } + span.SetAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("node.name", experimentsDetails.TargetNode), + attribute.String("node.label", experimentsDetails.NodeLabel), + ) + if experimentsDetails.EngineName != "" { msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + experimentsDetails.TargetNode + " node" types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) @@ -204,10 +215,15 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex helperPod.Spec.Volumes = append(helperPod.Spec.Volumes, common.GetSidecarVolumes(chaosDetails)...) } - _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) + createdHelperPod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} } + span.SetAttributes( + attribute.String("helper.pod.name", createdHelperPod.Name), + attribute.String("helper.image.name", createdHelperPod.Spec.Containers[0].Image), + ) + return nil } diff --git a/chaoslib/litmus/network-chaos/helper/netem.go b/chaoslib/litmus/network-chaos/helper/netem.go index b5d200c24..5a445c2f8 100644 --- a/chaoslib/litmus/network-chaos/helper/netem.go +++ b/chaoslib/litmus/network-chaos/helper/netem.go @@ -8,6 +8,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" "os" "os/exec" "os/signal" @@ -70,6 +71,12 @@ func Helper(ctx context.Context, clients clients.ClientSets) { // Set the chaos result uid result.SetResultUID(&resultDetails, clients, &chaosDetails) + span.SetAttributes( + attribute.String("container.runtime", experimentsDetails.ContainerRuntime), + attribute.String("network.interface", experimentsDetails.NetworkInterface), + attribute.String("tc.image.name", "gaiadocker/iproute2"), + ) + err := preparePodNetworkChaos(&experimentsDetails, clients, &eventsDetails, &chaosDetails, &resultDetails) if err != nil { // update failstep inside chaosresult diff --git a/chaoslib/litmus/network-chaos/lib/corruption/corruption.go b/chaoslib/litmus/network-chaos/lib/corruption/corruption.go index 4d78450da..86bb4f61d 100644 --- a/chaoslib/litmus/network-chaos/lib/corruption/corruption.go +++ b/chaoslib/litmus/network-chaos/lib/corruption/corruption.go @@ -2,6 +2,8 @@ package corruption import ( "context" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" network_chaos "github.com/litmuschaos/litmus-go/chaoslib/litmus/network-chaos/lib" "github.com/litmuschaos/litmus-go/pkg/clients" @@ -13,7 +15,9 @@ import ( // PodNetworkCorruptionChaos contains the steps to prepare and inject chaos func PodNetworkCorruptionChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodNetworkCorruptionFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodNetworkCorruptionFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() args := "corrupt " + experimentsDetails.NetworkPacketCorruptionPercentage diff --git a/chaoslib/litmus/network-chaos/lib/duplication/duplication.go b/chaoslib/litmus/network-chaos/lib/duplication/duplication.go index 9ceae9fe6..ef9c8c1eb 100644 --- a/chaoslib/litmus/network-chaos/lib/duplication/duplication.go +++ b/chaoslib/litmus/network-chaos/lib/duplication/duplication.go @@ -2,6 +2,8 @@ package duplication import ( "context" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" network_chaos "github.com/litmuschaos/litmus-go/chaoslib/litmus/network-chaos/lib" "github.com/litmuschaos/litmus-go/pkg/clients" @@ -13,7 +15,9 @@ import ( // PodNetworkDuplicationChaos contains the steps to prepare and inject chaos func PodNetworkDuplicationChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodNetworkDuplicationFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodNetworkDuplicationFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() args := "duplicate " + experimentsDetails.NetworkPacketDuplicationPercentage diff --git a/chaoslib/litmus/network-chaos/lib/latency/latency.go b/chaoslib/litmus/network-chaos/lib/latency/latency.go index e3f849fbb..62bdecd1d 100644 --- a/chaoslib/litmus/network-chaos/lib/latency/latency.go +++ b/chaoslib/litmus/network-chaos/lib/latency/latency.go @@ -2,6 +2,8 @@ package latency import ( "context" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "strconv" network_chaos "github.com/litmuschaos/litmus-go/chaoslib/litmus/network-chaos/lib" @@ -14,7 +16,9 @@ import ( // PodNetworkLatencyChaos contains the steps to prepare and inject chaos func PodNetworkLatencyChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodNetworkLatencyFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodNetworkLatencyFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() args := "delay " + strconv.Itoa(experimentsDetails.NetworkLatency) + "ms " + strconv.Itoa(experimentsDetails.Jitter) + "ms" diff --git a/chaoslib/litmus/network-chaos/lib/loss/loss.go b/chaoslib/litmus/network-chaos/lib/loss/loss.go index 336c57a66..edaa6ac52 100644 --- a/chaoslib/litmus/network-chaos/lib/loss/loss.go +++ b/chaoslib/litmus/network-chaos/lib/loss/loss.go @@ -2,6 +2,8 @@ package loss import ( "context" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" network_chaos "github.com/litmuschaos/litmus-go/chaoslib/litmus/network-chaos/lib" "github.com/litmuschaos/litmus-go/pkg/clients" @@ -13,7 +15,9 @@ import ( // PodNetworkLossChaos contains the steps to prepare and inject chaos func PodNetworkLossChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodNetworkLossFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodNetworkLossFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() args := "loss " + experimentsDetails.NetworkPacketLossPercentage diff --git a/chaoslib/litmus/network-chaos/lib/network-chaos.go b/chaoslib/litmus/network-chaos/lib/network-chaos.go index dc73645f8..f165ae8e1 100644 --- a/chaoslib/litmus/network-chaos/lib/network-chaos.go +++ b/chaoslib/litmus/network-chaos/lib/network-chaos.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "net" "os" "strconv" @@ -88,7 +90,12 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy // injectChaosInSerialMode inject the network chaos in all target application serially (one by one) func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetPodList apiv1.PodList, clients clients.ClientSets, chaosDetails *types.ChaosDetails, args string, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodNetworkFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodNetworkFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + ), + ) defer span.End() // run the probes during chaos @@ -147,7 +154,12 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // injectChaosInParallelMode inject the network chaos in all target application in parallel mode (all at once) func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetPodList apiv1.PodList, clients clients.ClientSets, chaosDetails *types.ChaosDetails, args string, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodNetworkFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodNetworkFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + ), + ) defer span.End() var err error @@ -275,10 +287,15 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex helperPod.Spec.Volumes = append(helperPod.Spec.Volumes, common.GetSidecarVolumes(chaosDetails)...) } - _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) + createdHelperPod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} } + span.SetAttributes( + attribute.String("helper.pod.name", createdHelperPod.Name), + attribute.String("helper.image.name", createdHelperPod.Spec.Containers[0].Image), + ) + return nil } diff --git a/chaoslib/litmus/node-cpu-hog/lib/node-cpu-hog.go b/chaoslib/litmus/node-cpu-hog/lib/node-cpu-hog.go index 57bd9adb0..70c371828 100644 --- a/chaoslib/litmus/node-cpu-hog/lib/node-cpu-hog.go +++ b/chaoslib/litmus/node-cpu-hog/lib/node-cpu-hog.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "strconv" "strings" @@ -27,7 +29,9 @@ import ( // PrepareNodeCPUHog contains preparation steps before chaos injection func PrepareNodeCPUHog(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareNodeCPUHogFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareNodeCPUHogFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() //set up the tunables if provided in range @@ -87,7 +91,14 @@ func PrepareNodeCPUHog(ctx context.Context, experimentsDetails *experimentTypes. // injectChaosInSerialMode stress the cpu of all the target nodes serially (one by one) func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetNodeList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectNodeCPUHogFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectNodeCPUHogFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("node.name", experimentsDetails.TargetNodes), + attribute.String("node.label", experimentsDetails.NodeLabel), + ), + ) defer span.End() nodeCPUCores := experimentsDetails.NodeCPUcores @@ -156,7 +167,14 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // injectChaosInParallelMode stress the cpu of all the target nodes in parallel mode (all at once) func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetNodeList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectNodeCPUHogFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectNodeCPUHogFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("node.name", experimentsDetails.TargetNodes), + attribute.String("node.label", experimentsDetails.NodeLabel), + ), + ) defer span.End() nodeCPUCores := experimentsDetails.NodeCPUcores @@ -282,10 +300,15 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex helperPod.Spec.Volumes = append(helperPod.Spec.Volumes, common.GetSidecarVolumes(chaosDetails)...) } - _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) + createdHelperPod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} } + span.SetAttributes( + attribute.String("helper.pod.name", createdHelperPod.Name), + attribute.String("helper.image.name", createdHelperPod.Spec.Containers[0].Image), + ) + return nil } diff --git a/chaoslib/litmus/node-drain/lib/node-drain.go b/chaoslib/litmus/node-drain/lib/node-drain.go index b46dd34bf..476ef5127 100644 --- a/chaoslib/litmus/node-drain/lib/node-drain.go +++ b/chaoslib/litmus/node-drain/lib/node-drain.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "os/exec" "os/signal" @@ -36,7 +38,9 @@ var ( // PrepareNodeDrain contains the preparation steps before chaos injection func PrepareNodeDrain(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareNodeDrainFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareNodeDrainFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // inject channel is used to transmit signal notifications. @@ -131,7 +135,14 @@ func PrepareNodeDrain(ctx context.Context, experimentsDetails *experimentTypes.E // drainNode drain the target node func drainNode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectNodeDrainFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectNodeDrainFault", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("node.name", experimentsDetails.TargetNode), + attribute.String("node.label", experimentsDetails.NodeLabel), + ), + ) defer span.End() select { diff --git a/chaoslib/litmus/node-io-stress/lib/node-io-stress.go b/chaoslib/litmus/node-io-stress/lib/node-io-stress.go index 94b269a81..8fcda01d4 100644 --- a/chaoslib/litmus/node-io-stress/lib/node-io-stress.go +++ b/chaoslib/litmus/node-io-stress/lib/node-io-stress.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "strconv" "strings" @@ -27,7 +29,9 @@ import ( // PrepareNodeIOStress contains preparation steps before chaos injection func PrepareNodeIOStress(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareNodeIOStressFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareNodeIOStressFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() //set up the tunables if provided in range setChaosTunables(experimentsDetails) @@ -87,7 +91,16 @@ func PrepareNodeIOStress(ctx context.Context, experimentsDetails *experimentType // injectChaosInSerialMode stress the io of all the target nodes serially (one by one) func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetNodeList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectNodeIOStressFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectNodeIOStressFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("node.name", experimentsDetails.TargetNodes), + attribute.String("node.label", experimentsDetails.NodeLabel), + attribute.String("io.worker.count", experimentsDetails.NumberOfWorkers), + attribute.String("vm.worker.count", experimentsDetails.VMWorkers), + ), + ) defer span.End() // run the probes during chaos @@ -147,7 +160,16 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // injectChaosInParallelMode stress the io of all the target nodes in parallel mode (all at once) func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetNodeList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectNodeIOStressFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectNodeIOStressFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("node.name", experimentsDetails.TargetNodes), + attribute.String("node.label", experimentsDetails.NodeLabel), + attribute.String("io.worker.count", experimentsDetails.NumberOfWorkers), + attribute.String("vm.worker.count", experimentsDetails.VMWorkers), + ), + ) defer span.End() // run the probes during chaos @@ -249,10 +271,15 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex helperPod.Spec.Volumes = append(helperPod.Spec.Volumes, common.GetSidecarVolumes(chaosDetails)...) } - _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) + createdHelperPod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} } + span.SetAttributes( + attribute.String("helper.pod.name", createdHelperPod.Name), + attribute.String("helper.image.name", createdHelperPod.Spec.Containers[0].Image), + ) + return nil } diff --git a/chaoslib/litmus/node-memory-hog/lib/node-memory-hog.go b/chaoslib/litmus/node-memory-hog/lib/node-memory-hog.go index 6562d817f..a011f35bd 100644 --- a/chaoslib/litmus/node-memory-hog/lib/node-memory-hog.go +++ b/chaoslib/litmus/node-memory-hog/lib/node-memory-hog.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "strconv" "strings" @@ -28,7 +30,9 @@ import ( // PrepareNodeMemoryHog contains preparation steps before chaos injection func PrepareNodeMemoryHog(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareNodeMemoryHogFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareNodeMemoryHogFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() //set up the tunables if provided in range @@ -89,7 +93,15 @@ func PrepareNodeMemoryHog(ctx context.Context, experimentsDetails *experimentTyp // injectChaosInSerialMode stress the memory of all the target nodes serially (one by one) func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetNodeList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectNodeMemoryHogFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectNodeMemoryHogFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("node.name", experimentsDetails.TargetNodes), + attribute.String("node.label", experimentsDetails.NodeLabel), + attribute.String("io.worker.count", experimentsDetails.NumberOfWorkers), + ), + ) defer span.End() // run the probes during chaos @@ -165,7 +177,15 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // injectChaosInParallelMode stress the memory all the target nodes in parallel mode (all at once) func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetNodeList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectNodeMemoryHogFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectNodeMemoryHogFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("node.name", experimentsDetails.TargetNodes), + attribute.String("node.label", experimentsDetails.NodeLabel), + attribute.String("io.worker.count", experimentsDetails.NumberOfWorkers), + ), + ) defer span.End() // run the probes during chaos @@ -365,10 +385,15 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex helperPod.Spec.Volumes = append(helperPod.Spec.Volumes, common.GetSidecarVolumes(chaosDetails)...) } - _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) + createdHelperPod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} } + span.SetAttributes( + attribute.String("helper.pod.name", createdHelperPod.Name), + attribute.String("helper.image.name", createdHelperPod.Spec.Containers[0].Image), + ) + return nil } diff --git a/chaoslib/litmus/node-restart/lib/node-restart.go b/chaoslib/litmus/node-restart/lib/node-restart.go index d4750912e..66cba727d 100644 --- a/chaoslib/litmus/node-restart/lib/node-restart.go +++ b/chaoslib/litmus/node-restart/lib/node-restart.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "strconv" "strings" @@ -41,7 +43,9 @@ const ( // PrepareNodeRestart contains preparation steps before chaos injection func PrepareNodeRestart(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareNodeRestartFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareNodeRestartFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() //Select the node @@ -74,6 +78,14 @@ func PrepareNodeRestart(ctx context.Context, experimentsDetails *experimentTypes common.WaitForDuration(experimentsDetails.RampTime) } + span.SetAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("node.name", experimentsDetails.TargetNode), + attribute.String("node.label", experimentsDetails.NodeLabel), + attribute.String("node.ip", experimentsDetails.TargetNodeIP), + ) + if experimentsDetails.EngineName != "" { msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + experimentsDetails.TargetNode + " node" types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) @@ -214,10 +226,15 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex helperPod.Spec.Volumes = append(helperPod.Spec.Volumes, common.GetSidecarVolumes(chaosDetails)...) } - _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) + createdHelperPod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} } + span.SetAttributes( + attribute.String("helper.pod.name", createdHelperPod.Name), + attribute.String("helper.image.name", createdHelperPod.Spec.Containers[0].Image), + ) + return nil } diff --git a/chaoslib/litmus/node-taint/lib/node-taint.go b/chaoslib/litmus/node-taint/lib/node-taint.go index 8efd0a289..3ea564468 100644 --- a/chaoslib/litmus/node-taint/lib/node-taint.go +++ b/chaoslib/litmus/node-taint/lib/node-taint.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "os/signal" "strings" @@ -33,7 +35,9 @@ var ( // PrepareNodeTaint contains the preparation steps before chaos injection func PrepareNodeTaint(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareNodeTaintFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareNodeTaintFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // inject channel is used to transmit signal notifications. @@ -123,7 +127,15 @@ func PrepareNodeTaint(ctx context.Context, experimentsDetails *experimentTypes.E // taintNode taint the application node func taintNode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectNodeTaintFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectNodeTaintFault", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("node.name", experimentsDetails.TargetNode), + attribute.String("node.label", experimentsDetails.NodeLabel), + attribute.String("taint.label", experimentsDetails.Taints), + ), + ) defer span.End() // get the taint labels & effect diff --git a/chaoslib/litmus/pod-autoscaler/lib/pod-autoscaler.go b/chaoslib/litmus/pod-autoscaler/lib/pod-autoscaler.go index feefad7f6..e6d542e91 100644 --- a/chaoslib/litmus/pod-autoscaler/lib/pod-autoscaler.go +++ b/chaoslib/litmus/pod-autoscaler/lib/pod-autoscaler.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "os/signal" "strings" @@ -36,7 +38,9 @@ var ( // PreparePodAutoscaler contains the preparation steps and chaos injection steps func PreparePodAutoscaler(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodAutoscalerFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodAutoscalerFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() //Waiting for the ramp time before chaos injection diff --git a/chaoslib/litmus/pod-cpu-hog-exec/lib/pod-cpu-hog-exec.go b/chaoslib/litmus/pod-cpu-hog-exec/lib/pod-cpu-hog-exec.go index f28be58f7..750a6aa59 100644 --- a/chaoslib/litmus/pod-cpu-hog-exec/lib/pod-cpu-hog-exec.go +++ b/chaoslib/litmus/pod-cpu-hog-exec/lib/pod-cpu-hog-exec.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "os/signal" "strings" @@ -31,7 +33,9 @@ var inject chan os.Signal // PrepareCPUExecStress contains the chaos preparation and injection steps func PrepareCPUExecStress(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodCPUHogExecFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodCPUHogExecFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // inject channel is used to transmit signal notifications. inject = make(chan os.Signal, 1) @@ -58,7 +62,10 @@ func PrepareCPUExecStress(ctx context.Context, experimentsDetails *experimentTyp // stressCPU Uses the REST API to exec into the target container of the target pod // The function will be constantly increasing the CPU utilisation until it reaches the maximum available or allowed number. // Using the TOTAL_CHAOS_DURATION we will need to specify for how long this experiment will last -func stressCPU(experimentsDetails *experimentTypes.ExperimentDetails, podName, ns string, clients clients.ClientSets, stressErr chan error) { +func stressCPU(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, podName, ns string, clients clients.ClientSets, stressErr chan error) { + span := trace.SpanFromContext(ctx) + span.SetAttributes(attribute.String("inject.command_line", experimentsDetails.ChaosInjectCmd)) + // It will contain all the pod & container details required for exec command execCommandDetails := litmusexec.PodDetails{} command := []string{"/bin/sh", "-c", experimentsDetails.ChaosInjectCmd} @@ -105,7 +112,13 @@ func experimentCPU(ctx context.Context, experimentsDetails *experimentTypes.Expe // injectChaosInSerialMode stressed the cpu of all target application serially (one by one) func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetPodList corev1.PodList, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodCPUHogExecFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodCPUHogExecFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + ), + ) defer span.End() // run the probes during chaos @@ -152,7 +165,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment }) for i := 0; i < experimentsDetails.CPUcores; i++ { - go stressCPU(experimentsDetails, pod.Name, pod.Namespace, clients, stressErr) + go stressCPU(ctx, experimentsDetails, pod.Name, pod.Namespace, clients, stressErr) } common.SetTargets(pod.Name, "injected", "pod", chaosDetails) @@ -176,7 +189,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment } case <-signChan: log.Info("[Chaos]: Revert Started") - if err := killStressCPUSerial(experimentsDetails, pod.Name, pod.Namespace, clients, chaosDetails); err != nil { + if err := killStressCPUSerial(ctx, experimentsDetails, pod.Name, pod.Namespace, clients, chaosDetails); err != nil { log.Errorf("Error in Kill stress after abortion, err: %v", err) } // updating the chaosresult after stopped @@ -194,7 +207,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment break loop } } - if err := killStressCPUSerial(experimentsDetails, pod.Name, pod.Namespace, clients, chaosDetails); err != nil { + if err := killStressCPUSerial(ctx, experimentsDetails, pod.Name, pod.Namespace, clients, chaosDetails); err != nil { return stacktrace.Propagate(err, "could not revert cpu stress") } } @@ -204,7 +217,13 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // injectChaosInParallelMode stressed the cpu of all target application in parallel mode (all at once) func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetPodList corev1.PodList, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodCPUHogExecFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodCPUHogExecFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + ), + ) defer span.End() // creating err channel to receive the error from the go routine @@ -249,7 +268,7 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime "CPU CORE": experimentsDetails.CPUcores, }) for i := 0; i < experimentsDetails.CPUcores; i++ { - go stressCPU(experimentsDetails, pod.Name, pod.Namespace, clients, stressErr) + go stressCPU(ctx, experimentsDetails, pod.Name, pod.Namespace, clients, stressErr) } common.SetTargets(pod.Name, "injected", "pod", chaosDetails) } @@ -274,7 +293,7 @@ loop: } case <-signChan: log.Info("[Chaos]: Revert Started") - if err := killStressCPUParallel(experimentsDetails, targetPodList, clients, chaosDetails); err != nil { + if err := killStressCPUParallel(ctx, experimentsDetails, targetPodList, clients, chaosDetails); err != nil { log.Errorf("Error in Kill stress after abortion, err: %v", err) } // updating the chaosresult after stopped @@ -292,13 +311,16 @@ loop: break loop } } - return killStressCPUParallel(experimentsDetails, targetPodList, clients, chaosDetails) + return killStressCPUParallel(ctx, experimentsDetails, targetPodList, clients, chaosDetails) } // killStressCPUSerial function to kill a stress process running inside target container // // Triggered by either timeout of chaos duration or termination of the experiment -func killStressCPUSerial(experimentsDetails *experimentTypes.ExperimentDetails, podName, ns string, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error { +func killStressCPUSerial(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, podName, ns string, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error { + span := trace.SpanFromContext(ctx) + span.SetAttributes(attribute.String("recover.command_line", experimentsDetails.ChaosInjectCmd)) + // It will contain all the pod & container details required for exec command execCommandDetails := litmusexec.PodDetails{} @@ -315,10 +337,10 @@ func killStressCPUSerial(experimentsDetails *experimentTypes.ExperimentDetails, // killStressCPUParallel function to kill all the stress process running inside target container // Triggered by either timeout of chaos duration or termination of the experiment -func killStressCPUParallel(experimentsDetails *experimentTypes.ExperimentDetails, targetPodList corev1.PodList, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error { +func killStressCPUParallel(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetPodList corev1.PodList, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error { var errList []string for _, pod := range targetPodList.Items { - if err := killStressCPUSerial(experimentsDetails, pod.Name, pod.Namespace, clients, chaosDetails); err != nil { + if err := killStressCPUSerial(ctx, experimentsDetails, pod.Name, pod.Namespace, clients, chaosDetails); err != nil { errList = append(errList, err.Error()) } } diff --git a/chaoslib/litmus/pod-delete/lib/pod-delete.go b/chaoslib/litmus/pod-delete/lib/pod-delete.go index 127e53b1f..92f3c19e7 100644 --- a/chaoslib/litmus/pod-delete/lib/pod-delete.go +++ b/chaoslib/litmus/pod-delete/lib/pod-delete.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "strconv" "strings" "time" @@ -27,7 +28,9 @@ import ( // PreparePodDelete contains the preparation steps before chaos injection func PreparePodDelete(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodDeleteFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodDeleteFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() //Waiting for the ramp time before chaos injection @@ -67,7 +70,13 @@ func PreparePodDelete(ctx context.Context, experimentsDetails *experimentTypes.E // injectChaosInSerialMode delete the target application pods serial mode(one by one) func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails, eventsDetails *types.EventDetails, resultDetails *types.ResultDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodDeleteFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodDeleteFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + ), + ) defer span.End() // run the probes during chaos @@ -114,10 +123,6 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Deleting the application pod for _, pod := range targetPodList.Items { - span.SetAttributes( - attribute.String("pod.name", pod.Name), - attribute.String("pod.namespace", pod.Namespace), - ) log.InfoWithValues("[Info]: Killing the following pods", logrus.Fields{ "PodName": pod.Name}) @@ -170,7 +175,13 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // injectChaosInParallelMode delete the target application pods in parallel mode (all at once) func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails, eventsDetails *types.EventDetails, resultDetails *types.ResultDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodDeleteFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodDeleteFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + ), + ) defer span.End() // run the probes during chaos @@ -216,10 +227,6 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Deleting the application pod for _, pod := range targetPodList.Items { - span.SetAttributes( - attribute.String("pod.name", pod.Name), - attribute.String("pod.namespace", pod.Namespace), - ) log.InfoWithValues("[Info]: Killing the following pods", logrus.Fields{ "PodName": pod.Name}) @@ -261,8 +268,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime } } duration = int(time.Since(ChaosStartTimeStamp).Seconds()) - } + } log.Infof("[Completion]: %v chaos is done", experimentsDetails.ExperimentName) return nil diff --git a/chaoslib/litmus/pod-dns-chaos/helper/dnschaos.go b/chaoslib/litmus/pod-dns-chaos/helper/dnschaos.go index 04b895f9b..01719268e 100644 --- a/chaoslib/litmus/pod-dns-chaos/helper/dnschaos.go +++ b/chaoslib/litmus/pod-dns-chaos/helper/dnschaos.go @@ -8,6 +8,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" "os" "os/exec" "os/signal" @@ -69,6 +70,12 @@ func Helper(ctx context.Context, clients clients.ClientSets) { // Set the chaos result uid result.SetResultUID(&resultDetails, clients, &chaosDetails) + span.SetAttributes( + attribute.String("container.runtime", experimentsDetails.ContainerRuntime), + attribute.String("dns.spoof.map", experimentsDetails.SpoofMap), + attribute.String("dns.match.scheme", experimentsDetails.MatchScheme), + ) + if err := preparePodDNSChaos(&experimentsDetails, clients, &eventsDetails, &chaosDetails, &resultDetails); err != nil { // update failstep inside chaosresult if resultErr := result.UpdateFailedStepFromHelper(&resultDetails, &chaosDetails, clients, err); resultErr != nil { diff --git a/chaoslib/litmus/pod-dns-chaos/lib/pod-dns-chaos.go b/chaoslib/litmus/pod-dns-chaos/lib/pod-dns-chaos.go index ce608e564..919fb467f 100644 --- a/chaoslib/litmus/pod-dns-chaos/lib/pod-dns-chaos.go +++ b/chaoslib/litmus/pod-dns-chaos/lib/pod-dns-chaos.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "strconv" "strings" @@ -28,7 +30,9 @@ import ( // PrepareAndInjectChaos contains the preparation & injection steps func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodDNSFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodDNSFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // Get the target pod details for the chaos execution // if the target pod is not defined it will derive the random target pod list using pod affected percentage @@ -85,7 +89,12 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy // injectChaosInSerialMode inject the DNS Chaos in all target application serially (one by one) func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetPodList apiv1.PodList, clients clients.ClientSets, chaosDetails *types.ChaosDetails, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodDNSFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodDNSFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + ), + ) defer span.End() // run the probes during chaos @@ -143,7 +152,12 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // injectChaosInParallelMode inject the DNS Chaos in all target application in parallel mode (all at once) func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetPodList apiv1.PodList, clients clients.ClientSets, chaosDetails *types.ChaosDetails, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodDNSFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodDNSFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + ), + ) defer span.End() var err error @@ -264,10 +278,15 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex helperPod.Spec.Volumes = append(helperPod.Spec.Volumes, common.GetSidecarVolumes(chaosDetails)...) } - _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) + createdHelperPod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} } + span.SetAttributes( + attribute.String("helper.pod.name", createdHelperPod.Name), + attribute.String("helper.image.name", createdHelperPod.Spec.Containers[0].Image), + ) + return nil } diff --git a/chaoslib/litmus/pod-fio-stress/lib/pod-fio-stress.go b/chaoslib/litmus/pod-fio-stress/lib/pod-fio-stress.go index 56cecf6e1..cf59b7e03 100644 --- a/chaoslib/litmus/pod-fio-stress/lib/pod-fio-stress.go +++ b/chaoslib/litmus/pod-fio-stress/lib/pod-fio-stress.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "os/signal" "strings" @@ -29,7 +31,9 @@ import ( // PrepareChaos contains the chaos preparation and injection steps func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodFIOStressFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodFIOStressFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() //Waiting for the ramp time before chaos injection @@ -52,7 +56,8 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper // stressStorage uses the REST API to exec into the target container of the target pod // The function will be constantly increasing the storage utilisation until it reaches the maximum available or allowed number. // Using the TOTAL_CHAOS_DURATION we will need to specify for how long this experiment will last -func stressStorage(experimentDetails *experimentTypes.ExperimentDetails, podName, ns string, clients clients.ClientSets, stressErr chan error) { +func stressStorage(ctx context.Context, experimentDetails *experimentTypes.ExperimentDetails, podName, ns string, clients clients.ClientSets, stressErr chan error) { + span := trace.SpanFromContext(ctx) log.Infof("The storage consumption is: %vM", experimentDetails.Size) @@ -62,6 +67,7 @@ func stressStorage(experimentDetails *experimentTypes.ExperimentDetails, podName if experimentDetails.GroupReporting { fioCmd += " --group_reporting" } + span.SetAttributes(attribute.String("inject.command_line", fioCmd)) log.Infof("Running the command:\n%v", fioCmd) command := []string{"/bin/sh", "-c", fioCmd} @@ -110,7 +116,13 @@ func experimentExecution(ctx context.Context, experimentsDetails *experimentType // injectChaosInSerialMode stressed the storage of all target application in serial mode (one by one) func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetPodList corev1.PodList, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodFIOStressFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodFIOStressFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + ), + ) defer span.End() // creating err channel to receive the error from the go routine @@ -142,7 +154,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment "Target Pod": pod.Name, "Space Consumption(MB)": experimentsDetails.Size, }) - go stressStorage(experimentsDetails, pod.Name, pod.Namespace, clients, stressErr) + go stressStorage(ctx, experimentsDetails, pod.Name, pod.Namespace, clients, stressErr) log.Infof("[Chaos]:Waiting for: %vs", experimentsDetails.ChaosDuration) @@ -168,7 +180,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment } case <-signChan: log.Info("[Chaos]: Revert Started") - if err := killStressSerial(experimentsDetails.TargetContainer, pod.Name, pod.Namespace, experimentsDetails.ChaosKillCmd, clients); err != nil { + if err := killStressSerial(ctx, experimentsDetails.TargetContainer, pod.Name, pod.Namespace, experimentsDetails.ChaosKillCmd, clients); err != nil { log.Errorf("Error in Kill stress after abortion, err: %v", err) } err := cerrors.Error{ErrorCode: cerrors.ErrorTypeExperimentAborted, Target: fmt.Sprintf("{podName: %s, namespace: %s, container: %s}", pod.Name, pod.Namespace, experimentsDetails.TargetContainer), Reason: "experiment is aborted"} @@ -185,7 +197,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment break loop } } - if err := killStressSerial(experimentsDetails.TargetContainer, pod.Name, pod.Namespace, experimentsDetails.ChaosKillCmd, clients); err != nil { + if err := killStressSerial(ctx, experimentsDetails.TargetContainer, pod.Name, pod.Namespace, experimentsDetails.ChaosKillCmd, clients); err != nil { return stacktrace.Propagate(err, "could not revert chaos") } } @@ -194,7 +206,13 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // injectChaosInParallelMode stressed the storage of all target application in parallel mode (all at once) func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetPodList corev1.PodList, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodFIOStressFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodFIOStressFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + ), + ) defer span.End() // creating err channel to receive the error from the go routine @@ -226,7 +244,7 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime "Target Pod": pod.Name, "Storage Consumption(MB)": experimentsDetails.Size, }) - go stressStorage(experimentsDetails, pod.Name, pod.Namespace, clients, stressErr) + go stressStorage(ctx, experimentsDetails, pod.Name, pod.Namespace, clients, stressErr) } log.Infof("[Chaos]:Waiting for: %vs", experimentsDetails.ChaosDuration) @@ -252,7 +270,7 @@ loop: } case <-signChan: log.Info("[Chaos]: Revert Started") - if err := killStressParallel(experimentsDetails.TargetContainer, targetPodList, experimentsDetails.ChaosKillCmd, clients); err != nil { + if err := killStressParallel(ctx, experimentsDetails.TargetContainer, targetPodList, experimentsDetails.ChaosKillCmd, clients); err != nil { log.Errorf("Error in Kill stress after abortion, err: %v", err) } err := cerrors.Error{ErrorCode: cerrors.ErrorTypeExperimentAborted, Reason: "experiment is aborted"} @@ -268,7 +286,7 @@ loop: break loop } } - if err := killStressParallel(experimentsDetails.TargetContainer, targetPodList, experimentsDetails.ChaosKillCmd, clients); err != nil { + if err := killStressParallel(ctx, experimentsDetails.TargetContainer, targetPodList, experimentsDetails.ChaosKillCmd, clients); err != nil { return stacktrace.Propagate(err, "could revert chaos") } @@ -278,7 +296,10 @@ loop: // killStressSerial function to kill a stress process running inside target container // // Triggered by either timeout of chaos duration or termination of the experiment -func killStressSerial(containerName, podName, namespace, KillCmd string, clients clients.ClientSets) error { +func killStressSerial(ctx context.Context, containerName, podName, namespace, KillCmd string, clients clients.ClientSets) error { + span := trace.SpanFromContext(ctx) + span.SetAttributes(attribute.String("recover.command_line", KillCmd)) + // It will contain all the pod & container details required for exec command execCommandDetails := litmusexec.PodDetails{} @@ -294,10 +315,10 @@ func killStressSerial(containerName, podName, namespace, KillCmd string, clients // killStressParallel function to kill all the stress process running inside target container // Triggered by either timeout of chaos duration or termination of the experiment -func killStressParallel(containerName string, targetPodList corev1.PodList, KillCmd string, clients clients.ClientSets) error { +func killStressParallel(ctx context.Context, containerName string, targetPodList corev1.PodList, KillCmd string, clients clients.ClientSets) error { var errList []string for _, pod := range targetPodList.Items { - if err := killStressSerial(containerName, pod.Name, pod.Namespace, KillCmd, clients); err != nil { + if err := killStressSerial(ctx, containerName, pod.Name, pod.Namespace, KillCmd, clients); err != nil { errList = append(errList, err.Error()) } } diff --git a/chaoslib/litmus/pod-memory-hog-exec/lib/pod-memory-hog-exec.go b/chaoslib/litmus/pod-memory-hog-exec/lib/pod-memory-hog-exec.go index cbd9c0f4f..a32482760 100644 --- a/chaoslib/litmus/pod-memory-hog-exec/lib/pod-memory-hog-exec.go +++ b/chaoslib/litmus/pod-memory-hog-exec/lib/pod-memory-hog-exec.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "os/signal" "strconv" @@ -32,7 +34,9 @@ var inject chan os.Signal // PrepareMemoryExecStress contains the chaos preparation and injection steps func PrepareMemoryExecStress(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodMemoryHogExecFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodMemoryHogExecFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // inject channel is used to transmit signal notifications. @@ -60,7 +64,8 @@ func PrepareMemoryExecStress(ctx context.Context, experimentsDetails *experiment // stressMemory Uses the REST API to exec into the target container of the target pod // The function will be constantly increasing the Memory utilisation until it reaches the maximum available or allowed number. // Using the TOTAL_CHAOS_DURATION we will need to specify for how long this experiment will last -func stressMemory(MemoryConsumption, containerName, podName, namespace string, clients clients.ClientSets, stressErr chan error) { +func stressMemory(ctx context.Context, MemoryConsumption, containerName, podName, namespace string, clients clients.ClientSets, stressErr chan error) { + span := trace.SpanFromContext(ctx) log.Infof("The memory consumption is: %v", MemoryConsumption) @@ -68,6 +73,7 @@ func stressMemory(MemoryConsumption, containerName, podName, namespace string, c execCommandDetails := litmusexec.PodDetails{} ddCmd := fmt.Sprintf("dd if=/dev/zero of=/dev/null bs=" + MemoryConsumption + "M") + span.SetAttributes(attribute.String("inject.command_line", ddCmd)) command := []string{"/bin/sh", "-c", ddCmd} litmusexec.SetExecCommandAttributes(&execCommandDetails, podName, containerName, namespace) @@ -115,7 +121,13 @@ func experimentMemory(ctx context.Context, experimentsDetails *experimentTypes.E // injectChaosInSerialMode stressed the memory of all target application serially (one by one) func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetPodList corev1.PodList, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodMemoryHogExecFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodMemoryHogExecFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + ), + ) defer span.End() // run the probes during chaos @@ -160,7 +172,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment "Target Pod": pod.Name, "Memory Consumption(MB)": experimentsDetails.MemoryConsumption, }) - go stressMemory(strconv.Itoa(experimentsDetails.MemoryConsumption), experimentsDetails.TargetContainer, pod.Name, pod.Namespace, clients, stressErr) + go stressMemory(ctx, strconv.Itoa(experimentsDetails.MemoryConsumption), experimentsDetails.TargetContainer, pod.Name, pod.Namespace, clients, stressErr) common.SetTargets(pod.Name, "injected", "pod", chaosDetails) @@ -183,7 +195,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment } case <-signChan: log.Info("[Chaos]: Revert Started") - if err := killStressMemorySerial(experimentsDetails.TargetContainer, pod.Name, pod.Namespace, experimentsDetails.ChaosKillCmd, clients, chaosDetails); err != nil { + if err := killStressMemorySerial(ctx, experimentsDetails.TargetContainer, pod.Name, pod.Namespace, experimentsDetails.ChaosKillCmd, clients, chaosDetails); err != nil { log.Errorf("Error in Kill stress after abortion, err: %v", err) } // updating the chaosresult after stopped @@ -201,7 +213,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment break loop } } - if err := killStressMemorySerial(experimentsDetails.TargetContainer, pod.Name, pod.Namespace, experimentsDetails.ChaosKillCmd, clients, chaosDetails); err != nil { + if err := killStressMemorySerial(ctx, experimentsDetails.TargetContainer, pod.Name, pod.Namespace, experimentsDetails.ChaosKillCmd, clients, chaosDetails); err != nil { return stacktrace.Propagate(err, "could not revert memory stress") } } @@ -211,7 +223,13 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // injectChaosInParallelMode stressed the memory of all target application in parallel mode (all at once) func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetPodList corev1.PodList, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodMemoryHogExecFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodMemoryHogExecFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + ), + ) defer span.End() // creating err channel to receive the error from the go routine @@ -257,7 +275,7 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime "Memory Consumption(MB)": experimentsDetails.MemoryConsumption, }) - go stressMemory(strconv.Itoa(experimentsDetails.MemoryConsumption), experimentsDetails.TargetContainer, pod.Name, pod.Namespace, clients, stressErr) + go stressMemory(ctx, strconv.Itoa(experimentsDetails.MemoryConsumption), experimentsDetails.TargetContainer, pod.Name, pod.Namespace, clients, stressErr) } } @@ -280,7 +298,7 @@ loop: } case <-signChan: log.Info("[Chaos]: Revert Started") - if err := killStressMemoryParallel(experimentsDetails.TargetContainer, targetPodList, experimentsDetails.ChaosKillCmd, clients, chaosDetails); err != nil { + if err := killStressMemoryParallel(ctx, experimentsDetails.TargetContainer, targetPodList, experimentsDetails.ChaosKillCmd, clients, chaosDetails); err != nil { log.Errorf("Error in Kill stress after abortion, err: %v", err) } // updating the chaosresult after stopped @@ -297,13 +315,16 @@ loop: break loop } } - return killStressMemoryParallel(experimentsDetails.TargetContainer, targetPodList, experimentsDetails.ChaosKillCmd, clients, chaosDetails) + return killStressMemoryParallel(ctx, experimentsDetails.TargetContainer, targetPodList, experimentsDetails.ChaosKillCmd, clients, chaosDetails) } // killStressMemorySerial function to kill a stress process running inside target container // // Triggered by either timeout of chaos duration or termination of the experiment -func killStressMemorySerial(containerName, podName, namespace, memFreeCmd string, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error { +func killStressMemorySerial(ctx context.Context, containerName, podName, namespace, memFreeCmd string, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error { + span := trace.SpanFromContext(ctx) + span.SetAttributes(attribute.String("recover.command_line", memFreeCmd)) + // It will contains all the pod & container details required for exec command execCommandDetails := litmusexec.PodDetails{} @@ -320,10 +341,10 @@ func killStressMemorySerial(containerName, podName, namespace, memFreeCmd string // killStressMemoryParallel function to kill all the stress process running inside target container // Triggered by either timeout of chaos duration or termination of the experiment -func killStressMemoryParallel(containerName string, targetPodList corev1.PodList, memFreeCmd string, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error { +func killStressMemoryParallel(ctx context.Context, containerName string, targetPodList corev1.PodList, memFreeCmd string, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error { var errList []string for _, pod := range targetPodList.Items { - if err := killStressMemorySerial(containerName, pod.Name, pod.Namespace, memFreeCmd, clients, chaosDetails); err != nil { + if err := killStressMemorySerial(ctx, containerName, pod.Name, pod.Namespace, memFreeCmd, clients, chaosDetails); err != nil { errList = append(errList, err.Error()) } } diff --git a/chaoslib/litmus/pod-network-partition/lib/pod-network-partition.go b/chaoslib/litmus/pod-network-partition/lib/pod-network-partition.go index ac1b37717..139370d08 100644 --- a/chaoslib/litmus/pod-network-partition/lib/pod-network-partition.go +++ b/chaoslib/litmus/pod-network-partition/lib/pod-network-partition.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "os/signal" "strings" @@ -35,7 +37,9 @@ var ( // PrepareAndInjectChaos contains the prepration & injection steps func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodNetworkPartitionFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodNetworkPartitionFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // inject channel is used to transmit signal notifications. @@ -145,7 +149,14 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy // createNetworkPolicy creates the network policy in the application namespace // it blocks ingress/egress traffic for the targeted application for specific/all IPs func createNetworkPolicy(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, networkPolicy *NetworkPolicy, runID string) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodNetworkPartitionFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodNetworkPartitionFault", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("network.policy.name", experimentsDetails.ExperimentName+"-np-"+runID), + attribute.String("network.policy.type", experimentsDetails.PolicyTypes), + ), + ) defer span.End() np := &networkv1.NetworkPolicy{ diff --git a/chaoslib/litmus/redfish-node-restart/lib/redfish-node-restart.go b/chaoslib/litmus/redfish-node-restart/lib/redfish-node-restart.go index 61fa9c44d..b3a78eb19 100644 --- a/chaoslib/litmus/redfish-node-restart/lib/redfish-node-restart.go +++ b/chaoslib/litmus/redfish-node-restart/lib/redfish-node-restart.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "time" redfishLib "github.com/litmuschaos/litmus-go/pkg/baremetal/redfish" @@ -20,7 +22,12 @@ import ( // injectChaos initiates node restart chaos on the target node func injectChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectRedfishNodeRestartFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectRedfishNodeRestartFault", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + ), + ) defer span.End() URL := fmt.Sprintf("https://%v/redfish/v1/Systems/System.Embedded.1/Actions/ComputerSystem.Reset", experimentsDetails.IPMIIP) @@ -54,7 +61,9 @@ func experimentExecution(ctx context.Context, experimentsDetails *experimentType // PrepareChaos contains the chaos prepration and injection steps func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareRedfishNodeRestartFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareRedfishNodeRestartFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() //Waiting for the ramp time before chaos injection diff --git a/chaoslib/litmus/spring-boot-chaos/lib/spring-boot-chaos.go b/chaoslib/litmus/spring-boot-chaos/lib/spring-boot-chaos.go index 6980608a6..1fb03bd07 100644 --- a/chaoslib/litmus/spring-boot-chaos/lib/spring-boot-chaos.go +++ b/chaoslib/litmus/spring-boot-chaos/lib/spring-boot-chaos.go @@ -5,6 +5,8 @@ import ( "context" "encoding/json" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "net/http" "os" "os/signal" @@ -55,7 +57,9 @@ func SetTargetPodList(experimentsDetails *experimentTypes.ExperimentDetails, cli // PrepareChaos contains the preparation steps before chaos injection func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareSpringBootFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareSpringBootFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // Waiting for the ramp time before chaos injection @@ -218,7 +222,15 @@ func disableChaosMonkey(ctx context.Context, chaosMonkeyPort string, chaosMonkey // injectChaosInSerialMode injects chaos monkey assault on pods in serial mode(one by one) func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails, eventsDetails *types.EventDetails, resultDetails *types.ResultDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectSpringBootFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectSpringBootFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("cm.port", experimentsDetails.ChaosMonkeyPort), + attribute.String("cm.path", experimentsDetails.ChaosMonkeyPath), + ), + ) defer span.End() // run the probes during chaos @@ -308,7 +320,15 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // injectChaosInParallelMode injects chaos monkey assault on pods in parallel mode (all at once) func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails, eventsDetails *types.EventDetails, resultDetails *types.ResultDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectSpringBootFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectSpringBootFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("cm.port", experimentsDetails.ChaosMonkeyPort), + attribute.String("cm.path", experimentsDetails.ChaosMonkeyPath), + ), + ) defer span.End() // run the probes during chaos diff --git a/chaoslib/litmus/stress-chaos/helper/stress-helper.go b/chaoslib/litmus/stress-chaos/helper/stress-helper.go index dd310f07a..4c9015c1b 100644 --- a/chaoslib/litmus/stress-chaos/helper/stress-helper.go +++ b/chaoslib/litmus/stress-chaos/helper/stress-helper.go @@ -9,6 +9,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" "io" "os" "os/exec" @@ -87,6 +88,8 @@ func Helper(ctx context.Context, clients clients.ClientSets) { // Set the chaos result uid result.SetResultUID(&resultDetails, clients, &chaosDetails) + span.SetAttributes(attribute.String("container.runtime", experimentsDetails.ContainerRuntime)) + if err := prepareStressChaos(&experimentsDetails, clients, &eventsDetails, &chaosDetails, &resultDetails); err != nil { // update failstep inside chaosresult if resultErr := result.UpdateFailedStepFromHelper(&resultDetails, &chaosDetails, clients, err); resultErr != nil { @@ -562,7 +565,6 @@ func injectChaos(t targetDetails, stressors, stressType string) (*exec.Cmd, erro if stressType == "pod-io-stress" { stressCommand = fmt.Sprintf("pause nsutil -t %v -p -m -- %v", strconv.Itoa(t.Pid), stressors) } - log.Infof("[Info]: starting process: %v", stressCommand) // launch the stress-ng process on the target container in paused mode diff --git a/chaoslib/litmus/stress-chaos/lib/stress-chaos.go b/chaoslib/litmus/stress-chaos/lib/stress-chaos.go index bfa6c878c..2e7e37310 100644 --- a/chaoslib/litmus/stress-chaos/lib/stress-chaos.go +++ b/chaoslib/litmus/stress-chaos/lib/stress-chaos.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "strconv" "strings" @@ -27,7 +29,9 @@ import ( // PrepareAndInjectStressChaos contains the prepration & injection steps for the stress experiments. func PrepareAndInjectStressChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodStressFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PreparePodStressFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() var err error //Set up the tunables if provided in range @@ -109,7 +113,12 @@ func PrepareAndInjectStressChaos(ctx context.Context, experimentsDetails *experi // injectChaosInSerialMode inject the stress chaos in all target application serially (one by one) func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetPodList apiv1.PodList, clients clients.ClientSets, chaosDetails *types.ChaosDetails, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodStressFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodStressFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + ), + ) defer span.End() // run the probes during chaos @@ -167,7 +176,12 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // injectChaosInParallelMode inject the stress chaos in all target application in parallel mode (all at once) func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, targetPodList apiv1.PodList, clients clients.ClientSets, chaosDetails *types.ChaosDetails, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodStressFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectPodStressFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + ), + ) defer span.End() var err error @@ -305,10 +319,15 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex helperPod.Spec.Volumes = append(helperPod.Spec.Volumes, common.GetSidecarVolumes(chaosDetails)...) } - _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) + createdHelperPod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} } + span.SetAttributes( + attribute.String("helper.pod.name", createdHelperPod.Name), + attribute.String("helper.image.name", createdHelperPod.Spec.Containers[0].Image), + ) + return nil } diff --git a/chaoslib/litmus/vm-poweroff/lib/vm-poweroff.go b/chaoslib/litmus/vm-poweroff/lib/vm-poweroff.go index e828be326..c2d63ccb0 100644 --- a/chaoslib/litmus/vm-poweroff/lib/vm-poweroff.go +++ b/chaoslib/litmus/vm-poweroff/lib/vm-poweroff.go @@ -3,6 +3,8 @@ package lib import ( "context" "fmt" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "os" "os/signal" "strings" @@ -27,7 +29,9 @@ var inject, abort chan os.Signal // InjectVMPowerOffChaos injects the chaos in serial or parallel mode func InjectVMPowerOffChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails, cookie string) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareVMPowerOffFault") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareVMPowerOffFault", + trace.WithAttributes(attribute.Int("experiment.ramptime", experimentsDetails.RampTime)), + ) defer span.End() // inject channel is used to transmit signal notifications. inject = make(chan os.Signal, 1) @@ -75,7 +79,14 @@ func InjectVMPowerOffChaos(ctx context.Context, experimentsDetails *experimentTy // injectChaosInSerialMode stops VMs in serial mode i.e. one after the other func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, vmIdList []string, cookie string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "injectVMPowerOffFaultInSerialMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "injectVMPowerOffFaultInSerialMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("vmware.vm.id", experimentsDetails.VMIds), + ), + ) defer span.End() select { @@ -149,7 +160,14 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // injectChaosInParallelMode stops VMs in parallel mode i.e. all at once func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, vmIdList []string, cookie string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "injectVMPowerOffFaultInParallelMode") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "injectVMPowerOffFaultInParallelMode", + trace.WithAttributes( + attribute.Int("chaos.duration", experimentsDetails.ChaosDuration), + attribute.Int("chaos.interval", experimentsDetails.ChaosInterval), + attribute.String("chaos.namespace", experimentsDetails.ChaosNamespace), + attribute.String("vmware.vm.id", experimentsDetails.VMIds), + ), + ) defer span.End() select { diff --git a/pkg/probe/probe.go b/pkg/probe/probe.go index 3800b6c6e..c2611884e 100644 --- a/pkg/probe/probe.go +++ b/pkg/probe/probe.go @@ -28,7 +28,9 @@ var err error // RunProbes contains the steps to trigger the probes // It contains steps to trigger all three probes: k8sprobe, httpprobe, cmdprobe func RunProbes(ctx context.Context, chaosDetails *types.ChaosDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, phase string, eventsDetails *types.EventDetails) error { - ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "RunProbes") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "RunProbes", + trace.WithAttributes(attribute.String("probe.phase", phase)), + ) defer span.End() // get the probes details from the chaosengine @@ -37,8 +39,6 @@ func RunProbes(ctx context.Context, chaosDetails *types.ChaosDetails, clients cl return err } - span.SetAttributes(attribute.String("probe.phase", phase)) - switch strings.ToLower(phase) { //execute probes for the prechaos phase case "prechaos":