
Commit f81a30b

vimalmeena238 authored and MeharwadeDivya committed
Bug fix: resolve resource discovery deadlocks and improve error handling
1 parent 456145b commit f81a30b

File tree

3 files changed: +131 -16 lines changed

internal/resourcediscovery/export_compartment.go

Lines changed: 38 additions & 14 deletions
@@ -19,6 +19,7 @@ import (
     "sync"
     "time"

+    "github.com/hashicorp/go-multierror"
     "github.com/hashicorp/go-version"

     "github.com/oracle/terraform-provider-oci/internal/globalvar"
@@ -489,6 +490,7 @@ func runExportCommand(ctx *resourceDiscoveryContext) error {
             utils.Debugf("[DEBUG] discover: Running step %d", i)
             defer elapsed(fmt.Sprintf("time taken in discovering resources for step %d", i), step.getBaseStep(), Discovery)()
             defer func() {
+                <-sem
                 if r := recover(); r != nil {
                     utils.Logf("[ERROR] panic in discover goroutine")
                     utils.Logf("[ERROR] panic in discover goroutine")
@@ -520,7 +522,6 @@ func runExportCommand(ctx *resourceDiscoveryContext) error {

             utils.Debugf("[DEBUG] discover: Completed step %d", i)
             utils.Debugf("[DEBUG] discovered %d resources for step %d", len(step.getDiscoveredResources()), i)
-            <-sem
         }(i, step)

     }
@@ -560,7 +561,13 @@ func runExportCommand(ctx *resourceDiscoveryContext) error {
     // Reset discovered resources if already set by writeTmpConfigurationForImport
     ctx.discoveredResources = make([]*OCIResource, 0)

-    errorChannel := make(chan error)
+    /*
+        sem allows only arg.Parallelism steps to run concurrently, which is usually far fewer than the total number of steps.
+        So errorChannel needs a buffer of at least len(steps) to give every step a chance to report its error.
+        Otherwise, when multiple steps fail, they block on writing their errors
+        while pending steps wait to acquire sem, leading to a deadlock.
+    */
+    errorChannel := make(chan error, len(steps))
     var configWg sync.WaitGroup
     configWg.Add(len(steps))
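Editor's note: to make the comment above concrete, here is a minimal sketch (hypothetical names, not the provider's code) of the pattern this hunk establishes. The semaphore is released inside the deferred function so even a panicking or failing step frees its slot, and the error channel is buffered to the number of steps so failing steps never block on send:

package main

import (
    "errors"
    "fmt"
    "sync"
)

func main() {
    const nSteps, parallelism = 8, 2
    sem := make(chan struct{}, parallelism) // at most `parallelism` steps run at once
    errCh := make(chan error, nSteps)       // buffered: every step can report without blocking
    var wg sync.WaitGroup
    wg.Add(nSteps)

    for i := 0; i < nSteps; i++ {
        sem <- struct{}{} // acquire a slot before launching the step
        go func(i int) {
            defer wg.Done()
            defer func() { <-sem }() // release the slot even if the step panics
            if i%2 == 0 {            // pretend even-numbered steps fail
                errCh <- fmt.Errorf("step %d failed", i)
            }
        }(i)
    }
    wg.Wait()
    close(errCh)

    var all error
    for err := range errCh { // drain every reported error
        all = errors.Join(all, err) // stdlib aggregation (Go 1.20+), in place of go-multierror
    }
    fmt.Println(all)
}

With an unbuffered errCh, the four failing goroutines would block on send with no receiver running, the remaining steps would block on sem, and the program would deadlock — exactly the situation the commit describes.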

@@ -574,6 +581,7 @@ func runExportCommand(ctx *resourceDiscoveryContext) error {
         go func(i int, step resourceDiscoveryStep) {
             utils.Debugf("[DEBUG] writeConfiguration: Running step %d", i)
             defer func() {
+                <-sem
                 if r := recover(); r != nil {
                     utils.Logf("[ERROR] panic in writeConfiguration goroutine")
                     debug.PrintStack()
@@ -586,7 +594,6 @@ func runExportCommand(ctx *resourceDiscoveryContext) error {
             }

             utils.Debugf("[DEBUG] writeConfiguration: Completed step %d", i)
-            <-sem
         }(i, step)
     }

@@ -603,10 +610,13 @@ func runExportCommand(ctx *resourceDiscoveryContext) error {
         utils.Debugf("[DEBUG] ~~~~~~ writeConfiguration steps completed ~~~~~~")
         utils.Debugf("[DEBUG] writing config took %v\n", time.Since(configStart))
         break
-    case err := <-errorChannel:
+    case errs := <-errorChannel:
         close(errorChannel)
-        utils.Logf("[ERROR] error writing final configuration for resources found: %s", err.Error())
-        return err
+        for i := 0; i < len(errorChannel); i++ {
+            errs = multierror.Append(errs, <-errorChannel)
+        }
+        utils.Logf("[ERROR] error writing final configuration for resources found: %s", errs.Error())
+        return errs
     }

     region, err := exportConfigProvider.Region()
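Editor's note: the drain loop above relies on a Go property worth spelling out — close() forbids further sends, but values already sitting in the buffer can still be received. A small stand-alone demo (mine, not from the commit); since len on a channel shrinks with each receive, the demo snapshots the count first (ranging over the closed channel works equally well):

package main

import (
    "errors"
    "fmt"
)

func main() {
    ch := make(chan error, 4)
    ch <- errors.New("step 1 failed")
    ch <- errors.New("step 3 failed")
    close(ch) // no further sends are allowed, but queued values stay readable

    remaining := len(ch) // snapshot the count of buffered errors: 2
    for i := 0; i < remaining; i++ {
        fmt.Println(<-ch) // prints both buffered errors
    }
}
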
@@ -651,7 +661,13 @@ func generateStateParallel(ctx *resourceDiscoveryContext, steps []resourceDiscov
     defer cleanupTempStateFiles(ctx)
     defer elapsed("generating state in parallel", nil, 0)()

-    errorChannel := make(chan error)
+    /*
+        sem allows only arg.Parallelism steps to run concurrently, which is usually far fewer than the total number of steps.
+        So errorChannel needs a buffer of at least len(steps) to give every step a chance to report its error.
+        Otherwise, when multiple steps fail, they block on writing their errors
+        while pending steps wait to acquire sem, leading to a deadlock.
+    */
+    errorChannel := make(chan error, len(steps))
     var stateWg sync.WaitGroup
     wgDone := make(chan bool)

@@ -669,6 +685,7 @@ func generateStateParallel(ctx *resourceDiscoveryContext, steps []resourceDiscov
             utils.Debugf("[DEBUG] writing temp config and state: Running step %d", i)
             defer elapsed(fmt.Sprintf("time taken by step %s to generate state", fmt.Sprint(i)), step.getBaseStep(), GeneratingState)()
             defer func() {
+                <-sem
                 if r := recover(); r != nil {
                     utils.Logf("[ERROR] panic in writing temp config and state goroutine")
                     debug.PrintStack()
@@ -680,17 +697,20 @@ func generateStateParallel(ctx *resourceDiscoveryContext, steps []resourceDiscov
             /* Generate temporary HCL configs from all discovered resources to run import
             Final configuration will be generated after import so that we can exclude the resources for which import failed
             and also remove the references to failed resources from the referenceMap */
-            if err := step.writeTmpConfigurationForImport(); err != nil {
-                errorChannel <- fmt.Errorf("[ERROR] error writing temp config for resources found: %s", err.Error())
+            var err error
+            if err = step.writeTmpConfigurationForImport(); err != nil {
+                errorChannel <- fmt.Errorf("[ERROR] error while writing temp config for resources found in step %d: %s", i, err.Error())
             }

             // Write temp state file for each service, this step will import resources into a separate state file for each service in parallel
-            if err := step.writeTmpState(); err != nil {
-                errorChannel <- fmt.Errorf("[ERROR] error writing temp state for resources found: %s", err.Error())
+            // If writing the temp configuration returned an error, don't attempt to write the temp state
+            if err == nil {
+                if err = step.writeTmpState(); err != nil {
+                    errorChannel <- fmt.Errorf("[ERROR] error while writing temp state for resources found in step %d: %s", i, err.Error())
+                }
             }

             utils.Debugf("writing temp config and state: Completed step %d", i)
-            <-sem
         }(i, step)
     }

@@ -706,9 +726,13 @@ func generateStateParallel(ctx *resourceDiscoveryContext, steps []resourceDiscov
     case <-wgDone:
         utils.Debugf("[DEBUG] ~~~~~~ writing temp config and state steps completed ~~~~~~")
         break
-    case err := <-errorChannel:
+    case errs := <-errorChannel:
         close(errorChannel)
-        return err
+        for i := 0; i < len(errorChannel); i++ {
+            errs = multierror.Append(errs, <-errorChannel)
+        }
+        utils.Logf("[ERROR] error writing temp config and state: %s", errs.Error())
+        return errs
     }

     // Generate final state by merging state json generated for all services

internal/resourcediscovery/export_compartment_test.go

Lines changed: 88 additions & 0 deletions
@@ -2222,6 +2222,94 @@ func TestUnitGenerateStateParallel(t *testing.T) {
     assert.NoError(t, err, "")
 }

+func TestUnitGenerateStateParallelWhenTfInitFails(t *testing.T) {
+
+    ctx := getTestCtx()
+    nSteps := 80 // number of steps
+    steps := make([]resourceDiscoveryStep, nSteps)
+
+    compartmentId := resourceDiscoveryTestCompartmentOcid
+    if err := os.Setenv("export_tenancy_id", resourceDiscoveryTestTenancyOcid); err != nil {
+        t.Logf("unable to set export_tenancy_id. err: %v", err)
+        t.Fail()
+    }
+    outputDir, err := os.Getwd()
+    outputDir = fmt.Sprintf("%s%sdiscoveryTest-%d", outputDir, string(os.PathSeparator), time.Now().Nanosecond())
+    if err = os.Mkdir(outputDir, os.ModePerm); err != nil {
+        t.Logf("unable to mkdir %s. err: %v", outputDir, err)
+        t.Fail()
+    }
+
+    for i := 0; i < nSteps; i++ {
+        discoveredResources := []*OCIResource{}
+        discoveredResources = append(discoveredResources, &OCIResource{
+            compartmentId: compartmentId,
+            TerraformResource: TerraformResource{
+                id:             "ocid1.a.b.c",
+                terraformClass: "oci_resource_type1",
+                terraformName:  "type1_res1",
+            },
+            parent: &OCIResource{
+                TerraformResource: TerraformResource{terraformName: "tf"},
+            },
+        })
+
+        steps[i] = &resourceDiscoveryWithTargetIds{
+            resourceDiscoveryBaseStep: resourceDiscoveryBaseStep{
+                ctx:                 ctx,
+                name:                "resources" + fmt.Sprint(i),
+                discoveredResources: discoveredResources,
+                omittedResources:    []*OCIResource{},
+            },
+        }
+    }
+
+    type args struct {
+        steps []resourceDiscoveryStep
+        ctx   *resourceDiscoveryContext
+    }
+    t_args := args{
+        steps: steps,
+        ctx:   ctx,
+    }
+    tests := []struct {
+        name      string
+        args      args
+        mock      func()
+        wantError bool
+    }{
+        {
+            name: "If import failed, should return error",
+            args: t_args,
+            mock: func() {
+                t_args.ctx.terraformProviderBinaryPath = "tf"
+                t_args.ctx.OutputDir = &outputDir
+                ctxTerraformImportVar = func(ctx *resourceDiscoveryContext, ctxBackground context.Context, address, id string, importArgs ...tfexec.ImportOption) error {
+                    return nil
+                }
+                terraformInitMockVar = func(r *resourceDiscoveryBaseStep, backgroundCtx context.Context, initArgs []tfexec.InitOption) error {
+                    return errors.New("Init failed")
+                }
+                sem = make(chan struct{}, 4) // Parallelism=4
+                resourcesMap = mockResourcesMap()
+            },
+            wantError: true,
+        },
+    }
+
+    for _, tt := range tests {
+        t.Run(tt.name, func(t *testing.T) {
+            tt.mock()
+            err := generateStateParallel(ctx, steps)
+            if tt.wantError {
+                assert.Error(t, err)
+            } else {
+                assert.NoError(t, err, "")
+            }
+        })
+    }
+}
+
 func TestUnitGenerateState(test *testing.T) {
     defer func() {
         outputDir := getOutputDir()

internal/resourcediscovery/export_resource_helpers.go

Lines changed: 5 additions & 2 deletions
@@ -335,6 +335,10 @@ func (r *resourceDiscoveryBaseStep) writeTmpState() error {
     if !isInitDone {
         utils.Debugf("[DEBUG] acquiring lock to run terraform init")
         initLock.Lock()
+        defer func() {
+            utils.Debugf("[DEBUG] releasing lock")
+            initLock.Unlock()
+        }()
         // Check for existence of .terraform folder to make sure init is not run already by another thread
         if _, err := os.Stat(fmt.Sprintf("%s%s.terraform", *r.ctx.OutputDir, string(os.PathSeparator))); os.IsNotExist(err) {
             // Run init command if not already run
@@ -349,12 +353,11 @@ func (r *resourceDiscoveryBaseStep) writeTmpState() error {
             }

             if err := terraformInitMockVar(r, backgroundCtx, initArgs); err != nil {
+                utils.Debugf("[ERROR] error occurred while terraform init: %s", err.Error())
                 return err
             }
             isInitDone = true
         }
-        initLock.Unlock()
-        utils.Debugf("[DEBUG] releasing lock")
     }
     tmpStateOutputDir := filepath.Join(*r.ctx.OutputDir, "tmp", r.name)
     tmpStateOutputFilePrefix := filepath.Join(tmpStateOutputDir, globalvar.DefaultTmpStateFile)
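Editor's note on this pair of hunks: previously initLock.Unlock() sat on the success path after the init block, so the newly added early return on a failed init would have left the mutex held and blocked every subsequent step on Lock(). Moving the unlock into a defer releases it on every exit path. A minimal sketch (assumed names, not the provider's code) contrasting the two shapes:

package main

import (
    "errors"
    "fmt"
    "sync"
)

var initLock sync.Mutex

// runInitLeaky mirrors the old shape: the error return exits with the
// lock still held, so the next caller of Lock() blocks forever.
func runInitLeaky(fail bool) error {
    initLock.Lock()
    if fail {
        return errors.New("init failed") // BUG: initLock is never released
    }
    initLock.Unlock() // only reached on success
    return nil
}

// runInitSafe mirrors the fixed shape: defer releases the lock on every
// path, including error returns and panics.
func runInitSafe(fail bool) error {
    initLock.Lock()
    defer initLock.Unlock()
    if fail {
        return errors.New("init failed")
    }
    return nil
}

func main() {
    fmt.Println(runInitSafe(true))  // "init failed", and the lock is free again
    fmt.Println(runInitSafe(false)) // <nil>
    // Calling runInitLeaky(true) and then Lock() again would deadlock.
    _ = runInitLeaky
}
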
