@@ -3,9 +3,13 @@ package workspace
33import (
44 "context"
55 "encoding/base64"
6+ "log"
7+ "os"
68 "path/filepath"
9+ "strconv"
710 "strings"
811 "sync"
12+ "time"
913
1014 "github.com/databricks/terraform-provider-databricks/common"
1115
@@ -139,6 +143,99 @@ func (a NotebooksAPI) Mkdirs(path string) error {
139143 }, nil )
140144}
141145
146+ type syncAnswer struct {
147+ MU sync.Mutex
148+ data []ObjectStatus
149+ }
150+
151+ func (a * syncAnswer ) append (objs []ObjectStatus ) {
152+ a .MU .Lock ()
153+ a .data = append (a .data , objs ... )
154+ a .MU .Unlock ()
155+ }
156+
// directoryInfo is the unit of work passed through the directory channel
// during the parallel listing.
type directoryInfo struct {
	// Path is the workspace path of the directory to list.
	Path string
	// Attempts is carried along when a directory is re-queued after a failed
	// listing; a freshly discovered directory starts at zero.
	Attempts int
}
161+
// Settings of the parallel workspace listing.
const (
	// Environment variables that override the defaults below.
	envVarListParallelism      = "EXPORTER_WS_LIST_PARALLELISM"
	envVarDirectoryChannelSize = "EXPORTER_CHANNEL_SIZE"

	// Defaults used when the corresponding environment variable is not set
	// or cannot be parsed.
	defaultWorkersPoolSize      = 5
	defaultDirectoryChannelSize = 100000

	// Bound consulted when deciding whether a directory whose listing failed
	// is put back on the channel.
	directoryListingMaxAttempts = 3
)
170+
// getFormattedNowTime returns the current local time rendered in the
// RFC 3339 layout with nanosecond precision. It is used as a prefix in the
// debug log lines of the parallel listing.
func getFormattedNowTime() string {
	now := time.Now().Local()
	return now.Format(time.RFC3339Nano)
}
174+
175+ func (a NotebooksAPI ) recursiveAddPathsParallel (directory directoryInfo , dirChannel chan directoryInfo ,
176+ answer * syncAnswer , wg * sync.WaitGroup ) {
177+ defer wg .Done ()
178+ notebookInfoList , err := a .list (directory .Path )
179+ if err != nil {
180+ log .Printf ("[WARN] error listing '%s': %v" , directory .Path , err )
181+ if directory .Attempts < directoryListingMaxAttempts {
182+ wg .Add (1 )
183+ dirChannel <- directoryInfo {Path : directory .Path , Attempts : directory .Attempts + 1 }
184+ }
185+ }
186+ answer .append (notebookInfoList )
187+ for _ , v := range notebookInfoList {
188+ if v .ObjectType == Directory {
189+ wg .Add (1 )
190+ log .Printf ("[DEBUG] %s: putting directory '%s' into channel. Channel size: %d" ,
191+ getFormattedNowTime (), v .Path , len (dirChannel ))
192+ dirChannel <- directoryInfo {Path : v .Path }
193+ time .Sleep (15 * time .Millisecond )
194+ }
195+ }
196+ }
197+
// getEnvAsInt returns the integer stored in the environment variable envName.
// defaultValue is returned when the variable is not set or when its value is
// not a valid integer. Whitespace around the value is ignored. An unparsable
// value is reported with a warning so that a misconfiguration does not go
// unnoticed.
func getEnvAsInt(envName string, defaultValue int) int {
	raw, exists := os.LookupEnv(envName)
	if !exists {
		return defaultValue
	}
	parsedVal, err := strconv.Atoi(strings.TrimSpace(raw))
	if err != nil {
		log.Printf("[WARN] ignoring invalid integer value %q of environment variable %s, using default %d",
			raw, envName, defaultValue)
		return defaultValue
	}
	return parsedVal
}
207+
208+ func (a NotebooksAPI ) ListParallel (path string , recursive bool ) ([]ObjectStatus , error ) {
209+ var answer syncAnswer
210+ wg := & sync.WaitGroup {}
211+
212+ numWorkers := getEnvAsInt (envVarListParallelism , defaultWorkersPoolSize )
213+ channelSize := getEnvAsInt (envVarDirectoryChannelSize , defaultDirectoryChannelSize )
214+ dirChannel := make (chan directoryInfo , channelSize )
215+ for i := 0 ; i < numWorkers ; i ++ {
216+ t := i
217+ go func () {
218+ log .Printf ("[DEBUG] %s: starting go routine %d" , getFormattedNowTime (), t )
219+ for directory := range dirChannel {
220+ log .Printf ("[DEBUG] %s: processing directory %s" , getFormattedNowTime (), directory .Path )
221+ a .recursiveAddPathsParallel (directory , dirChannel , & answer , wg )
222+ }
223+ }()
224+
225+ }
226+ log .Printf ("[DEBUG] %s: pushing initial path to channel" , getFormattedNowTime ())
227+ wg .Add (1 )
228+ a .recursiveAddPathsParallel (directoryInfo {Path : path }, dirChannel , & answer , wg )
229+ log .Printf ("[DEBUG] %s: starting to wait" , getFormattedNowTime ())
230+ wg .Wait ()
231+ log .Printf ("[DEBUG] %s: closing the directory channel" , getFormattedNowTime ())
232+ close (dirChannel )
233+
234+ answer .MU .Lock ()
235+ defer answer .MU .Unlock ()
236+ return answer .data , nil
237+ }
238+
142239// List will list all objects in a path on the workspace
143240// and with the recursive flag it will recursively list
144241// all the objects
0 commit comments