
Commit 61b0536

Merge pull request #93 from RachelTucker/GOSDK-28-excessive-polling
GOSDK-28: When puts are in progress, the sdk logs are rolling every minute, 48k entries in that time
2 parents 3c014ef + ce99938 commit 61b0536

7 files changed: 267 additions & 73 deletions

helpers/blobStrategy.go

Lines changed: 6 additions & 4 deletions
@@ -4,8 +4,10 @@ import "time"
 
 // Strategy for how to blob objects, used both in writing and reading blob strategies
 type BlobStrategy interface {
-    // performs delay when no job chunks are available
-    delay()
+    // Determines the maximum amount of time to delay between calls to getAvailableJobChunk.
+    // If blobs are finishing processing, then we will query for more ready blobs earlier.
+    // The recommended duration is five minutes.
+    delay() time.Duration
 
     // determines the maximum number of go routines to be created when transferring objects to/from BP
     maxConcurrentTransfers() uint
@@ -21,8 +23,8 @@ type SimpleBlobStrategy struct {
     MaxWaitingTransfers uint
 }
 
-func (strategy *SimpleBlobStrategy) delay() {
-    time.Sleep(strategy.Delay)
+func (strategy *SimpleBlobStrategy) delay() time.Duration {
+    return strategy.Delay
 }
 
 func (strategy *SimpleBlobStrategy) maxConcurrentTransfers() uint {
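
Since delay() now returns a time.Duration instead of sleeping, the caller owns the timing. A minimal sketch of how a polling loop might consume the reshaped contract — the delayProvider/simpleStrategy names and the 100ms interval are illustrative, not part of this commit:

package main

import (
    "fmt"
    "time"
)

// delayProvider mirrors the reshaped BlobStrategy contract: report how long
// to wait rather than blocking the caller. (Hypothetical name for illustration.)
type delayProvider interface {
    delay() time.Duration
}

type simpleStrategy struct{ interval time.Duration }

func (s *simpleStrategy) delay() time.Duration { return s.interval }

func main() {
    var strategy delayProvider = &simpleStrategy{interval: 100 * time.Millisecond}

    // The caller builds a ticker from the strategy's interval, so it can
    // also react to other events (e.g. a done signal) between ticks.
    ticker := time.NewTicker(strategy.delay())
    defer ticker.Stop()

    for i := 0; i < 3; i++ {
        <-ticker.C
        fmt.Println("polling for ready chunks")
    }
}

This signature change is the enabler for the rest of the commit: a blocking delay() forces a fixed polling cadence, while a returned duration lets the producer combine the timer with other wake-up events.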

helpers/consumer.go

Lines changed: 23 additions & 6 deletions
@@ -12,33 +12,50 @@ type consumerImpl struct {
     queue *chan TransferOperation
     waitGroup *sync.WaitGroup
     maxConcurrentOperations uint
+    blobDoneChannel chan<- struct{}
 }
 
-func newConsumer(queue *chan TransferOperation, waitGroup *sync.WaitGroup, maxConcurrentOperations uint) Consumer {
+func newConsumer(queue *chan TransferOperation, blobDoneChannel chan<- struct{}, waitGroup *sync.WaitGroup, maxConcurrentOperations uint) Consumer {
     return &consumerImpl{
         queue: queue,
         waitGroup: waitGroup,
         maxConcurrentOperations: maxConcurrentOperations,
+        blobDoneChannel: blobDoneChannel,
     }
 }
 
-func performTransfer(operation *TransferOperation, semaphore *chan int, waitGroup *sync.WaitGroup) {
-    defer waitGroup.Done()
-    (*operation)()
+func performTransfer(operation TransferOperation, semaphore *chan int, blobDoneChannel chan<- struct{}, jobWaitGroup *sync.WaitGroup, childWaitGroup *sync.WaitGroup) {
+    defer func() {
+        // for each operation that finishes, send a done message to the producer
+        blobDoneChannel <- struct{}{}
+        jobWaitGroup.Done()
+        childWaitGroup.Done()
+    }()
+    operation()
     <- *semaphore
 }
 
 func (consumer *consumerImpl) run() {
+    // Defer closing the blob done channel. This will signal to the producer that it can shut down.
+    defer func() { close(consumer.blobDoneChannel) }()
+
     // semaphore for controlling max number of transfer operations in flight per job
     semaphore := make(chan int, consumer.maxConcurrentOperations + 1)
+
+    var childWaitGroup sync.WaitGroup
     for {
         nextOp, ok := <- *consumer.queue
         if ok {
             semaphore <- 1
-            go performTransfer(&nextOp, &semaphore, consumer.waitGroup)
+            childWaitGroup.Add(1)
+            go performTransfer(nextOp, &semaphore, consumer.blobDoneChannel, consumer.waitGroup, &childWaitGroup)
         } else {
             consumer.waitGroup.Done()
-            return
+            break
         }
     }
+
+    // Wait for all child transfer operations to finish before shutting down.
+    // This stops the done channel from being closed prematurely.
+    childWaitGroup.Wait()
 }
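
The consumer now follows a signal-then-close protocol: each finished transfer sends one message on blobDoneChannel, a child WaitGroup tracks in-flight goroutines, and the channel is closed only after childWaitGroup.Wait(), so no goroutine can send on a closed channel. A self-contained sketch of the same pattern with hypothetical names (worker, task, done), not the SDK's actual types:

package main

import (
    "fmt"
    "sync"
)

func worker(task func(), done chan<- struct{}, child *sync.WaitGroup) {
    defer func() {
        done <- struct{}{} // signal the producer: one more task finished
        child.Done()
    }()
    task()
}

func main() {
    queue := make(chan func(), 5)
    done := make(chan struct{}, 16)

    go func() {
        for i := 0; i < 4; i++ {
            i := i
            queue <- func() { fmt.Println("task", i) }
        }
        close(queue)
    }()

    var child sync.WaitGroup
    for task := range queue {
        child.Add(1)
        go worker(task, done, &child)
    }

    // Wait for all children before closing done, so receivers never see
    // a premature close while signals are still pending.
    child.Wait()
    close(done)

    for range done {
    } // drain; the loop exits because the channel is closed
    fmt.Println("all tasks signaled")
}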

helpers/consumer_test.go

Lines changed: 18 additions & 5 deletions
@@ -1,9 +1,9 @@
 package helpers
 
 import (
-    "testing"
-    "sync"
     "github.com/SpectraLogic/ds3_go_sdk/ds3_utils/ds3Testing"
+    "sync"
+    "testing"
 )
 
 func testTransferBuilder(t *testing.T, i int, resultCount *int, resultMux *sync.Mutex) TransferOperation {
@@ -17,6 +17,7 @@ func testTransferBuilder(t *testing.T, i int, resultCount *int, resultMux *sync.
 }
 
 func TestProducerConsumerModel(t *testing.T) {
+    const numOperations = 10
     var wg sync.WaitGroup
     wg.Add(2)
 
@@ -25,7 +26,7 @@ func TestProducerConsumerModel(t *testing.T) {
 
     var producer = func(queue *chan TransferOperation) {
         defer wg.Done()
-        for i := 0; i < 10; i++ {
+        for i := 0; i < numOperations; i++ {
             wg.Add(1)
 
             var transferOf = testTransferBuilder(t, i, &resultCount, &resultMux)
@@ -39,12 +40,24 @@ func TestProducerConsumerModel(t *testing.T) {
 
     queue := make(chan TransferOperation, 5)
 
-    consumer := newConsumer(&queue, &wg, 5)
+    // make the blob done channel larger than the number of transfer operations queued.
+    blobDoneChannel := make(chan struct{}, numOperations+1)
+
+    consumer := newConsumer(&queue, blobDoneChannel, &wg, 5)
 
     go producer(&queue)
     go consumer.run()
 
     wg.Wait()
 
-    ds3Testing.AssertInt(t, "Executed Transfer Operations", 10, resultCount)
+    ds3Testing.AssertInt(t, "Executed Transfer Operations", numOperations, resultCount)
+
+    // verify that numOperations done messages were sent
+    ds3Testing.AssertInt(t, "Done signals sent", numOperations, len(blobDoneChannel))
+    for len(blobDoneChannel) > 0 {
+        _, ok := <-blobDoneChannel
+        ds3Testing.AssertBool(t, "expected channel not to be closed", true, ok)
+    }
+    _, ok := <-blobDoneChannel
+    ds3Testing.AssertBool(t, "expected channel to be closed", false, ok)
 }
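
The new assertions lean on Go's receive-with-ok semantics: a closed channel still delivers its buffered values with ok == true, and reports ok == false only once drained, without blocking. A standalone illustration (not SDK code):

package main

import "fmt"

func main() {
    ch := make(chan struct{}, 2)
    ch <- struct{}{}
    ch <- struct{}{}
    close(ch)

    // Buffered values are still delivered after close, with ok == true.
    _, ok := <-ch
    fmt.Println("first receive, ok =", ok) // true
    _, ok = <-ch
    fmt.Println("second receive, ok =", ok) // true

    // Once drained, a closed channel reports ok == false without blocking.
    _, ok = <-ch
    fmt.Println("after drain, ok =", ok) // false
}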

helpers/getProducer.go

Lines changed: 109 additions & 27 deletions
@@ -8,6 +8,7 @@ import (
     "github.com/SpectraLogic/ds3_go_sdk/sdk_log"
     "io"
     "sync"
+    "time"
 )
 
 type getProducer struct {
@@ -22,9 +23,25 @@ type getProducer struct {
     deferredBlobQueue BlobDescriptionQueue // queue of blobs whose channels are not yet ready for transfer
     rangeFinder ranges.BlobRangeFinder
     sdk_log.Logger
+
+    // Channel that represents blobs that have finished being processed.
+    // This will be written to once a get object operation has completed, regardless of error or success.
+    // This is used to notify the runner to re-check if any blobs are now ready to be retrieved.
+    blobDoneChannel <-chan struct{}
+
+    // Used to track if we are done queuing blobs
+    continueQueuingBlobs bool
 }
 
-func newGetProducer(jobMasterObjectList *ds3Models.MasterObjectList, getObjects *[]helperModels.GetObject, queue *chan TransferOperation, strategy *ReadTransferStrategy, client *ds3.Client, waitGroup *sync.WaitGroup) *getProducer {
+func newGetProducer(
+    jobMasterObjectList *ds3Models.MasterObjectList,
+    getObjects *[]helperModels.GetObject,
+    queue *chan TransferOperation,
+    strategy *ReadTransferStrategy,
+    blobDoneChannel <-chan struct{},
+    client *ds3.Client,
+    waitGroup *sync.WaitGroup) *getProducer {
+
     return &getProducer{
         JobMasterObjectList: jobMasterObjectList,
         GetObjects: getObjects,
@@ -37,6 +54,8 @@ func newGetProducer(jobMasterObjectList *ds3Models.MasterObjectList, getObjects
         deferredBlobQueue: NewBlobDescriptionQueue(),
         rangeFinder: ranges.NewBlobRangeFinder(getObjects),
         Logger: client.Logger, //use the same logger as the client
+        blobDoneChannel: blobDoneChannel,
+        continueQueuingBlobs: true,
     }
 }
 
@@ -217,40 +236,103 @@ func (producer *getProducer) processWaitingBlobs(bucketName string, jobId string
 // Each transfer operation will retrieve one blob of content from the BP.
 // Once all blobs have been queued to be transferred, the producer will finish, even if all operations have not been consumed yet.
 func (producer *getProducer) run() error {
-    defer close(*producer.queue)
-
     // determine number of blobs to be processed
     var totalBlobCount int64 = producer.totalBlobCount()
     producer.Debugf("job status totalBlobs=%d processedBlobs=%d", totalBlobCount, producer.processedBlobTracker.NumberOfProcessedBlobs())
 
-    // process all chunks and make sure all blobs are queued for transfer
-    for producer.processedBlobTracker.NumberOfProcessedBlobs() < totalBlobCount || producer.deferredBlobQueue.Size() > 0 {
-        // Get the list of available chunks that the server can receive. The server may
-        // not be able to receive everything, so not all chunks will necessarily be
-        // returned
-        chunksReady := ds3Models.NewGetJobChunksReadyForClientProcessingSpectraS3Request(producer.JobMasterObjectList.JobId)
-        chunksReadyResponse, err := producer.client.GetJobChunksReadyForClientProcessingSpectraS3(chunksReady)
-        if err != nil {
-            producer.Errorf("unrecoverable error: %v", err)
-            return err
-        }
+    // initiate first set of blob transfers
+    err := producer.queueBlobsReadyForTransfer(totalBlobCount)
+    if err != nil {
+        return err
+    }
 
-        // Check to see if any chunks can be processed
-        numberOfChunks := len(chunksReadyResponse.MasterObjectList.Objects)
-        if numberOfChunks > 0 {
-            // Loop through all the chunks that are available for processing, and send
-            // the files that are contained within them.
-            for _, curChunk := range chunksReadyResponse.MasterObjectList.Objects {
-                producer.processChunk(&curChunk, *chunksReadyResponse.MasterObjectList.BucketName, chunksReadyResponse.MasterObjectList.JobId)
+    // wait for either a timer or for at least one blob to finish before attempting to queue more items for transfer
+    ticker := time.NewTicker(producer.strategy.BlobStrategy.delay())
+    var fatalErr error
+    for {
+        select {
+        case _, ok := <-producer.blobDoneChannel:
+            if ok {
+                // reset the timer
+                ticker.Stop()
+                ticker = time.NewTicker(producer.strategy.BlobStrategy.delay())
+
+                err = producer.queueBlobsReadyForTransfer(totalBlobCount)
+                if err != nil {
+                    // A fatal error has occurred; stop queuing blobs for processing and
+                    // close the processing queue to signal the consumer we won't be sending any more blobs.
+                    producer.continueQueuingBlobs = false
+                    fatalErr = err
+                    close(*producer.queue)
+                }
+            } else {
+                // The consumer closed the channel, signaling completion.
+                return fatalErr
+            }
+        case <-ticker.C:
+            err = producer.queueBlobsReadyForTransfer(totalBlobCount)
+            if err != nil {
+                // A fatal error has occurred; stop queuing blobs for processing and
+                // close the processing queue to signal the consumer we won't be sending any more blobs.
+                producer.continueQueuingBlobs = false
+                fatalErr = err
+                close(*producer.queue)
             }
+        }
+    }
+}
+
+func (producer *getProducer) hasMoreToProcess(totalBlobCount int64) bool {
+    return producer.processedBlobTracker.NumberOfProcessedBlobs() < totalBlobCount || producer.deferredBlobQueue.Size() > 0
+}
+
+func (producer *getProducer) queueBlobsReadyForTransfer(totalBlobCount int64) error {
+    if !producer.continueQueuingBlobs {
+        // We've queued up all the blobs we are going to for this job.
+        return nil
+    }
+
+    // check if there is anything left to be queued
+    if !producer.hasMoreToProcess(totalBlobCount) {
+        // Everything has been queued for processing.
+        producer.continueQueuingBlobs = false
+        // close the processing queue to signal the consumer we won't be sending any more blobs.
+        close(*producer.queue)
+        return nil
+    }
+
+    // Attempt to transfer waiting blobs
+    producer.processWaitingBlobs(*producer.JobMasterObjectList.BucketName, producer.JobMasterObjectList.JobId)
+
+    // Check if we need to query the BP for allocated blobs, or if we already know everything is allocated.
+    if int64(producer.deferredBlobQueue.Size()) + producer.processedBlobTracker.NumberOfProcessedBlobs() >= totalBlobCount {
+        // Everything is already allocated; no need to query the BP for allocated chunks.
+        return nil
+    }
+
+    // Get the list of available chunks that the server can receive. The server may
+    // not be able to receive everything, so not all chunks will necessarily be
+    // returned.
+    chunksReady := ds3Models.NewGetJobChunksReadyForClientProcessingSpectraS3Request(producer.JobMasterObjectList.JobId)
+    chunksReadyResponse, err := producer.client.GetJobChunksReadyForClientProcessingSpectraS3(chunksReady)
+    if err != nil {
+        producer.Errorf("unrecoverable error: %v", err)
+        return err
+    }
 
-        // Attempt to transfer waiting blobs
-        producer.processWaitingBlobs(*chunksReadyResponse.MasterObjectList.BucketName, chunksReadyResponse.MasterObjectList.JobId)
-    } else {
-        // When no chunks are returned we need to sleep to allow for cache space to
-        // be freed.
-        producer.strategy.BlobStrategy.delay()
+    // Check to see if any chunks can be processed
+    numberOfChunks := len(chunksReadyResponse.MasterObjectList.Objects)
+    if numberOfChunks > 0 {
+        // Loop through all the chunks that are available for processing, and send
+        // the files that are contained within them.
+        for _, curChunk := range chunksReadyResponse.MasterObjectList.Objects {
+            producer.processChunk(&curChunk, *chunksReadyResponse.MasterObjectList.BucketName, chunksReadyResponse.MasterObjectList.JobId)
         }
+    } else {
+        // When no chunks are returned, there is nothing to queue yet; the caller's
+        // ticker provides the wait before the next poll.
+        producer.strategy.BlobStrategy.delay()
     }
     return nil
 }
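
The rewritten run() replaces the fixed sleep-and-poll loop with poll-on-signal-or-timeout: each blob completion resets the ticker and triggers an immediate re-check, and the ticker only fires after a full delay() interval with no completions, which addresses the once-a-minute polling churn described in GOSDK-28. A stripped-down sketch of that select pattern — pollOnSignalOrTimeout and the events/check names are hypothetical, not the SDK's API:

package main

import (
    "fmt"
    "time"
)

// pollOnSignalOrTimeout re-checks for work whenever an event arrives,
// falling back to a timer so progress is still made if no events come.
// It returns when the event channel is closed.
func pollOnSignalOrTimeout(events <-chan struct{}, interval time.Duration, check func()) {
    ticker := time.NewTicker(interval)
    defer func() { ticker.Stop() }() // stop whichever ticker is current on exit

    for {
        select {
        case _, ok := <-events:
            if !ok {
                return // sender closed the channel: no more work is coming
            }
            // An event arrived: reset the timer and re-check immediately.
            ticker.Stop()
            ticker = time.NewTicker(interval)
            check()
        case <-ticker.C:
            // Nothing completed for a full interval: poll anyway.
            check()
        }
    }
}

func main() {
    events := make(chan struct{}, 4)
    go func() {
        for i := 0; i < 3; i++ {
            events <- struct{}{}
            time.Sleep(10 * time.Millisecond)
        }
        close(events)
    }()
    pollOnSignalOrTimeout(events, time.Second, func() { fmt.Println("checking for ready chunks") })
}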

helpers/getTransfernator.go

Lines changed: 3 additions & 2 deletions
@@ -78,9 +78,10 @@ func (transceiver *getTransceiver) transfer() (string, error) {
     // init queue, producer and consumer
     var waitGroup sync.WaitGroup
 
+    blobDoneChannel := make(chan struct{}, 10)
     queue := newOperationQueue(transceiver.Strategy.BlobStrategy.maxWaitingTransfers(), transceiver.Client.Logger)
-    producer := newGetProducer(&bulkGetResponse.MasterObjectList, transceiver.ReadObjects, &queue, transceiver.Strategy, transceiver.Client, &waitGroup)
-    consumer := newConsumer(&queue, &waitGroup, transceiver.Strategy.BlobStrategy.maxConcurrentTransfers())
+    producer := newGetProducer(&bulkGetResponse.MasterObjectList, transceiver.ReadObjects, &queue, transceiver.Strategy, blobDoneChannel, transceiver.Client, &waitGroup)
+    consumer := newConsumer(&queue, blobDoneChannel, &waitGroup, transceiver.Strategy.BlobStrategy.maxConcurrentTransfers())
 
     // Wait for completion of producer-consumer goroutines
     var aggErr ds3Models.AggregateError
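
Note how the single blobDoneChannel value is handed to the producer as receive-only (<-chan struct{}) and to the consumer as send-only (chan<- struct{}), so the compiler enforces the direction of the signaling and the consumer remains the sole closer. A minimal sketch of that directional split, with hypothetical names:

package main

import "fmt"

// The consumer side only sends done signals ...
func startConsumer(done chan<- struct{}) {
    done <- struct{}{}
    close(done) // the consumer owns the close
}

// ... and the producer side only receives them.
func runProducer(done <-chan struct{}) {
    for range done {
        fmt.Println("a blob finished; re-check for ready chunks")
    }
    fmt.Println("consumer shut down")
}

func main() {
    // One bidirectional channel, split into directional views at the call sites.
    blobDone := make(chan struct{}, 10)
    go startConsumer(blobDone)
    runProducer(blobDone)
}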
