@@ -20,9 +20,9 @@ import (
	"encoding/binary"
	"errors"
	"fmt"
-	"hash"
	"io"
	"sync"
+	"time"
)

/*
@@ -50,14 +50,6 @@ data_{i} := size(subtree_{i}) || key_{j} || key_{j+1} .... || key_{j+n-1}
The underlying hash function is configurable.
*/
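
A small sketch to make the layout above concrete (illustrative only, not part of this change; it assumes the little-endian 8-byte size prefix implied by the hashChunk change further down, and reuses the encoding/binary import):

    // packChunk is a hypothetical helper showing the data_{i} layout:
    // 8 bytes of subtree size followed by the concatenated child keys.
    func packChunk(subtreeSize uint64, keys [][]byte) []byte {
        data := make([]byte, 8)
        binary.LittleEndian.PutUint64(data, subtreeSize) // size(subtree_{i})
        for _, key := range keys {
            data = append(data, key...) // key_{j} || key_{j+1} || ... || key_{j+n-1}
        }
        return data
    }
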

-const (
-	defaultHash = "SHA3"
-	// defaultHash = "BMTSHA3" // http://golang.org/pkg/hash/#Hash
-	// defaultHash = "SHA256" // http://golang.org/pkg/hash/#Hash
-	defaultBranches int64 = 128
-	// hashSize int64 = hasherfunc.New().Size() // hasher knows about its own length in bytes
-	// chunksize int64 = branches * hashSize // chunk is defined as this
-)

/*
Tree chunker is a concrete implementation of data chunking.
@@ -67,25 +59,19 @@ If all is well it is possible to implement this by simply composing readers so t
The hashing itself does use extra copies and allocation, though, since it genuinely needs them.
*/

-type ChunkerParams struct {
-	Branches int64
-	Hash     string
-}
-
-func NewChunkerParams() *ChunkerParams {
-	return &ChunkerParams{
-		Branches: defaultBranches,
-		Hash:     defaultHash,
-	}
-}
+var (
+	errAppendOppNotSupported = errors.New("Append operation not supported")
+	errOperationTimedOut     = errors.New("operation timed out")
+)
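
Two notes on this hunk. ChunkerParams is still referenced by NewTreeChunker below, so the removed struct has presumably moved to another file rather than been deleted outright. Likewise splitTimeout, used in Split below, is not declared anywhere in this diff; a plausible declaration elsewhere in the package (the value here is an assumption) would be:

    // Assumed declaration; the real timeout value is not shown in this diff.
    const splitTimeout = time.Minute * 5
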
type TreeChunker struct {
	branches int64
-	hashFunc Hasher
+	hashFunc SwarmHasher
	// calculated
	hashSize  int64 // self.hashFunc().Size()
	chunkSize int64 // hashSize * branches
-	workerCount int
+	workerCount int64        // the number of worker routines used
+	workerLock  sync.RWMutex // lock guarding the worker count
}

func NewTreeChunker(params *ChunkerParams) (self *TreeChunker) {
@@ -94,7 +80,8 @@ func NewTreeChunker(params *ChunkerParams) (self *TreeChunker) {
	self.branches = params.Branches
	self.hashSize = int64(self.hashFunc().Size())
	self.chunkSize = self.hashSize * self.branches
-	self.workerCount = 1
+	self.workerCount = 0
+
	return
}

@@ -114,13 +101,31 @@ type hashJob struct {
	parentWg *sync.WaitGroup
}

-func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, swg, wwg *sync.WaitGroup) (Key, error) {
+func (self *TreeChunker) incrementWorkerCount() {
+	self.workerLock.Lock()
+	defer self.workerLock.Unlock()
+	self.workerCount += 1
+}
+
+func (self *TreeChunker) getWorkerCount() int64 {
+	self.workerLock.RLock()
+	defer self.workerLock.RUnlock()
+	return self.workerCount
+}
+
+func (self *TreeChunker) decrementWorkerCount() {
+	self.workerLock.Lock()
+	defer self.workerLock.Unlock()
+	self.workerCount -= 1
+}
+
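
The RWMutex-guarded counter is correct, but every access is a single int64 read or write, so sync/atomic would do the same job with less machinery. A sketch of that alternative (a suggestion, not what this change does; it needs the sync/atomic import):

    // workerCount would then be touched only through atomic operations.
    func (self *TreeChunker) incrementWorkerCount() {
        atomic.AddInt64(&self.workerCount, 1)
    }

    func (self *TreeChunker) getWorkerCount() int64 {
        return atomic.LoadInt64(&self.workerCount)
    }

    func (self *TreeChunker) decrementWorkerCount() {
        atomic.AddInt64(&self.workerCount, -1)
    }
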
+func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, swg, wwg *sync.WaitGroup) (Key, error) {
	if self.chunkSize <= 0 {
		panic("chunker must be initialised")
	}

-	jobC := make(chan *hashJob, 2*processors)
+
+	jobC := make(chan *hashJob, 2*ChunkProcessors)
	wg := &sync.WaitGroup{}
	errC := make(chan error)
	quitC := make(chan bool)

@@ -129,6 +134,8 @@ func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, s
	if wwg != nil {
		wwg.Add(1)
	}
+
+	self.incrementWorkerCount()
	go self.hashWorker(jobC, chunkC, errC, quitC, swg, wwg)

	depth := 0

@@ -157,17 +164,24 @@ func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, s
		close(errC)
	}()

-	//TODO: add a timeout
-	if err := <-errC; err != nil {
-		close(quitC)
-		return nil, err
+
+	defer close(quitC)
+	select {
+	case err := <-errC:
+		if err != nil {
+			return nil, err
+		}
+	case <-time.NewTimer(splitTimeout).C:
+		return nil, errOperationTimedOut
	}

	return key, nil
}
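
One caveat on the added select: the timer behind time.NewTimer(splitTimeout).C is never stopped, so when errC wins the race the timer stays live until it fires. A variant that releases the timer promptly, under the same splitTimeout assumption:

    timer := time.NewTimer(splitTimeout)
    defer timer.Stop() // free the timer if the error path returns first
    select {
    case err := <-errC:
        if err != nil {
            return nil, err
        }
    case <-timer.C:
        return nil, errOperationTimedOut
    }
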

func (self *TreeChunker) split(depth int, treeSize int64, key Key, data io.Reader, size int64, jobC chan *hashJob, chunkC chan *Chunk, errC chan error, quitC chan bool, parentWg, swg, wwg *sync.WaitGroup) {
+
	for depth > 0 && size < treeSize {
		treeSize /= self.branches
		depth--

@@ -223,12 +237,15 @@ func (self *TreeChunker) split(depth int, treeSize int64, key Key, data io.Reade
	// parentWg.Add(1)
	// go func() {
	childrenWg.Wait()
-	if len(jobC) > self.workerCount && self.workerCount < processors {
+
+	worker := self.getWorkerCount()
+	if int64(len(jobC)) > worker && worker < ChunkProcessors {
		if wwg != nil {
			wwg.Add(1)
		}
-		self.workerCount++
+		self.incrementWorkerCount()
		go self.hashWorker(jobC, chunkC, errC, quitC, swg, wwg)
+
	}
	select {
	case jobC <- &hashJob{key, chunk, size, parentWg}:

@@ -237,6 +254,8 @@ func (self *TreeChunker) split(depth int, treeSize int64, key Key, data io.Reade
}
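
The scaling rule above spawns an extra hashWorker only while queued jobs outnumber live workers and the ChunkProcessors cap has not been reached. Distilled into a standalone predicate (an illustration, not code from this change):

    // needMoreWorkers mirrors the spawn condition used in split.
    func needMoreWorkers(queuedJobs, workers, maxWorkers int64) bool {
        return queuedJobs > workers && workers < maxWorkers
    }
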

func (self *TreeChunker) hashWorker(jobC chan *hashJob, chunkC chan *Chunk, errC chan error, quitC chan bool, swg, wwg *sync.WaitGroup) {
+	defer self.decrementWorkerCount()
+
	hasher := self.hashFunc()
	if wwg != nil {
		defer wwg.Done()

@@ -249,7 +268,6 @@ func (self *TreeChunker) hashWorker(jobC chan *hashJob, chunkC chan *Chunk, errC
				return
			}
			// now that the child hashes are in the chunk, hash the chunk itself
-			hasher.Reset()
			self.hashChunk(hasher, job, chunkC, swg)
		case <-quitC:
			return

@@ -260,9 +278,11 @@ func (self *TreeChunker) hashWorker(jobC chan *hashJob, chunkC chan *Chunk, errC
// The treeChunker's own Hash hashes together
// - the size (of the subtree encoded in the Chunk)
// - the Chunk, i.e. the contents read from the input reader
-func (self *TreeChunker) hashChunk(hasher hash.Hash, job *hashJob, chunkC chan *Chunk, swg *sync.WaitGroup) {
-	hasher.Write(job.chunk)
+func (self *TreeChunker) hashChunk(hasher SwarmHash, job *hashJob, chunkC chan *Chunk, swg *sync.WaitGroup) {
+	hasher.ResetWithLength(job.chunk[:8]) // reset, seeding the hash with the 8-byte length prefix
+	hasher.Write(job.chunk[8:])           // hash the payload that follows the length prefix
	h := hasher.Sum(nil)
+
	newChunk := &Chunk{
		Key:   h,
		SData: job.chunk,
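
hashChunk now requires more than hash.Hash because of the ResetWithLength call. The SwarmHash interface itself is not part of this diff; reconstructed from the call sites here, it presumably looks like the following (the real definition lives elsewhere in the package):

    // Reconstruction from usage in this file, not the authoritative definition.
    type SwarmHash interface {
        hash.Hash               // the standard Write / Sum / Reset / Size / BlockSize
        ResetWithLength([]byte) // reset, seeding the hash with the 8-byte length prefix
    }

    // SwarmHasher matches the hashFunc field added to TreeChunker above.
    type SwarmHasher func() SwarmHash
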
@@ -285,6 +305,10 @@ func (self *TreeChunker) hashChunk(hasher hash.Hash, job *hashJob, chunkC chan *
	}
}

+func (self *TreeChunker) Append(key Key, data io.Reader, chunkC chan *Chunk, swg, wwg *sync.WaitGroup) (Key, error) {
+	return nil, errAppendOppNotSupported
+}
+
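
Since Append is only a stub, callers can detect the missing support through the sentinel error. A minimal usage sketch (illustrative; chunker, key, data and chunkC are assumed to be in scope):

    if _, err := chunker.Append(key, data, chunkC, nil, nil); err == errAppendOppNotSupported {
        // appending is not implemented for TreeChunker; re-split the whole document instead
    }
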
// LazyChunkReader implements LazySectionReader
type LazyChunkReader struct {
	key Key // root key

@@ -298,7 +322,6 @@ type LazyChunkReader struct {

// implements the Joiner interface
func (self *TreeChunker) Join(key Key, chunkC chan *Chunk) LazySectionReader {
-
	return &LazyChunkReader{
		key:    key,
		chunkC: chunkC,