Skip to content

Commit 78027f0

Browse files
authored
feat: Make UUID in testdata always deterministic like all other columns (#1479)
#### Summary `UUID` columns now follow the same pattern as all other columns. This could also be considered a `refactor`; I am open to changing it if people want.
1 parent 31085d2 commit 78027f0

File tree

1 file changed

+5
-6
lines changed

1 file changed

+5
-6
lines changed

schema/testdata.go

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package schema
22

33
import (
4+
"crypto/sha256"
45
"encoding/base64"
56
"fmt"
67
"sort"
@@ -190,8 +191,6 @@ type GenTestDataOptions struct {
190191
SyncTime time.Time
191192
// MaxRows is the number of rows to generate.
192193
MaxRows int
193-
// StableUUID is the UUID to use for all rows. If set to uuid.Nil, a new UUID will be generated
194-
StableUUID uuid.UUID
195194
// StableTime is the time to use for all rows other than sync time. If set to time.Time{}, a new time will be generated
196195
StableTime time.Time
197196
// TimePrecision is the precision to use for time columns.
@@ -301,10 +300,10 @@ func (tg TestDataGenerator) getExampleJSON(colName string, dataType arrow.DataTy
301300
}
302301
// handle extension types
303302
if arrow.TypeEqual(dataType, types.ExtensionTypes.UUID) {
304-
u := uuid.New()
305-
if opts.StableUUID != uuid.Nil {
306-
u = opts.StableUUID
307-
}
303+
// This will make UUIDs deterministic like all other types
304+
hash := sha256.New()
305+
hash.Write([]byte(fmt.Sprintf(`"AString%d"`, rnd.Intn(100000))))
306+
u := uuid.NewSHA1(uuid.UUID{}, hash.Sum(nil))
308307
return `"` + u.String() + `"`
309308
}
310309
if arrow.TypeEqual(dataType, types.ExtensionTypes.JSON) {

0 commit comments

Comments
 (0)