Skip to content

Commit 9334bd4

Browse files
committed
*: tests for squash vs no squash correctness
This commit implements a test suite to ensure that queries return exactly the same results with the squash optimization as without it. It also fixes two of the differences that were found:
- Without squashing, the remotes iterator returned the refspecs as a non-string type instead of as strings.
- Joining blobs with refs or commits could produce extra rows, because a blob could appear more than once per commit or ref. Now each blob appears only once.

Signed-off-by: Miguel Molina <[email protected]>
1 parent 300fc41 commit 9334bd4

File tree

8 files changed

+229
-22
lines changed

8 files changed

+229
-22
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ _testmain.go
2222
*.exe
2323
*.test
2424
*.prof
25+
gitbase
2526

2627
# CI
2728
.ci/

integration_test.go

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,95 @@ func TestUastQueries(t *testing.T) {
189189
require.Len(rows, 3)
190190
}
191191

192+
func TestSquashCorrectness(t *testing.T) {
193+
engine := sqle.New()
194+
squashEngine := sqle.New()
195+
require.NoError(t, fixtures.Init())
196+
defer func() {
197+
require.NoError(t, fixtures.Clean())
198+
}()
199+
200+
pool := gitbase.NewRepositoryPool()
201+
for _, f := range fixtures.ByTag("worktree") {
202+
pool.AddGit(f.Worktree().Root())
203+
}
204+
205+
engine.AddDatabase(gitbase.NewDatabase("foo"))
206+
engine.Catalog.RegisterFunctions(function.Functions)
207+
208+
squashEngine.AddDatabase(gitbase.NewDatabase("foo"))
209+
squashEngine.Catalog.RegisterFunctions(function.Functions)
210+
squashEngine.Analyzer.AddRule(rule.SquashJoinsRule, rule.SquashJoins)
211+
212+
queries := []string{
213+
`SELECT * FROM repositories`,
214+
`SELECT * FROM refs`,
215+
`SELECT * FROM remotes`,
216+
`SELECT * FROM commits`,
217+
`SELECT * FROM tree_entries`,
218+
`SELECT * FROM blobs`,
219+
`SELECT * FROM repositories r INNER JOIN refs ON r.id = refs.repository_id`,
220+
`SELECT * FROM repositories r INNER JOIN remotes ON r.id = remotes.repository_id`,
221+
`SELECT * FROM refs r INNER JOIN remotes re ON r.repository_id = re.repository_id`,
222+
`SELECT * FROM refs r INNER JOIN commits c ON r.hash = c.hash`,
223+
`SELECT * FROM refs r INNER JOIN commits c ON history_idx(r.hash, c.hash) >= 0`,
224+
`SELECT * FROM refs r INNER JOIN tree_entries te ON commit_has_tree(r.hash, te.tree_hash)`,
225+
`SELECT * FROM refs r INNER JOIN blobs b ON commit_has_blob(r.hash, b.hash)`,
226+
`SELECT * FROM commits c INNER JOIN tree_entries te ON commit_has_tree(c.hash, te.tree_hash)`,
227+
`SELECT * FROM commits c INNER JOIN tree_entries te ON c.tree_hash = te.tree_hash`,
228+
`SELECT * FROM commits c INNER JOIN blobs b ON commit_has_blob(c.hash, b.hash)`,
229+
`SELECT * FROM tree_entries te INNER JOIN blobs b ON te.entry_hash = b.hash`,
230+
231+
`SELECT * FROM repositories r
232+
INNER JOIN refs re
233+
ON r.id = re.repository_id
234+
INNER JOIN commits c
235+
ON re.hash = c.hash
236+
WHERE re.name = 'HEAD'`,
237+
238+
`SELECT * FROM commits c
239+
INNER JOIN tree_entries te
240+
ON c.tree_hash = te.tree_hash
241+
INNER JOIN blobs b
242+
ON te.entry_hash = b.hash
243+
WHERE te.name = 'LICENSE'`,
244+
245+
`SELECT * FROM repositories,
246+
commits c INNER JOIN tree_entries te
247+
ON c.tree_hash = te.tree_hash`,
248+
}
249+
250+
for _, q := range queries {
251+
t.Run(q, func(t *testing.T) {
252+
expected := queryResults(t, engine, &pool, q)
253+
result := queryResults(t, squashEngine, &pool, q)
254+
require.ElementsMatch(
255+
t,
256+
expected,
257+
result,
258+
)
259+
})
260+
}
261+
}
262+
263+
func queryResults(
264+
t *testing.T,
265+
e *sqle.Engine,
266+
pool *gitbase.RepositoryPool,
267+
q string,
268+
) []sql.Row {
269+
session := gitbase.NewSession(pool)
270+
ctx := sql.NewContext(context.TODO(), sql.WithSession(session))
271+
272+
_, iter, err := e.Query(ctx, q)
273+
require.NoError(t, err)
274+
275+
rows, err := sql.RowIterToRows(iter)
276+
require.NoError(t, err)
277+
278+
return rows
279+
}
280+
192281
func BenchmarkQueries(b *testing.B) {
193282
queries := []struct {
194283
name string

internal/rule/squashjoins.go

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -322,16 +322,8 @@ func buildSquashedTable(
322322
return nil, err
323323
}
324324

325-
iter = gitbase.NewTreeEntryBlobsIter(
326-
gitbase.NewCommitTreeEntriesIter(
327-
gitbase.NewRefHEADCommitsIter(
328-
it,
329-
nil,
330-
true,
331-
),
332-
nil,
333-
true,
334-
),
325+
iter = gitbase.NewCommitBlobsIter(
326+
gitbase.NewRefHEADCommitsIter(it, nil, true),
335327
f,
336328
)
337329
case gitbase.CommitsIter:

internal/rule/squashjoins_test.go

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -653,14 +653,10 @@ func TestBuildSquashedTable(t *testing.T) {
653653
},
654654
nil,
655655
newSquashedTable(
656-
gitbase.NewTreeEntryBlobsIter(
657-
gitbase.NewCommitTreeEntriesIter(
658-
gitbase.NewRefHEADCommitsIter(
659-
gitbase.NewAllRefsIter(
660-
fixIdx(t, refFilter, refsBlobsSchema),
661-
),
662-
nil,
663-
true,
656+
gitbase.NewCommitBlobsIter(
657+
gitbase.NewRefHEADCommitsIter(
658+
gitbase.NewAllRefsIter(
659+
fixIdx(t, refFilter, refsBlobsSchema),
664660
),
665661
nil,
666662
true,

iterator.go

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1442,6 +1442,115 @@ func (i *treeEntryBlobsIter) Schema() sql.Schema {
14421442
return append(i.treeEntries.Schema(), BlobsSchema...)
14431443
}
14441444

1445+
type commitBlobsIter struct {
1446+
ctx *sql.Context
1447+
repo *git.Repository
1448+
filters sql.Expression
1449+
commits CommitsIter
1450+
files *object.FileIter
1451+
row sql.Row
1452+
seen map[plumbing.Hash]struct{}
1453+
}
1454+
1455+
// NewCommitBlobsIter returns an iterator that will return all blobs
1456+
// for the commit in the given iter that match the given filters.
1457+
func NewCommitBlobsIter(
1458+
commits CommitsIter,
1459+
filters sql.Expression,
1460+
) BlobsIter {
1461+
return &commitBlobsIter{commits: commits, filters: filters}
1462+
}
1463+
1464+
func (i *commitBlobsIter) Close() error {
1465+
if i.commits != nil {
1466+
return i.commits.Close()
1467+
}
1468+
1469+
return nil
1470+
}
1471+
func (i *commitBlobsIter) New(ctx *sql.Context, repo *Repository) (ChainableIter, error) {
1472+
iter, err := i.commits.New(ctx, repo)
1473+
if err != nil {
1474+
return nil, err
1475+
}
1476+
1477+
return &commitBlobsIter{
1478+
ctx: ctx,
1479+
repo: repo.Repo,
1480+
commits: iter.(CommitsIter),
1481+
filters: i.filters,
1482+
seen: make(map[plumbing.Hash]struct{}),
1483+
}, nil
1484+
}
1485+
// Row returns the row most recently assembled by Advance.
func (i *commitBlobsIter) Row() sql.Row {
	return i.row
}
1486+
func (i *commitBlobsIter) Advance() error {
1487+
for {
1488+
if i.commits == nil {
1489+
return io.EOF
1490+
}
1491+
1492+
if i.files == nil {
1493+
err := i.commits.Advance()
1494+
if err == io.EOF {
1495+
i.commits = nil
1496+
return io.EOF
1497+
}
1498+
1499+
if err != nil {
1500+
return err
1501+
}
1502+
1503+
tree, err := i.repo.TreeObject(i.commits.Commit().TreeHash)
1504+
if err != nil {
1505+
return err
1506+
}
1507+
1508+
i.files = tree.Files()
1509+
// uniqueness of blob is per commit, so we need to reset the seen map
1510+
i.seen = make(map[plumbing.Hash]struct{})
1511+
}
1512+
1513+
file, err := i.files.Next()
1514+
if err == io.EOF {
1515+
i.files = nil
1516+
continue
1517+
}
1518+
1519+
if _, ok := i.seen[file.Hash]; ok {
1520+
continue
1521+
}
1522+
1523+
i.seen[file.Hash] = struct{}{}
1524+
blob, err := i.repo.BlobObject(file.Hash)
1525+
if err != nil {
1526+
return err
1527+
}
1528+
1529+
row, err := blobToRow(blob)
1530+
if err != nil {
1531+
return err
1532+
}
1533+
1534+
i.row = append(i.commits.Row(), row...)
1535+
1536+
if i.filters != nil {
1537+
ok, err := evalFilters(i.ctx, i.row, i.filters)
1538+
if err != nil {
1539+
return err
1540+
}
1541+
1542+
if !ok {
1543+
continue
1544+
}
1545+
}
1546+
1547+
return nil
1548+
}
1549+
}
1550+
func (i *commitBlobsIter) Schema() sql.Schema {
1551+
return append(i.commits.Schema(), BlobsSchema...)
1552+
}
1553+
14451554
// NewChainableRowRepoIter creates a new RowRepoIter from a ChainableIter.
14461555
func NewChainableRowRepoIter(ctx *sql.Context, iter ChainableIter) RowRepoIter {
14471556
return &chainableRowRepoIter{iter, ctx}

iterator_test.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,26 @@ func TestRecursiveTreeFileIter(t *testing.T) {
419419
require.Equal(expected, result)
420420
}
421421

422+
func TestCommitBlobsIter(t *testing.T) {
423+
require := require.New(t)
424+
ctx, cleanup := setupIter(t)
425+
defer cleanup()
426+
427+
rows := chainableIterRows(
428+
t, ctx,
429+
NewCommitBlobsIter(
430+
NewRefHEADCommitsIter(
431+
NewAllRefsIter(nil),
432+
nil,
433+
true,
434+
),
435+
nil,
436+
),
437+
)
438+
439+
require.Len(rows, 42)
440+
}
441+
422442
func chainableIterRows(t *testing.T, ctx *sql.Context, iter ChainableIter) []sql.Row {
423443
it, err := NewRowRepoIter(ctx, NewChainableRowRepoIter(ctx, iter))
424444
require.NoError(t, err)

remotes.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,8 @@ func (i *remotesIter) Next() (sql.Row, error) {
130130
config.Name,
131131
config.URLs[i.urlPos],
132132
config.URLs[i.urlPos],
133-
config.Fetch[i.urlPos],
134-
config.Fetch[i.urlPos],
133+
config.Fetch[i.urlPos].String(),
134+
config.Fetch[i.urlPos].String(),
135135
)
136136

137137
i.urlPos++

remotes_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,8 @@ func TestRemotesTable_RowIter(t *testing.T) {
8181
require.Equal(url, row[3]) // fetch
8282

8383
ref := fmt.Sprintf("refs/heads/*:refs/remotes/fetch%v/*", num)
84-
require.Equal(gitconfig.RefSpec(ref), row[4]) // push
85-
require.Equal(gitconfig.RefSpec(ref), row[5]) // fetch
84+
require.Equal(gitconfig.RefSpec(ref).String(), row[4]) // push
85+
require.Equal(gitconfig.RefSpec(ref).String(), row[5]) // fetch
8686
} else {
8787
require.Equal("origin", row[1])
8888
}

0 commit comments

Comments
 (0)