Skip to content

Commit a61496a

Browse files
authored
Merge pull request #428 from kuba--/regression-queries
Regression queries
2 parents b161d3d + 7a5209a commit a61496a

File tree

2 files changed

+161
-0
lines changed

2 files changed

+161
-0
lines changed

_testdata/regression.yml

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
---
2+
-
3+
ID: 'query0'
4+
Name: 'All commits'
5+
Statements:
6+
- SELECT * FROM commits
7+
-
8+
ID: 'query1'
9+
Name: 'Last commit messages in HEAD for every repository'
10+
Statements:
11+
- SELECT c.commit_message FROM refs r JOIN commits c ON r.commit_hash = c.commit_hash WHERE r.ref_name = 'HEAD'
12+
-
13+
ID: 'query2'
14+
Name: 'All commit messages in HEAD history for every repository'
15+
Statements:
16+
- SELECT c.commit_message FROM commits c NATURAL JOIN ref_commits r WHERE r.ref_name = 'HEAD'
17+
-
18+
ID: 'query3'
19+
Name: 'Top 10 repositories by commit count in HEAD'
20+
Statements:
21+
- SELECT repository_id,commit_count FROM (SELECT r.repository_id,count(*) AS commit_count FROM ref_commits r WHERE r.ref_name = 'HEAD' GROUP BY r.repository_id) AS q ORDER BY commit_count DESC LIMIT 10
22+
-
23+
ID: 'query4'
24+
Name: 'Top 10 repositories by contributor count (all branches)'
25+
Statements:
26+
- SELECT repository_id,contributor_count FROM (SELECT repository_id, COUNT(DISTINCT commit_author_email) AS contributor_count FROM commits GROUP BY repository_id) AS q ORDER BY contributor_count DESC LIMIT 10
27+
-
28+
ID: 'query5'
29+
Name: 'Create pilosa index on language UDF'
30+
Statements:
31+
- CREATE INDEX language_idx ON files USING pilosa (language(file_path, blob_content)) WITH (async = false)
32+
-
33+
ID: 'query6'
34+
Name: 'Create pilosalib index on language UDF'
35+
Statements:
36+
- CREATE INDEX language_idx ON files USING pilosalib (language(file_path, blob_content)) WITH (async = false)
37+
-
38+
ID: 'query7'
39+
Name: 'Query by language using the pilosa index'
40+
Statements:
41+
- CREATE INDEX language_idx ON files USING pilosa (language(file_path, blob_content)) WITH (async = false)
42+
- SELECT file_path FROM files WHERE language(file_path, blob_content) = 'Go'
43+
- DROP INDEX language_idx ON files
44+
-
45+
ID: 'query8'
46+
Name: 'Query by language using the pilosalib index'
47+
Statements:
48+
- CREATE INDEX language_idx ON files USING pilosalib (language(file_path, blob_content)) WITH (async = false)
49+
- SELECT file_path FROM files WHERE language(file_path, blob_content) = 'Go'
50+
- DROP INDEX language_idx ON files
51+
-
52+
ID: 'query9'
53+
Name: 'Query all files from HEAD'
54+
Statements:
55+
- SELECT cf.file_path, f.blob_content FROM ref_commits r NATURAL JOIN commit_files cf NATURAL JOIN files f WHERE r.ref_name = 'HEAD' AND r.index = 0
56+
-
57+
ID: 'query10'
58+
Name: 'Get all LICENSE blobs using pilosa index'
59+
Statements:
60+
- CREATE INDEX file_path_idx ON files USING pilosa (file_path) WITH (async = false)
61+
- SELECT blob_content FROM files WHERE file_path = 'LICENSE'
62+
-
63+
ID: 'query11'
64+
Name: 'Get all LICENSE blobs using pilosalib index'
65+
Statements:
66+
- CREATE INDEX file_path_idx ON files USING pilosalib (file_path) WITH (async = false)
67+
- SELECT blob_content FROM files WHERE file_path = 'LICENSE'
68+
-
69+
ID: 'query12'
70+
Name: '10 top repos by file count in HEAD'
71+
Statements:
72+
- SELECT repository_id, num_files FROM (SELECT COUNT(f.*) num_files, f.repository_id FROM ref_commits r INNER JOIN commit_files cf ON r.commit_hash = cf.commit_hash AND r.repository_id = cf.repository_id INNER JOIN files f ON cf.repository_id = f.repository_id AND cf.blob_hash = f.blob_hash AND cf.tree_hash = f.tree_hash AND cf.file_path = f.file_path WHERE r.ref_name = 'HEAD' GROUP BY f.repository_id) t ORDER BY num_files DESC LIMIT 10
73+
-
74+
ID: 'query13'
75+
Name: 'Top committers per repository'
76+
Statements:
77+
- SELECT * FROM (SELECT commit_author_email as author, repository_id as id, count(*) as num_commits FROM commits GROUP BY commit_author_email, repository_id) AS t ORDER BY num_commits DESC
78+
-
79+
ID: 'query14'
80+
Name: 'Top committers in all repositories'
81+
Statements:
82+
- SELECT * FROM (SELECT commit_author_email as author,count(*) as num_commits FROM commits GROUP BY commit_author_email) t ORDER BY num_commits DESC
83+
-
84+
ID: 'query15'
85+
Name: 'Union operation with pilosa index'
86+
Statements:
87+
- CREATE INDEX file_path_idx ON files USING pilosa (file_path) WITH (async = false)
88+
- SELECT blob_content FROM files WHERE file_path = 'LICENSE' OR file_path = 'README.md'
89+
- DROP INDEX file_path_idx ON files
90+
-
91+
ID: 'query16'
92+
Name: 'Union operation with pilosalib index'
93+
Statements:
94+
- CREATE INDEX file_path_idx ON files USING pilosalib (file_path) WITH (async = false)
95+
- SELECT blob_content FROM files WHERE file_path = 'LICENSE' OR file_path = 'README.md'
96+
- DROP INDEX file_path_idx ON files
97+
-
98+
ID: 'query17'
99+
Name: 'Count all commits with NOT operation'
100+
Statements:
101+
- SELECT COUNT(*) FROM commits WHERE NOT(commit_author_email = 'non existing email address');
102+
-
103+
ID: 'query18'
104+
Name: 'Count all commits with NOT operation and pilosalib index'
105+
Statements:
106+
- CREATE INDEX email_idx ON commits USING pilosalib (commit_author_email) WITH (async = false)
107+
- SELECT COUNT(*) FROM commits WHERE NOT(commit_author_email = 'non existing email address')
108+
- DROP INDEX email_idx ON commits

regression_test.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
package gitbase_test
2+
3+
import (
4+
"context"
5+
"io/ioutil"
6+
"testing"
7+
8+
"github.com/src-d/gitbase"
9+
"github.com/stretchr/testify/require"
10+
"gopkg.in/src-d/go-mysql-server.v0/sql"
11+
"gopkg.in/src-d/go-mysql-server.v0/sql/parse"
12+
yaml "gopkg.in/yaml.v2"
13+
)
14+
15+
// Query is one entry of the regression suite: an identifier, an optional
// descriptive name, and the SQL statements that belong to it.
//
// The yaml tags spell the keys out explicitly because the regression file
// uses capitalized keys (ID, Name, Statements).
type Query struct {
	// ID identifies the query (for example "query0").
	ID string `yaml:"ID"`
	// Name is a short description of the query; it is left out of
	// marshaled output when empty.
	Name string `yaml:"Name,omitempty"`
	// Statements holds the SQL statements of the query.
	Statements []string `yaml:"Statements"`
}
20+
21+
func TestParseRegressionQueries(t *testing.T) {
22+
require := require.New(t)
23+
24+
queries, err := loadQueriesYaml("./_testdata/regression.yml")
25+
require.NoError(err)
26+
27+
ctx := sql.NewContext(
28+
context.TODO(),
29+
sql.WithSession(gitbase.NewSession(gitbase.NewRepositoryPool())),
30+
)
31+
32+
for _, q := range queries {
33+
for _, stmt := range q.Statements {
34+
if _, err := parse.Parse(ctx, stmt); err != nil {
35+
require.Failf(err.Error(), "ID: %s, Name: %s, Statement: %s", q.ID, q.Name, stmt)
36+
}
37+
}
38+
}
39+
}
40+
41+
func loadQueriesYaml(file string) ([]Query, error) {
42+
text, err := ioutil.ReadFile(file)
43+
if err != nil {
44+
return nil, err
45+
}
46+
var q []Query
47+
err = yaml.Unmarshal(text, &q)
48+
if err != nil {
49+
return nil, err
50+
}
51+
52+
return q, nil
53+
}

0 commit comments

Comments
 (0)