Skip to content

Commit 90b377c

Browse files
authored
function: LOC function implementation (#798)
function: LOC function implementation
2 parents 1a83350 + a9f8b81 commit 90b377c

25 files changed

+1981
-3
lines changed

Gopkg.lock

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/using-gitbase/examples.md

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ FROM
4242
WHERE num > 1;
4343
```
4444

45-
## Get the number of blobs per HEAD commit
45+
## Get the number of blobs per HEAD commit
4646

4747
```sql
4848
SELECT COUNT(commit_hash),
@@ -76,6 +76,22 @@ GROUP BY committer_email,
7676
repo_id;
7777
```
7878

79+
## Report of line count per language from HEAD references
80+
81+
```sql
82+
SELECT
83+
LANGUAGE(file_path, blob_content) as lang,
84+
SUM(JSON_EXTRACT(LOC(file_path, blob_content), '$.Code')) as code,
85+
SUM(JSON_EXTRACT(LOC(file_path, blob_content), '$.Comments')) as comments,
86+
SUM(JSON_EXTRACT(LOC(file_path, blob_content), '$.Blanks')) as blanks,
87+
COUNT(1) as files
88+
FROM commit_files
89+
NATURAL JOIN refs
90+
NATURAL JOIN blobs
91+
WHERE ref_name='HEAD'
92+
GROUP BY lang;
93+
```
94+
7995
## Files from first 6 commits from HEAD references that contain some key and are not in vendor directory
8096

8197
```sql
@@ -201,4 +217,4 @@ To kill a query that's currently running you can use the value in `Id`. If we we
201217

202218
```sql
203219
KILL QUERY 168;
204-
```
220+
```

docs/using-gitbase/functions.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ To make some common tasks easier for the user, there are some functions to inter
1414
|`uast_xpath(blob, xpath) blob`| performs an XPath query over the given UAST nodes |
1515
|`uast_extract(blob, key) text array`| extracts information identified by the given key from the uast nodes |
1616
|`uast_children(blob) blob`| returns a flattened array of the children UAST nodes from each one of the UAST nodes in the given array |
17-
17+
|`loc(path, blob) json`| returns a JSON map containing the line counts of a file, split into three categories: `Code`, `Comments` and `Blanks` |
1818
## Standard functions
1919

2020
These are all functions that are available because they are implemented in `go-mysql-server`, used by gitbase.

internal/function/loc.go

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
package function
2+
3+
import (
4+
"bytes"
5+
"errors"
6+
"fmt"
7+
8+
"github.com/hhatto/gocloc"
9+
"gopkg.in/src-d/enry.v1"
10+
"gopkg.in/src-d/go-mysql-server.v0/sql"
11+
)
12+
13+
// languages holds the language definitions created by gocloc; Eval looks up
// the detected language name in languages.Langs before counting lines.
var languages = gocloc.NewDefinedLanguages()
14+
15+
// errEmptyInputValues is returned by getInputValues when the path or the blob
// is missing or empty; Eval turns it into a NULL result instead of an error.
var errEmptyInputValues = errors.New("empty input values")
16+
17+
type LOC struct {
18+
Left sql.Expression
19+
Right sql.Expression
20+
}
21+
22+
// NewLOC creates a new LOC UDF.
23+
func NewLOC(args ...sql.Expression) (sql.Expression, error) {
24+
if len(args) != 2 {
25+
return nil, sql.ErrInvalidArgumentNumber.New("2", len(args))
26+
}
27+
28+
return &LOC{args[0], args[1]}, nil
29+
}
30+
31+
// Resolved implements the Expression interface.
32+
func (f *LOC) Resolved() bool {
33+
return f.Left.Resolved() && f.Right.Resolved()
34+
}
35+
36+
func (f *LOC) String() string {
37+
return fmt.Sprintf("loc(%s, %s)", f.Left, f.Right)
38+
}
39+
40+
// IsNullable implements the Expression interface.
41+
func (f *LOC) IsNullable() bool {
42+
return f.Left.IsNullable() || f.Right.IsNullable()
43+
}
44+
45+
// Type implements the Expression interface.
46+
func (LOC) Type() sql.Type {
47+
return sql.JSON
48+
}
49+
50+
// TransformUp implements the Expression interface.
51+
func (f *LOC) TransformUp(fn sql.TransformExprFunc) (sql.Expression, error) {
52+
left, err := f.Left.TransformUp(fn)
53+
if err != nil {
54+
return nil, err
55+
}
56+
57+
right, err := f.Right.TransformUp(fn)
58+
if err != nil {
59+
return nil, err
60+
}
61+
62+
return fn(&LOC{left, right})
63+
}
64+
65+
// Eval implements the Expression interface.
66+
func (f *LOC) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) {
67+
span, ctx := ctx.Span("gitbase.LOC")
68+
defer span.Finish()
69+
path, blob, err := f.getInputValues(ctx, row)
70+
if err != nil {
71+
if err == errEmptyInputValues {
72+
return nil, nil
73+
}
74+
75+
return nil, err
76+
}
77+
78+
lang := f.getLanguage(path, blob)
79+
if lang == "" || languages.Langs[lang] == nil {
80+
return nil, nil
81+
}
82+
83+
return gocloc.AnalyzeReader(
84+
path,
85+
languages.Langs[lang],
86+
bytes.NewReader(blob), &gocloc.ClocOptions{},
87+
), nil
88+
}
89+
90+
func (f *LOC) getInputValues(ctx *sql.Context, row sql.Row) (string, []byte, error) {
91+
left, err := f.Left.Eval(ctx, row)
92+
if err != nil {
93+
return "", nil, err
94+
}
95+
96+
left, err = sql.Text.Convert(left)
97+
if err != nil {
98+
return "", nil, err
99+
}
100+
101+
right, err := f.Right.Eval(ctx, row)
102+
if err != nil {
103+
return "", nil, err
104+
}
105+
106+
right, err = sql.Blob.Convert(right)
107+
if err != nil {
108+
return "", nil, err
109+
}
110+
111+
if right == nil {
112+
return "", nil, errEmptyInputValues
113+
}
114+
115+
path, ok := left.(string)
116+
if !ok {
117+
return "", nil, errEmptyInputValues
118+
}
119+
120+
blob, ok := right.([]byte)
121+
122+
if !ok {
123+
return "", nil, errEmptyInputValues
124+
}
125+
126+
if len(blob) == 0 || len(path) == 0 {
127+
return "", nil, errEmptyInputValues
128+
}
129+
130+
return path, blob, nil
131+
}
132+
133+
func (f *LOC) getLanguage(path string, blob []byte) string {
134+
hash := languageHash(path, blob)
135+
136+
value, ok := languageCache.Get(hash)
137+
if ok {
138+
return value.(string)
139+
}
140+
141+
lang := enry.GetLanguage(path, blob)
142+
if len(blob) > 0 {
143+
languageCache.Add(hash, lang)
144+
}
145+
146+
return lang
147+
}
148+
149+
// Children implements the Expression interface.
150+
func (f *LOC) Children() []sql.Expression {
151+
if f.Right == nil {
152+
return []sql.Expression{f.Left}
153+
}
154+
155+
return []sql.Expression{f.Left, f.Right}
156+
}

internal/function/loc_test.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package function
2+
3+
import (
4+
"testing"
5+
6+
"github.com/hhatto/gocloc"
7+
"github.com/stretchr/testify/require"
8+
"gopkg.in/src-d/go-errors.v1"
9+
"gopkg.in/src-d/go-mysql-server.v0/sql"
10+
"gopkg.in/src-d/go-mysql-server.v0/sql/expression"
11+
)
12+
13+
func TestLoc(t *testing.T) {
14+
testCases := []struct {
15+
name string
16+
row sql.Row
17+
expected interface{}
18+
err *errors.Kind
19+
}{
20+
{"left is null", sql.NewRow(nil), nil, nil},
21+
{"both are null", sql.NewRow(nil, nil), nil, nil},
22+
{"too few args given", sql.NewRow("foo.foobar"), nil, nil},
23+
{"too many args given", sql.NewRow("foo.rb", "bar", "baz"), nil, sql.ErrInvalidArgumentNumber},
24+
{"invalid blob type given", sql.NewRow("foo", 5), nil, sql.ErrInvalidType},
25+
{"path and blob are given", sql.NewRow("foo", "#!/usr/bin/env python\n\nprint 'foo'"), &gocloc.ClocFile{
26+
Code: 2, Comments: 0, Blanks: 1, Name: "foo", Lang: "",
27+
}, nil},
28+
}
29+
30+
for _, tt := range testCases {
31+
t.Run(tt.name, func(t *testing.T) {
32+
require := require.New(t)
33+
ctx := sql.NewEmptyContext()
34+
35+
var args = make([]sql.Expression, len(tt.row))
36+
for i := range tt.row {
37+
args[i] = expression.NewGetField(i, sql.Text, "", false)
38+
}
39+
40+
f, err := NewLOC(args...)
41+
if err == nil {
42+
var val interface{}
43+
val, err = f.Eval(ctx, tt.row)
44+
if tt.err == nil {
45+
require.NoError(err)
46+
require.Equal(tt.expected, val)
47+
}
48+
}
49+
50+
if tt.err != nil {
51+
require.Error(err)
52+
require.True(tt.err.Is(err))
53+
}
54+
})
55+
}
56+
}

internal/function/registry.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ var Functions = []sql.Function{
77
sql.Function1{Name: "is_tag", Fn: NewIsTag},
88
sql.Function1{Name: "is_remote", Fn: NewIsRemote},
99
sql.FunctionN{Name: "language", Fn: NewLanguage},
10+
sql.FunctionN{Name: "loc", Fn: NewLOC},
1011
sql.FunctionN{Name: "uast", Fn: NewUAST},
1112
sql.Function3{Name: "uast_mode", Fn: NewUASTMode},
1213
sql.Function2{Name: "uast_xpath", Fn: NewUASTXPath},

vendor/github.com/hhatto/gocloc/.gitignore

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/hhatto/gocloc/.goreleaser.yml

Lines changed: 34 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/hhatto/gocloc/.travis.yml

Lines changed: 22 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/hhatto/gocloc/Gopkg.toml

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)