Skip to content

Commit d2b2043

Browse files
committed
add dataset workflow integration test
1 parent 34252d4 commit d2b2043

File tree

3 files changed

+108
-1
lines changed

3 files changed

+108
-1
lines changed

common/config/test.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,4 +101,5 @@ access_key_secret = "def"
101101
endpoint = ""
102102
bucket = "testcsg"
103103
enable_ssl = false
104-
url_upload_max_file_size = 5153960755
104+
url_upload_max_file_size = 5153960755
105+
bucket_lookup = "path"

tests/1.parquet

791 Bytes
Binary file not shown.

tests/api/dataset_viewer_test.go

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
package api_test
2+
3+
import (
4+
"bytes"
5+
"context"
6+
"encoding/json"
7+
"fmt"
8+
"io"
9+
"net/http"
10+
"os"
11+
"os/exec"
12+
"strings"
13+
"testing"
14+
15+
"github.com/stretchr/testify/require"
16+
"github.com/tidwall/gjson"
17+
"opencsg.com/csghub-server/common/types"
18+
"opencsg.com/csghub-server/tests/testinfra"
19+
)
20+
21+
func TestIntegrationDatasetViewer_Workflow(t *testing.T) {
22+
if testing.Short() {
23+
t.Skip("skipping integration test")
24+
}
25+
ctx := context.TODO()
26+
env, err := testinfra.StartTestEnv()
27+
defer func() { _ = env.Shutdown(ctx) }()
28+
require.NoError(t, err)
29+
token, err := env.CreateUser(ctx, "user")
30+
require.NoError(t, err)
31+
userClient := testinfra.GetClient(token)
32+
33+
data := `{"name":"test","nickname":"","namespace":"user","license":"apache-2.0","description":"","private":false}`
34+
req, err := http.NewRequest(
35+
"POST", "http://localhost:9091/api/v1/datasets", bytes.NewBuffer([]byte(data)),
36+
)
37+
require.NoError(t, err)
38+
resp, err := userClient.Do(req)
39+
require.NoError(t, err)
40+
defer resp.Body.Close()
41+
require.Equal(t, 200, resp.StatusCode)
42+
resp, err = userClient.Get("http://localhost:9091/api/v1/datasets/user/test")
43+
require.NoError(t, err)
44+
defer resp.Body.Close()
45+
body, err := io.ReadAll(resp.Body)
46+
require.NoError(t, err)
47+
var model types.Model
48+
err = json.Unmarshal([]byte(gjson.GetBytes(body, "data").Raw), &model)
49+
require.NoError(t, err)
50+
cloneURL := model.Repository.HTTPCloneURL
51+
52+
token, err = env.CreateAccessToken(ctx, "user", types.AccessTokenAppGit)
53+
require.NoError(t, err)
54+
url := strings.ReplaceAll(cloneURL, "http://", fmt.Sprintf("http://%s:%s@", "user", token))
55+
dir := "dataset_clone"
56+
err = gitClone(url, dir)
57+
require.NoError(t, err)
58+
defer os.RemoveAll(dir)
59+
// add yaml config to readme
60+
file, err := os.OpenFile(dir+"/README.md", os.O_RDWR|os.O_CREATE, 0644)
61+
require.NoError(t, err)
62+
defer file.Close()
63+
fileContent := ""
64+
buf := make([]byte, 1024)
65+
for {
66+
n, err := file.Read(buf)
67+
if err != nil {
68+
break
69+
}
70+
fileContent += string(buf[:n])
71+
}
72+
configContent := `---
73+
configs:
74+
- config_name: defaultgo
75+
data_files:
76+
- split: traingo
77+
path: "train/0.parquet"
78+
- split: testgo
79+
path: "test/1.parquet"
80+
---
81+
`
82+
newContent := configContent + fileContent
83+
file.Seek(0, 0)
84+
_, err = file.WriteString(newContent)
85+
require.NoError(t, err)
86+
87+
err = exec.Command("mkdir", dir+"/train").Run()
88+
require.NoError(t, err)
89+
err = exec.Command("mkdir", dir+"/test").Run()
90+
require.NoError(t, err)
91+
err = exec.Command("cp", "tests/0.parquet", dir+"/train/0.parquet").Run()
92+
require.NoError(t, err)
93+
err = exec.Command("cp", "tests/1.parquet", dir+"/test/1.parquet").Run()
94+
require.NoError(t, err)
95+
err = gitCommitAndPush(dir)
96+
require.NoError(t, err)
97+
98+
resp, err = userClient.Get("http://localhost:9091/api/v1/datasets/user/test/dataviewer/catalog")
99+
require.NoError(t, err)
100+
defer resp.Body.Close()
101+
body, err = io.ReadAll(resp.Body)
102+
require.NoError(t, err)
103+
expected := `{"msg":"OK","data":{"configs":[{"config_name":"defaultgo","data_files":[{"split":"traingo","path":["train/0.parquet"]},{"split":"testgo","path":["test/1.parquet"]}]}],"dataset_info":[{"config_name":"defaultgo","splits":[{"name":"traingo","num_examples":20},{"name":"testgo","num_examples":20}]}],"status":0,"logs":""}}
104+
`
105+
require.Equal(t, expected, string(body))
106+
}

0 commit comments

Comments
 (0)