6 changes: 3 additions & 3 deletions common/config/config.go
@@ -241,10 +241,10 @@ type Config struct {
ActivityMaximumAttempts int32 `env:"OPENCSG_DATAVIEWER_ACTIVITY_MAXIMUM_ATTEMPTS, default=2"`
CacheDir string `env:"OPENCSG_DATAVIEWER_CACHE_DIR, default=/tmp/opencsg"`
DownloadLfsFile bool `env:"OPENCSG_DATAVIEWER_DOWNLOAD_LFS_FILE, default=true"`
-ThreadNumOfExport int `env:"OPENCSG_DATAVIEWER_THREAD_NUM_OF_EXPORT, default=4"`
-MaxFileSize int64 `env:"OPENCSG_DATAVIEWER_MAX_FILE_SIZE, default=104857600"` // 100 MB
+MaxThreadNumOfExport int `env:"OPENCSG_DATAVIEWER_MAX_THREAD_NUM_OF_EXPORT, default=8"`
MaxConcurrentSessionExecutionSize int `env:"OPENCSG_DATAVIEWER_MAX_CONCURRENT_SESSION_EXECUTION_SIZE, default=1"`
-SessionExecutionTimeout int `env:"OPENCSG_DATAVIEWER_SESSION_EXECUTION_TIMEOUT, default=240"` // 240 minutes
+SessionExecutionTimeout int `env:"OPENCSG_DATAVIEWER_SESSION_EXECUTION_TIMEOUT, default=240"` // 240 mins
+ConvertLimitSize int64 `env:"OPENCSG_DATAVIEWER_CONVERT_LIMIT_SIZE, default=5368709120"` // 5G
}

Proxy struct {
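The renamed and added config knobs above are ordinary env-tagged struct fields. The tags follow the convention of a loader such as github.com/sethvargo/go-envconfig; the loader itself is not shown in this diff, so the following is a minimal, self-contained sketch under that assumption (the DataViewerConfig type name is illustrative, not the real struct):

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/sethvargo/go-envconfig"
)

// Only the fields touched by this diff; the full Config struct lives in common/config/config.go.
type DataViewerConfig struct {
	MaxThreadNumOfExport    int   `env:"OPENCSG_DATAVIEWER_MAX_THREAD_NUM_OF_EXPORT, default=8"`
	SessionExecutionTimeout int   `env:"OPENCSG_DATAVIEWER_SESSION_EXECUTION_TIMEOUT, default=240"` // minutes
	ConvertLimitSize        int64 `env:"OPENCSG_DATAVIEWER_CONVERT_LIMIT_SIZE, default=5368709120"` // bytes (~5 GB)
}

func main() {
	var cfg DataViewerConfig
	// envconfig.Process fills each field from its environment variable,
	// falling back to the default given in the tag.
	if err := envconfig.Process(context.Background(), &cfg); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("threads=%d timeout=%dmin convertLimit=%d\n",
		cfg.MaxThreadNumOfExport, cfg.SessionExecutionTimeout, cfg.ConvertLimitSize)
}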
4 changes: 2 additions & 2 deletions component/model.go
@@ -1073,7 +1073,7 @@ func (c *modelComponentImpl) SetRuntimeFrameworkModes(ctx context.Context, curre
if err != nil {
return nil, err
}
-if relations == nil || len(relations) < 1 {
+if len(relations) < 1 {
err = c.repoRuntimeFrameworkStore.Add(ctx, id, model.Repository.ID, deployType)
if err != nil {
failedModels = append(failedModels, model.Repository.Path)
@@ -1135,7 +1135,7 @@ func (c *modelComponentImpl) ListModelsOfRuntimeFrameworks(ctx context.Context,
return nil, 0, fmt.Errorf("failed to get repo by deploy type, error:%w", err)
}

-if runtimeRepos == nil || len(runtimeRepos) < 1 {
+if len(runtimeRepos) < 1 {
return nil, 0, nil
}

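Dropping the `relations == nil ||` and `runtimeRepos == nil ||` guards is a pure simplification, not a behavior change: in Go, len of a nil slice is 0, so the length check alone already covers the nil case. A tiny self-contained illustration:

package main

import "fmt"

func main() {
	var relations []int64 // nil slice: declared but never allocated
	fmt.Println(relations == nil)   // true
	fmt.Println(len(relations))     // 0
	fmt.Println(len(relations) < 1) // true — the same branch the old nil check selected
}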
47 changes: 35 additions & 12 deletions dataviewer/common/types.go
@@ -67,19 +67,27 @@ type Split struct {
}

type RepoFilesReq struct {
-Namespace string
-RepoName string
-RepoType types.RepositoryType
-Ref string
-Folder string
-GSTree func(ctx context.Context, req gitserver.GetRepoInfoByPathReq) ([]*types.File, error)
+Namespace string
+RepoName string
+RepoType types.RepositoryType
+Ref string
+Folder string
+GSTree func(ctx context.Context, req gitserver.GetRepoInfoByPathReq) ([]*types.File, error)
+TotalLimitSize int64
}

+type RepoFile struct {
+*types.File
+DownloadSize int64
+}

type RepoFilesClass struct {
-AllFiles map[string]*types.File
-ParquetFiles map[string]*types.File
-JsonlFiles map[string]*types.File
-CsvFiles map[string]*types.File
+AllFiles map[string]*RepoFile
+ParquetFiles map[string]*RepoFile
+JsonlFiles map[string]*RepoFile
+CsvFiles map[string]*RepoFile
+TotalJsonSize int64
+TotalCsvSize int64
}

type DownloadCard struct {
@@ -111,6 +119,7 @@ type FileObject struct {
ObjectKey string `yaml:"object_key" json:"object_key"`
LocalRepoPath string `yaml:"local_repo_path" json:"local_repo_path"`
LocalFileName string `yaml:"local_file_name" json:"local_file_name"`
+DownloadSize int64 `yaml:"download_size" json:"download_size"`
}

type CataLogRespone struct {
@@ -126,8 +135,8 @@ type WorkflowUpdateParams struct {
}

type ScanRepoFileReq struct {
-Req types.UpdateViewerReq
-MaxFileSize int64
+Req types.UpdateViewerReq
+ConvertLimitSize int64
}

type DetermineCardReq struct {
@@ -172,3 +181,17 @@ type UpdateWorkflowStatusReq struct {
WorkflowErrMsg string
ShouldUpdateViewer bool
}

+type FileExtName struct {
+Parquet string
+Jsonl string
+Json string
+Csv string
+}

+type SplitName struct {
+Train string
+Test string
+Val string
+Other string
+}
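The new FileExtName and SplitName structs replace loose per-split and per-extension constants with fields on a single lookup value (see the workflows.SplitName.Train references in dataset_viewer.go below). The values themselves live in the workflows package and are not part of this diff, so the sketch below is a self-contained, hypothetical illustration of how such a lookup value could be populated and used; the field values are assumptions:

package main

import "fmt"

type FileExtName struct {
	Parquet string
	Jsonl   string
	Json    string
	Csv     string
}

type SplitName struct {
	Train string
	Test  string
	Val   string
	Other string
}

// Assumed values, chosen for illustration only.
var (
	Splits = SplitName{Train: "train", Test: "test", Val: "val", Other: "other"}
	Exts   = FileExtName{Parquet: ".parquet", Jsonl: ".jsonl", Json: ".json", Csv: ".csv"}
)

func main() {
	// Callers reference a field on one value instead of a separate exported constant per split.
	fmt.Println(Splits.Train, Exts.Parquet)
}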
18 changes: 9 additions & 9 deletions dataviewer/component/dataset_viewer.go
@@ -602,11 +602,11 @@ func (c *datasetViewerComponentImpl) getParquetFilesBySplit(ctx context.Context,

var validator func(string) bool
switch split {
-case workflows.TrainSplitName:
+case workflows.SplitName.Train:
validator = workflows.IsTrainFile
-case workflows.TestSplitName:
+case workflows.SplitName.Test:
validator = workflows.IsTestFile
-case workflows.ValSplitName:
+case workflows.SplitName.Val:
validator = workflows.IsValidationFile
default:
return nil, fmt.Errorf("unknown split type: %s", split)
@@ -661,24 +661,24 @@ func (c *datasetViewerComponentImpl) genDefaultCatalog(ctx context.Context, req
if calcTotal {
total = c.getFilesRowCount(ctx, req, trainFiles)
}
-configData.DataFiles = append(configData.DataFiles, dvCom.DataFiles{Split: workflows.TrainSplitName, Path: trainFiles})
-datasetInfo.Splits = append(datasetInfo.Splits, dvCom.Split{Name: workflows.TrainSplitName, NumExamples: total})
+configData.DataFiles = append(configData.DataFiles, dvCom.DataFiles{Split: workflows.SplitName.Train, Path: trainFiles})
+datasetInfo.Splits = append(datasetInfo.Splits, dvCom.Split{Name: workflows.SplitName.Train, NumExamples: total})
}
if len(testFiles) > 0 {
total := 0
if calcTotal {
total = c.getFilesRowCount(ctx, req, testFiles)
}
-configData.DataFiles = append(configData.DataFiles, dvCom.DataFiles{Split: workflows.TestSplitName, Path: testFiles})
-datasetInfo.Splits = append(datasetInfo.Splits, dvCom.Split{Name: workflows.TestSplitName, NumExamples: total})
+configData.DataFiles = append(configData.DataFiles, dvCom.DataFiles{Split: workflows.SplitName.Test, Path: testFiles})
+datasetInfo.Splits = append(datasetInfo.Splits, dvCom.Split{Name: workflows.SplitName.Test, NumExamples: total})
}
if len(valFiles) > 0 {
total := 0
if calcTotal {
total = c.getFilesRowCount(ctx, req, valFiles)
}
-configData.DataFiles = append(configData.DataFiles, dvCom.DataFiles{Split: workflows.ValSplitName, Path: valFiles})
-datasetInfo.Splits = append(datasetInfo.Splits, dvCom.Split{Name: workflows.ValSplitName, NumExamples: total})
+configData.DataFiles = append(configData.DataFiles, dvCom.DataFiles{Split: workflows.SplitName.Val, Path: valFiles})
+datasetInfo.Splits = append(datasetInfo.Splits, dvCom.Split{Name: workflows.SplitName.Val, NumExamples: total})
}
configData.ConfigName = workflows.DefaultSubsetName
datasetInfo.ConfigName = workflows.DefaultSubsetName
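The getParquetFilesBySplit switch above dispatches a per-split filename validator keyed on the new SplitName fields. A self-contained sketch of that dispatch pattern follows, with assumed stand-ins for the workflows.IsTrainFile / IsTestFile / IsValidationFile helpers (the stand-in matching logic is illustrative, not the real implementation):

package main

import (
	"fmt"
	"strings"
)

type SplitName struct{ Train, Test, Val string }

// Assumed values, for illustration only.
var Splits = SplitName{Train: "train", Test: "test", Val: "val"}

// Assumed stand-ins for workflows.IsTrainFile / IsTestFile / IsValidationFile.
func isTrainFile(name string) bool      { return strings.Contains(name, "train") }
func isTestFile(name string) bool       { return strings.Contains(name, "test") }
func isValidationFile(name string) bool { return strings.Contains(name, "val") }

// validatorFor mirrors the switch in getParquetFilesBySplit: pick a filename
// validator for the requested split, or fail on an unknown split.
func validatorFor(split string) (func(string) bool, error) {
	switch split {
	case Splits.Train:
		return isTrainFile, nil
	case Splits.Test:
		return isTestFile, nil
	case Splits.Val:
		return isValidationFile, nil
	default:
		return nil, fmt.Errorf("unknown split type: %s", split)
	}
}

func main() {
	validator, err := validatorFor(Splits.Train)
	if err != nil {
		panic(err)
	}
	fmt.Println(validator("train-00000-of-00001.parquet")) // true
}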