-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathvalohai.yaml
More file actions
159 lines (151 loc) · 4.28 KB
/
valohai.yaml
File metadata and controls
159 lines (151 loc) · 4.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
---
# Step "preprocess": invokes the `classification preprocess` sub-command of
# cli.py inside the example image, passing the parameters declared below.
- step:
    name: preprocess
    image: arimbr/valohai-fasttext-example
    command: python cli.py classification preprocess {parameters}
    inputs:
      # Single input; defaults to the bbc/data.csv object in the example bucket.
      - name: data
        default: s3://valohai-fasttext-example/bbc/data.csv
    parameters:
      # File name written by the step (consumed downstream as
      # `preprocess.output.preprocessed.txt` when the default is kept).
      - name: output_data
        description: 'name of the output file'
        type: string
        default: preprocessed.txt
      - name: text_column
        # Folded scalar: renders as one line, identical to the original text.
        description: >-
          name of the text column
          (pass multiple column names separated with a ,)
        type: string
        default: text
      - name: label_column
        description: 'name of label column'
        type: string
        default: label
      - name: engine
        description: 'CSV parser engine to use (python, python-fwf, c)'
        type: string
        default: python
# Step "split": invokes `classification split` on the `data` input and is
# configured with three output file names, three ratios and a shuffle flag.
- step:
    name: split
    image: arimbr/valohai-fasttext-example
    command: python cli.py classification split {parameters}
    inputs:
      # No default URL: the input has to be supplied by a pipeline edge or
      # at execution creation time.
      - name: data
    parameters:
      # --- output file names ---
      - name: output_train
        description: 'name of the train output file'
        type: string
        default: train.txt
      - name: output_validation
        description: 'name of the validation output file'
        type: string
        default: validation.txt
      - name: output_test
        description: 'name of the test output file'
        type: string
        default: test.txt
      # --- split ratios (the defaults 0.8 / 0.1 / 0.1 add up to 1.0) ---
      - name: train_ratio
        description: 'ratio of the train data'
        type: float
        default: 0.8
      - name: validation_ratio
        description: 'ratio of the validation data'
        type: float
        default: 0.1
      - name: test_ratio
        description: 'ratio of the test data'
        type: float
        default: 0.1
      # --- behaviour switches ---
      - name: shuffle
        description: 'shuffle data'
        type: flag
        default: true
# Step "autotune": invokes `classification autotune` with separate `train`
# and `validation` inputs.
- step:
    name: autotune
    image: arimbr/valohai-fasttext-example
    command: python cli.py classification autotune {parameters}
    inputs:
      - name: train
      - name: validation
    parameters:
      - name: metric
        # Quoted because the text contains `{`, `,` and `:` characters.
        description: 'metric objective {f1, f1:labelname}'
        type: string
        default: f1
      - name: k
        description: 'number of predictions used for evaluation'
        type: integer
        default: 1
      - name: duration
        # Default of 1200 seconds, i.e. 20 minutes.
        description: 'maximum duration in seconds'
        type: integer
        default: 1200
      - name: model_size
        description: 'constraint model file size (empty = do not quantize)'
        type: string
        default: 2000M
# Step "train": invokes `classification train`. It takes two inputs, `data`
# and `parameters`, and declares no parameters of its own even though the
# command line contains the {parameters} placeholder.
- step:
    name: train
    image: arimbr/valohai-fasttext-example
    command: python cli.py classification train {parameters}
    inputs:
      - name: data
      # In the fasttext-train pipeline this input is fed from
      # autotune.output.parameters.json.
      - name: parameters
# Step "test": invokes `classification test` with a `test` data input and a
# `model` input.
- step:
    name: test
    image: arimbr/valohai-fasttext-example
    command: python cli.py classification test {parameters}
    inputs:
      - name: test
      - name: model
    parameters:
      - name: k
        description: 'number of labels to predict'
        type: integer
        default: 1
# Step "predict": invokes `classification predict` with a `data` input and a
# `model` input. It is not referenced by the fasttext-train pipeline nodes.
- step:
    name: predict
    image: arimbr/valohai-fasttext-example
    command: python cli.py classification predict {parameters}
    inputs:
      - name: data
      - name: model
    parameters:
      - name: k
        description: 'number of labels to predict'
        type: integer
        default: 1
# Pipeline "fasttext-train": five execution nodes, each bound to the step of
# the same name, connected by output -> input edges.
- pipeline:
    name: fasttext-train
    nodes:
      - name: split
        type: execution
        step: split
      - name: preprocess
        type: execution
        step: preprocess
      - name: autotune
        type: execution
        step: autotune
      - name: train
        type: execution
        step: train
      - name: test
        type: execution
        step: test
    edges:
      # preprocess fans out to both split and train (train receives the
      # whole preprocessed file, not one of the split outputs).
      - [preprocess.output.preprocessed.txt, split.input.data]
      - [preprocess.output.preprocessed.txt, train.input.data]
      # split feeds autotune (train + validation) and test (test set).
      - [split.output.train.txt, autotune.input.train]
      - [split.output.validation.txt, autotune.input.validation]
      - [split.output.test.txt, test.input.test]
      # autotune hands its parameters file to train and its
      # train_model.bin to test; no edge consumes an output of train.
      # NOTE(review): presumably intentional (evaluate the autotuned model,
      # train the final one on all data) - confirm with the pipeline owner.
      - [autotune.output.parameters.json, train.input.parameters]
      - [autotune.output.train_model.bin, test.input.model]
# Endpoint "predict": deployment definition that starts the `api:app`
# application with uvicorn from the `:deployment` tag of the example image.
- endpoint:
    name: predict
    description: 'Predict labels from text'
    image: arimbr/valohai-fasttext-example:deployment
    # Same port number as the --port argument in server-command below.
    port: 8000
    server-command: uvicorn api:app --host 0.0.0.0 --port 8000
    files:
      # File attached to the deployment and made available as model.bin.
      - name: model
        description: 'Model output file from training step.'
        path: model.bin