|
165 | 165 | "outputs": [], |
166 | 166 | "source": [ |
167 | 167 | "# TO COMPLETE\n", |
| 168 | + "\n", |
168 | 169 | "data_schema = ml3_models.DataSchema(\n", |
169 | 170 | " columns=[\n", |
170 | 171 | " # METADATA - SAMPLE ID\n", |
171 | 172 | " ml3_models.ColumnInfo(\n", |
172 | 173 | " name='sample_id',\n", |
173 | | - " data_type='float',\n", |
174 | | - " role='id',\n", |
| 174 | + " data_type=ml3_enums.DataType.STRING,\n", |
| 175 | + " role=ml3_enums.ColumnRole.ID,\n", |
175 | 176 | " is_nullable=False\n", |
176 | 177 | " ),\n", |
177 | 178 | " # METADATA - TIMESTAMP\n", |
178 | 179 | " ml3_models.ColumnInfo(\n", |
179 | 180 | " name='timestamp',\n", |
180 | | - " data_type='string',\n", |
181 | | - " role='time_id',\n", |
| 181 | + " data_type=ml3_enums.DataType.FLOAT,\n", |
| 182 | + " role=ml3_enums.ColumnRole.TIME_ID,\n", |
182 | 183 | " is_nullable=False\n", |
183 | 184 | " ),\n", |
184 | 185 | " # FEATURE\n", |
185 | 186 | " ml3_models.ColumnInfo(\n", |
186 | 187 | " name='feature_0',\n", |
187 | | - " data_type='float',\n", |
188 | | - " role='input',\n", |
| 188 | + " data_type=ml3_enums.DataType.FLOAT,\n", |
| 189 | + " role=ml3_enums.ColumnRole.INPUT,\n", |
189 | 190 | " is_nullable=False\n", |
190 | 191 | " ),\n", |
191 | 192 | " # TARGET\n", |
192 | 193 | " ml3_models.ColumnInfo(\n", |
193 | 194 | " name='target',\n", |
194 | | - " data_type='float',\n", |
195 | | - " role='target',\n", |
| 195 | + " data_type=ml3_enums.DataType.FLOAT,\n", |
| 196 | + " role=ml3_enums.ColumnRole.TARGET,\n", |
196 | 197 | " is_nullable=False\n", |
197 | 198 | " )\n", |
198 | 199 | " ]\n", |
|
209 | 210 | "**Historical data**\n", |
210 | 211 | "\n", |
211 | 212 | "Ok, now that you inserted the data schema for your Task you are able to upload data.\n", |
212 | | - "The first category of data that we suggest you to send is the *historical* that will improve retraining report quality.\n", |
| 213 | + "There are two classes of data: *historical* and *production*.\n", |
| 214 | + "Historical data represents data you had before the model was in production, while production data comes from the production environment.\n", |
| 215 | + "Model reference data are selected from historical data by specifying the time range.\n", |
213 | 216 | "\n", |
214 | 217 | "This is the first time you send data to ML cube Platform, therefore, we have some things to explain:\n", |
215 | 218 | "\n", |
|
229 | 232 | "outputs": [], |
230 | 233 | "source": [ |
231 | 234 | "# TO COMPLETE\n", |
232 | | - "inputs_data_soure = ml3_models.LocalDataSource(\n", |
| 235 | + "inputs_data_source = ml3_models.LocalDataSource(\n", |
233 | 236 | " data_structure=ml3_enums.DataStructure.TABULAR,\n", |
234 | 237 | " file_path=\"path/to/file.csv\",\n", |
235 | 238 | " file_type=ml3_enums.FileType.CSV,\n", |
|
248 | 251 | "logger.info(f'API - Add historical data')\n", |
249 | 252 | "job_id = ml3_client.add_historical_data(\n", |
250 | 253 | " task_id=task_id,\n", |
251 | | - " inputs=ml3_models.TabularData(source=inputs_data_soure),\n", |
| 254 | + " inputs=ml3_models.TabularData(source=inputs_data_source),\n", |
252 | 255 | " target=ml3_models.TabularData(source=target_data_source)\n", |
253 | 256 | ")\n", |
254 | 257 | "logger.info(f'Job created, id {job_id}')\n", |
|
299 | 302 | "**Model reference**\n", |
300 | 303 | "\n", |
301 | 304 | "In the previous cell you created the model but it is not complete because it misses the training dataset that in ML cube Platform is called *reference*.\n", |
302 | | - "Here you add the reference data of the model by sending its data like you did for the historical data." |
| 305 | + "Here you add the reference data of the model by specifying the time range; ML cube Platform automatically selects the reference data from all the previously uploaded data." |
303 | 306 | ] |
304 | 307 | }, |
305 | 308 | { |
|
310 | 313 | "outputs": [], |
311 | 314 | "source": [ |
312 | 315 | "# TO COMPLETE\n", |
313 | | - "inputs_data_soure = ml3_models.LocalDataSource(\n", |
314 | | - " data_structure=ml3_enums.DataStructure.TABULAR,\n", |
315 | | - " file_path=\"path/to/file.csv\",\n", |
316 | | - " file_type=ml3_enums.FileType.CSV,\n", |
317 | | - " is_folder=False,\n", |
318 | | - " folder_type=None\n", |
319 | | - ")\n", |
320 | | - "target_data_source = ml3_models.GCSDataSource(\n", |
321 | | - " dataset_type=ml3_enums.DatasetType.TABULAR,\n", |
322 | | - " object_path=\"gs://path/to/file.csv\",\n", |
323 | | - " credentials_id='gcp_credentials_id',\n", |
324 | | - " file_type=ml3_enums.FileType.CSV,\n", |
325 | | - " is_folder=False,\n", |
326 | | - " folder_type=None\n", |
327 | | - ")\n", |
328 | | - "\n", |
329 | 316 | "logger.info(f'API - Add model reference')\n", |
330 | | - "job_id = ml3_client.add_model_reference(\n", |
| 317 | + "job_id = ml3_client.set_model_reference(\n", |
331 | 318 | " model_id=model_id,\n", |
332 | | - " inputs=ml3_models.TabularData(source=inputs_data_soure),\n", |
333 | | - " target=ml3_models.TabularData(source=target_data_source)\n", |
| 319 | + " from_timestamp=0.,\n", |
| 320 | + " to_timestamp=0.,\n", |
334 | 321 | ")\n", |
335 | 322 | "logger.info(f'Job created, id {job_id}')\n", |
336 | 323 | "\n", |
|
0 commit comments