|
165 | 165 | "outputs": [], |
166 | 166 | "source": [ |
167 | 167 | "# TO COMPLETE\n", |
| 168 | + "\n", |
168 | 169 | "data_schema = ml3_models.DataSchema(\n", |
169 | 170 | " columns=[\n", |
170 | 171 | " # METADATA - SAMPLE ID\n", |
171 | 172 | " ml3_models.ColumnInfo(\n", |
172 | 173 | " name='sample_id',\n", |
173 | | - " data_type='float',\n", |
174 | | - " role='id',\n", |
| 174 | + " data_type=ml3_enums.DataType.STRING,\n", |
| 175 | + " role=ml3_enums.ColumnRole.ID,\n", |
175 | 176 | " is_nullable=False\n", |
176 | 177 | " ),\n", |
177 | 178 | " # METADATA - TIMESTAMP\n", |
178 | 179 | " ml3_models.ColumnInfo(\n", |
179 | 180 | " name='timestamp',\n", |
180 | | - " data_type='string',\n", |
181 | | - " role='time_id',\n", |
| 181 | + " data_type=ml3_enums.DataType.FLOAT,\n", |
| 182 | + " role=ml3_enums.ColumnRole.TIME_ID,\n", |
182 | 183 | " is_nullable=False\n", |
183 | 184 | " ),\n", |
184 | 185 | " # FEATURE\n", |
185 | 186 | " ml3_models.ColumnInfo(\n", |
186 | 187 | " name='feature_0',\n", |
187 | | - " data_type='float',\n", |
188 | | - " role='input',\n", |
| 188 | + " data_type=ml3_enums.DataType.FLOAT,\n", |
| 189 | + " role=ml3_enums.ColumnRole.INPUT,\n", |
189 | 190 | " is_nullable=False\n", |
190 | 191 | " ),\n", |
191 | 192 | " # TARGET\n", |
192 | 193 | " ml3_models.ColumnInfo(\n", |
193 | 194 | " name='target',\n", |
194 | | - " data_type='float',\n", |
195 | | - " role='target',\n", |
| 195 | + " data_type=ml3_enums.DataType.FLOAT,\n", |
| 196 | + " role=ml3_enums.ColumnRole.TARGET,\n", |
196 | 197 | " is_nullable=False\n", |
197 | 198 | " )\n", |
198 | 199 | " ]\n", |
|
209 | 210 | "**Historical data**\n", |
210 | 211 | "\n", |
211 | 212 | "Ok, now that you inserted the data schema for your Task you are able to upload data.\n", |
212 | | - "The first category of data that we suggest you to send is the *historical* that will improve retraining report quality.\n", |
| 213 | + "There are two classes of data: *historical* and *production*.\n", |
| 214 | + "Historical data represents data you had before the model was in production, while production data comes from the production environment.\n", |
| 215 | + "Model reference data are selected from historical data by specifying the time range.\n", |
213 | 216 | "\n", |
214 | 217 | "This is the first time you send data to ML cube Platform, therefore, we have some things to explain:\n", |
215 | 218 | "\n", |
|
229 | 232 | "outputs": [], |
230 | 233 | "source": [ |
231 | 234 | "# TO COMPLETE\n", |
232 | | - "inputs_data_soure = ml3_models.LocalDataSource(\n", |
| 235 | + "inputs_data_source = ml3_models.LocalDataSource(\n", |
233 | 236 | " data_structure=ml3_enums.DataStructure.TABULAR,\n", |
234 | 237 | " file_path=\"path/to/file.csv\",\n", |
235 | 238 | " file_type=ml3_enums.FileType.CSV,\n", |
|
248 | 251 | "logger.info(f'API - Add historical data')\n", |
249 | 252 | "job_id = ml3_client.add_historical_data(\n", |
250 | 253 | " task_id=task_id,\n", |
251 | | - " inputs=ml3_models.TabularData(source=inputs_data_soure),\n", |
| 254 | + " inputs=ml3_models.TabularData(source=inputs_data_source),\n", |
252 | 255 | " target=ml3_models.TabularData(source=target_data_source)\n", |
253 | 256 | ")\n", |
254 | 257 | "logger.info(f'Job created, id {job_id}')\n", |
|
299 | 302 | "**Model reference**\n", |
300 | 303 | "\n", |
301 | 304 | "In the previous cell you created the model but it is not complete because it misses the training dataset that in ML cube Platform is called *reference*.\n", |
302 | | - "Here you add the reference data of the model by sending its data like you did for the historical data." |
| 305 | + "Here you add the reference data of the model by specifying the time range; ML cube Platform automatically selects the reference data from all the previously uploaded data." |
303 | 306 | ] |
304 | 307 | }, |
305 | 308 | { |
|
310 | 313 | "outputs": [], |
311 | 314 | "source": [ |
312 | 315 | "# TO COMPLETE\n", |
313 | | - "inputs_data_soure = ml3_models.LocalDataSource(\n", |
314 | | - " data_structure=ml3_enums.DataStructure.TABULAR,\n", |
315 | | - " file_path=\"path/to/file.csv\",\n", |
316 | | - " file_type=ml3_enums.FileType.CSV,\n", |
317 | | - " is_folder=False,\n", |
318 | | - " folder_type=None\n", |
319 | | - ")\n", |
320 | | - "target_data_source = ml3_models.GCSDataSource(\n", |
321 | | - " dataset_type=ml3_enums.DatasetType.TABULAR,\n", |
322 | | - " object_path=\"gs://path/to/file.csv\",\n", |
323 | | - " credentials_id='gcp_credentials_id',\n", |
324 | | - " file_type=ml3_enums.FileType.CSV,\n", |
325 | | - " is_folder=False,\n", |
326 | | - " folder_type=None\n", |
327 | | - ")\n", |
328 | | - "\n", |
329 | 316 | "logger.info(f'API - Add model reference')\n", |
330 | | - "job_id = ml3_client.add_model_reference(\n", |
| 317 | + "job_id = ml3_client.set_model_reference(\n", |
331 | 318 | " model_id=model_id,\n", |
332 | | - " inputs=ml3_models.TabularData(source=inputs_data_soure),\n", |
333 | | - " target=ml3_models.TabularData(source=target_data_source)\n", |
| 319 | + " from_timestamp=0.,\n", |
| 320 | + " to_timestamp=0.,\n", |
334 | 321 | ")\n", |
335 | 322 | "logger.info(f'Job created, id {job_id}')\n", |
336 | 323 | "\n", |
|
0 commit comments