Skip to content

Commit 348c963

Browse files
bbozkayarnyakedknvoliverholworthy
authored
adding unit test for end-to-end example (#669)
* adding unit test for multi-gpu example * added test for notebook 03 * fixed formatting * update * update * Update 01-ETL-with-NVTabular.ipynb: day of week is between 0 and 6; it must be scaled with a max value of 6 to produce correct values from the 0-1 range. If we do col+1 and scale with 7, then a section of the 0-2pi range (for Sine purposes) will not be represented. * Update 01-ETL-with-NVTabular.ipynb: Reversed the previous edit for weekday scaling. It is correct that it should be scaled between 0-7, because day 0 (unused/not applicable after +1 added) overlaps with day 7 for Sine purposes. Monday should scale to 1/7, Sunday should scale to 7/7 to achieve even distribution of days along the sine curve. * reduce num_rows * Update test_end_to_end_session_based.py * Update 01-ETL-with-NVTabular.ipynb * updated test script and notebook * updated file * removed nb3 test due to multi-gpu freezing issue * revised notebooks, added back nb3 test * fixed test file with black * update test py * update test py * Use `python -m torch.distributed.run` instead of `torchrun` The `torchrun` script installed in the system is a python script with a shebang line starting with `#!/usr/bin/python3`. This picks up the wrong version of python when running in a virtualenv like our tox test environment. If instead this were `#!/usr/bin/env python3` it would work ok in a tox environment to call `torchrun`. However, until either the pytorch package is updated for this to happen or we update our CI image for this to take place, running the python command directly is more reliable. --------- Co-authored-by: rnyak <[email protected]> Co-authored-by: edknv <[email protected]> Co-authored-by: rnyak <[email protected]> Co-authored-by: Oliver Holworthy <[email protected]>
1 parent f3c4d2a commit 348c963

File tree

4 files changed

+631
-329
lines changed

4 files changed

+631
-329
lines changed

examples/end-to-end-session-based/01-ETL-with-NVTabular.ipynb

Lines changed: 123 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@
6161
"\n",
6262
"The dataset is available on [Kaggle](https://www.kaggle.com/chadgostopp/recsys-challenge-2015). You need to download it and copy to the `DATA_FOLDER` path. Note that we are only using the `yoochoose-clicks.dat` file.\n",
6363
"\n",
64+
"Alternatively, you can generate a synthetic dataset with the same columns and dtypes as the `YOOCHOOSE` dataset. By default it covers the date range `2014/4/1` to `2014/4/5`, which can be changed with the `START_DATE` and `END_DATE` environment variables. If the environment variable `USE_SYNTHETIC` is set to `True`, the code below will execute the function `generate_synthetic_data` and the rest of the notebook will run on the synthetic dataset.\n",
65+
"\n",
6466
"First, let's start by importing several libraries:"
6567
]
6668
},
@@ -75,17 +77,18 @@
7577
"output_type": "stream",
7678
"text": [
7779
"/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/tf.py:52: UserWarning: Tensorflow dtype mappings did not load successfully due to an error: No module named 'tensorflow'\n",
78-
" warn(f\"Tensorflow dtype mappings did not load successfully due to an error: {exc.msg}\")\n",
79-
"/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
80-
" from .autonotebook import tqdm as notebook_tqdm\n"
80+
" warn(f\"Tensorflow dtype mappings did not load successfully due to an error: {exc.msg}\")\n"
8181
]
8282
}
8383
],
8484
"source": [
8585
"import os\n",
8686
"import glob\n",
8787
"import numpy as np\n",
88+
"import pandas as pd\n",
8889
"import gc\n",
90+
"import calendar\n",
91+
"import datetime\n",
8992
"\n",
9093
"import cudf\n",
9194
"import cupy\n",
@@ -128,12 +131,14 @@
128131
"metadata": {},
129132
"outputs": [],
130133
"source": [
131-
"DATA_FOLDER = \"/workspace/data/\"\n",
134+
"DATA_FOLDER = os.environ.get(\"DATA_FOLDER\", \"/workspace/data\")\n",
132135
"FILENAME_PATTERN = 'yoochoose-clicks.dat'\n",
133136
"DATA_PATH = os.path.join(DATA_FOLDER, FILENAME_PATTERN)\n",
134137
"\n",
135138
"OUTPUT_FOLDER = \"./yoochoose_transformed\"\n",
136-
"OVERWRITE = False"
139+
"OVERWRITE = False\n",
140+
"\n",
141+
"USE_SYNTHETIC = os.environ.get(\"USE_SYNTHETIC\", False)"
137142
]
138143
},
139144
{
@@ -144,16 +149,89 @@
144149
"## Load and clean raw data"
145150
]
146151
},
152+
{
153+
"cell_type": "markdown",
154+
"id": "3fba8546-668c-4743-960e-ea2aef99ef24",
155+
"metadata": {},
156+
"source": [
157+
"Execute the cell below if you would like to work with synthetic data. Otherwise, you can skip it and continue with the next cell."
158+
]
159+
},
147160
{
148161
"cell_type": "code",
149162
"execution_count": 5,
163+
"id": "07d14289-c783-45f0-86e8-e5c1001bfd76",
164+
"metadata": {},
165+
"outputs": [],
166+
"source": [
167+
"def generate_synthetic_data(\n",
168+
" start_date: datetime.date, end_date: datetime.date, rows_per_day: int = 10000\n",
169+
") -> pd.DataFrame:\n",
170+
" assert end_date > start_date, \"end_date must be later than start_date\"\n",
171+
"\n",
172+
" number_of_days = (end_date - start_date).days\n",
173+
" total_number_of_rows = number_of_days * rows_per_day\n",
174+
"\n",
175+
" # Generate a long-tail distribution of item interactions. This simulates that some items are\n",
176+
" # more popular than others.\n",
177+
" long_tailed_item_distribution = np.clip(\n",
178+
" np.random.lognormal(3.0, 1.0, total_number_of_rows).astype(np.int64), 1, 50000\n",
179+
" )\n",
180+
"\n",
181+
" # generate random item interaction features\n",
182+
" df = pd.DataFrame(\n",
183+
" {\n",
184+
" \"session_id\": np.random.randint(70000, 80000, total_number_of_rows),\n",
185+
" \"item_id\": long_tailed_item_distribution,\n",
186+
" },\n",
187+
" )\n",
188+
"\n",
189+
" # generate category mapping for each item-id\n",
190+
" df[\"category\"] = pd.cut(df[\"item_id\"], bins=334, labels=np.arange(1, 335)).astype(\n",
191+
" np.int64\n",
192+
" )\n",
193+
"\n",
194+
" max_session_length = 60 * 60 # 1 hour\n",
195+
"\n",
196+
" def add_timestamp_to_session(session: pd.DataFrame):\n",
197+
" random_start_date_and_time = calendar.timegm(\n",
198+
" (\n",
199+
" start_date\n",
200+
" # Add day offset from start_date\n",
201+
" + datetime.timedelta(days=np.random.randint(0, number_of_days))\n",
202+
" # Add time offset within the random day\n",
203+
" + datetime.timedelta(seconds=np.random.randint(0, 86_400))\n",
204+
" ).timetuple()\n",
205+
" )\n",
206+
" session[\"timestamp\"] = random_start_date_and_time + np.clip(\n",
207+
" np.random.lognormal(3.0, 1.0, len(session)).astype(np.int64),\n",
208+
" 0,\n",
209+
" max_session_length,\n",
210+
" )\n",
211+
" return session\n",
212+
"\n",
213+
" df = df.groupby(\"session_id\").apply(add_timestamp_to_session).reset_index()\n",
214+
"\n",
215+
" return df"
216+
]
217+
},
218+
{
219+
"cell_type": "code",
220+
"execution_count": 6,
150221
"id": "f35dff52",
151222
"metadata": {},
152223
"outputs": [],
153224
"source": [
154-
"interactions_df = cudf.read_csv(DATA_PATH, sep=',', \n",
155-
" names=['session_id','timestamp', 'item_id', 'category'], \n",
156-
" dtype=['int', 'datetime64[s]', 'int', 'int'])"
225+
"if USE_SYNTHETIC:\n",
226+
" START_DATE = os.environ.get(\"START_DATE\", \"2014/4/1\")\n",
227+
" END_DATE = os.environ.get(\"END_DATE\", \"2014/4/5\")\n",
228+
" interactions_df = generate_synthetic_data(datetime.datetime.strptime(START_DATE, '%Y/%m/%d'),\n",
229+
" datetime.datetime.strptime(END_DATE, '%Y/%m/%d'))\n",
230+
" interactions_df = cudf.from_pandas(interactions_df)\n",
231+
"else:\n",
232+
" interactions_df = cudf.read_csv(DATA_PATH, sep=',', \n",
233+
" names=['session_id','timestamp', 'item_id', 'category'], \n",
234+
" dtype=['int', 'datetime64[s]', 'int', 'int'])"
157235
]
158236
},
159237
{
@@ -166,7 +244,7 @@
166244
},
167245
{
168246
"cell_type": "code",
169-
"execution_count": 6,
247+
"execution_count": 7,
170248
"id": "22c2df72",
171249
"metadata": {},
172250
"outputs": [
@@ -181,13 +259,16 @@
181259
],
182260
"source": [
183261
"print(\"Count with in-session repeated interactions: {}\".format(len(interactions_df)))\n",
262+
"\n",
184263
"# Sorts the dataframe by session and timestamp, to remove consecutive repetitions\n",
185264
"interactions_df.timestamp = interactions_df.timestamp.astype(int)\n",
186265
"interactions_df = interactions_df.sort_values(['session_id', 'timestamp'])\n",
187266
"past_ids = interactions_df['item_id'].shift(1).fillna()\n",
188267
"session_past_ids = interactions_df['session_id'].shift(1).fillna()\n",
268+
"\n",
189269
"# Keep only interactions that are not consecutive repetitions of the same item within a session\n",
190270
"interactions_df = interactions_df[~((interactions_df['session_id'] == session_past_ids) & (interactions_df['item_id'] == past_ids))]\n",
271+
"\n",
191272
"print(\"Count after removed in-session repeated interactions: {}\".format(len(interactions_df)))"
192273
]
193274
},
@@ -201,7 +282,7 @@
201282
},
202283
{
203284
"cell_type": "code",
204-
"execution_count": 7,
285+
"execution_count": 8,
205286
"id": "66a1bd13",
206287
"metadata": {},
207288
"outputs": [
@@ -234,17 +315,19 @@
234315
},
235316
{
236317
"cell_type": "code",
237-
"execution_count": 8,
318+
"execution_count": 9,
238319
"id": "a0f908a1",
239320
"metadata": {},
240321
"outputs": [],
241322
"source": [
323+
"if os.path.isdir(DATA_FOLDER) == False:\n",
324+
" os.mkdir(DATA_FOLDER)\n",
242325
"interactions_merged_df.to_parquet(os.path.join(DATA_FOLDER, 'interactions_merged_df.parquet'))"
243326
]
244327
},
245328
{
246329
"cell_type": "code",
247-
"execution_count": 9,
330+
"execution_count": 10,
248331
"id": "909f87c5-bff5-48c8-b714-cc556a4bc64d",
249332
"metadata": {
250333
"tags": []
@@ -265,17 +348,17 @@
265348
},
266349
{
267350
"cell_type": "code",
268-
"execution_count": 10,
351+
"execution_count": 11,
269352
"id": "04a3b5b7",
270353
"metadata": {},
271354
"outputs": [
272355
{
273356
"data": {
274357
"text/plain": [
275-
"0"
358+
"517"
276359
]
277360
},
278-
"execution_count": 10,
361+
"execution_count": 11,
279362
"metadata": {},
280363
"output_type": "execute_result"
281364
}
@@ -330,7 +413,7 @@
330413
},
331414
{
332415
"cell_type": "code",
333-
"execution_count": 11,
416+
"execution_count": 13,
334417
"id": "86f80068",
335418
"metadata": {},
336419
"outputs": [],
@@ -425,7 +508,7 @@
425508
},
426509
{
427510
"cell_type": "code",
428-
"execution_count": 12,
511+
"execution_count": 14,
429512
"id": "10b5c96c",
430513
"metadata": {},
431514
"outputs": [],
@@ -447,7 +530,6 @@
447530
"# Truncate sequence features to the first 20 interacted items \n",
448531
"SESSIONS_MAX_LENGTH = 20 \n",
449532
"\n",
450-
"\n",
451533
"item_feat = groupby_features['item_id-list'] >> nvt.ops.TagAsItemID()\n",
452534
"cont_feats = groupby_features['et_dayofweek_sin-list', 'product_recency_days_log_norm-list'] >> nvt.ops.AddMetadata(tags=[Tags.CONTINUOUS])\n",
453535
"\n",
@@ -491,7 +573,7 @@
491573
},
492574
{
493575
"cell_type": "code",
494-
"execution_count": 13,
576+
"execution_count": 15,
495577
"id": "45803886",
496578
"metadata": {},
497579
"outputs": [],
@@ -513,7 +595,7 @@
513595
},
514596
{
515597
"cell_type": "code",
516-
"execution_count": 14,
598+
"execution_count": 16,
517599
"id": "4c10efb5-89c5-4458-a634-475eb459a47c",
518600
"metadata": {
519601
"tags": []
@@ -600,7 +682,7 @@
600682
" <tr>\n",
601683
" <th>2</th>\n",
602684
" <td>item_id-list</td>\n",
603-
" <td>(Tags.CATEGORICAL, Tags.ITEM, Tags.ID, Tags.LIST)</td>\n",
685+
" <td>(Tags.CATEGORICAL, Tags.ID, Tags.LIST, Tags.ITEM)</td>\n",
604686
" <td>DType(name='int64', element_type=&lt;ElementType....</td>\n",
605687
" <td>True</td>\n",
606688
" <td>True</td>\n",
@@ -697,10 +779,10 @@
697779
"</div>"
698780
],
699781
"text/plain": [
700-
"[{'name': 'session_id', 'tags': {<Tags.CATEGORICAL: 'categorical'>}, 'properties': {}, 'dtype': DType(name='int64', element_type=<ElementType.Int: 'int'>, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'item_id-count', 'tags': {<Tags.CATEGORICAL: 'categorical'>}, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': './/categories/unique.item_id.parquet', 'domain': {'min': 0, 'max': 52741, 'name': 'item_id'}, 'embedding_sizes': {'cardinality': 52742, 'dimension': 512}}, 'dtype': DType(name='int32', element_type=<ElementType.Int: 'int'>, element_size=32, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'item_id-list', 'tags': {<Tags.CATEGORICAL: 'categorical'>, <Tags.ITEM: 'item'>, <Tags.ID: 'id'>, <Tags.LIST: 'list'>}, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': './/categories/unique.item_id.parquet', 'domain': {'min': 0, 'max': 52741, 'name': 'item_id'}, 'embedding_sizes': {'cardinality': 52742, 'dimension': 512}, 'value_count': {'min': 0, 'max': 20}}, 'dtype': DType(name='int64', element_type=<ElementType.Int: 'int'>, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=0, max=20)))), 'is_list': True, 'is_ragged': True}, {'name': 'et_dayofweek_sin-list', 'tags': {<Tags.CONTINUOUS: 'continuous'>, <Tags.LIST: 'list'>}, 'properties': {'value_count': {'min': 0, 'max': 20}}, 'dtype': DType(name='float64', element_type=<ElementType.Float: 'float'>, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=0, max=20)))), 'is_list': True, 'is_ragged': True}, {'name': 'product_recency_days_log_norm-list', 'tags': {<Tags.CONTINUOUS: 'continuous'>, <Tags.LIST: 'list'>}, 'properties': {'value_count': {'min': 0, 'max': 
20}}, 'dtype': DType(name='float32', element_type=<ElementType.Float: 'float'>, element_size=32, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=0, max=20)))), 'is_list': True, 'is_ragged': True}, {'name': 'category-list', 'tags': {<Tags.CATEGORICAL: 'categorical'>, <Tags.LIST: 'list'>}, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': './/categories/unique.category.parquet', 'domain': {'min': 0, 'max': 336, 'name': 'category'}, 'embedding_sizes': {'cardinality': 337, 'dimension': 42}, 'value_count': {'min': 0, 'max': 20}}, 'dtype': DType(name='int64', element_type=<ElementType.Int: 'int'>, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=0, max=20)))), 'is_list': True, 'is_ragged': True}, {'name': 'day_index', 'tags': {<Tags.CATEGORICAL: 'categorical'>}, 'properties': {}, 'dtype': DType(name='int64', element_type=<ElementType.Int: 'int'>, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}]"
782+
"[{'name': 'session_id', 'tags': {<Tags.CATEGORICAL: 'categorical'>}, 'properties': {}, 'dtype': DType(name='int64', element_type=<ElementType.Int: 'int'>, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'item_id-count', 'tags': {<Tags.CATEGORICAL: 'categorical'>}, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': './/categories/unique.item_id.parquet', 'domain': {'min': 0, 'max': 52741, 'name': 'item_id'}, 'embedding_sizes': {'cardinality': 52742, 'dimension': 512}}, 'dtype': DType(name='int32', element_type=<ElementType.Int: 'int'>, element_size=32, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'item_id-list', 'tags': {<Tags.CATEGORICAL: 'categorical'>, <Tags.ID: 'id'>, <Tags.LIST: 'list'>, <Tags.ITEM: 'item'>}, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': './/categories/unique.item_id.parquet', 'domain': {'min': 0, 'max': 52741, 'name': 'item_id'}, 'embedding_sizes': {'cardinality': 52742, 'dimension': 512}, 'value_count': {'min': 0, 'max': 20}}, 'dtype': DType(name='int64', element_type=<ElementType.Int: 'int'>, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=0, max=20)))), 'is_list': True, 'is_ragged': True}, {'name': 'et_dayofweek_sin-list', 'tags': {<Tags.CONTINUOUS: 'continuous'>, <Tags.LIST: 'list'>}, 'properties': {'value_count': {'min': 0, 'max': 20}}, 'dtype': DType(name='float64', element_type=<ElementType.Float: 'float'>, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=0, max=20)))), 'is_list': True, 'is_ragged': True}, {'name': 'product_recency_days_log_norm-list', 'tags': {<Tags.CONTINUOUS: 'continuous'>, <Tags.LIST: 'list'>}, 'properties': {'value_count': {'min': 0, 'max': 
20}}, 'dtype': DType(name='float32', element_type=<ElementType.Float: 'float'>, element_size=32, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=0, max=20)))), 'is_list': True, 'is_ragged': True}, {'name': 'category-list', 'tags': {<Tags.CATEGORICAL: 'categorical'>, <Tags.LIST: 'list'>}, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': './/categories/unique.category.parquet', 'domain': {'min': 0, 'max': 336, 'name': 'category'}, 'embedding_sizes': {'cardinality': 337, 'dimension': 42}, 'value_count': {'min': 0, 'max': 20}}, 'dtype': DType(name='int64', element_type=<ElementType.Int: 'int'>, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=0, max=20)))), 'is_list': True, 'is_ragged': True}, {'name': 'day_index', 'tags': {<Tags.CATEGORICAL: 'categorical'>}, 'properties': {}, 'dtype': DType(name='int64', element_type=<ElementType.Int: 'int'>, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}]"
701783
]
702784
},
703-
"execution_count": 14,
785+
"execution_count": 16,
704786
"metadata": {},
705787
"output_type": "execute_result"
706788
}
@@ -719,7 +801,7 @@
719801
},
720802
{
721803
"cell_type": "code",
722-
"execution_count": 15,
804+
"execution_count": 17,
723805
"id": "2d035a88-2146-4b9a-96fd-dd42be86e2a1",
724806
"metadata": {},
725807
"outputs": [],
@@ -747,19 +829,23 @@
747829
},
748830
{
749831
"cell_type": "code",
750-
"execution_count": 16,
832+
"execution_count": 18,
751833
"id": "2b4f5b73-459c-4356-87c8-9afb974cc77d",
752834
"metadata": {},
753835
"outputs": [],
754836
"source": [
755837
"# read in the processed train dataset\n",
756838
"sessions_gdf = cudf.read_parquet(os.path.join(DATA_FOLDER, \"processed_nvt/part_0.parquet\"))\n",
757-
"sessions_gdf = sessions_gdf[sessions_gdf.day_index>=178]"
839+
"if USE_SYNTHETIC:\n",
840+
" THRESHOLD_DAY_INDEX = int(os.environ.get(\"THRESHOLD_DAY_INDEX\", '1'))\n",
841+
" sessions_gdf = sessions_gdf[sessions_gdf.day_index>=THRESHOLD_DAY_INDEX]\n",
842+
"else:\n",
843+
" sessions_gdf = sessions_gdf[sessions_gdf.day_index>=178]"
758844
]
759845
},
760846
{
761847
"cell_type": "code",
762-
"execution_count": 17,
848+
"execution_count": 19,
763849
"id": "e18d9c63",
764850
"metadata": {},
765851
"outputs": [
@@ -783,13 +869,13 @@
783869
"6606149 [-0.7818309228245777, -0.7818309228245777] \n",
784870
"\n",
785871
" product_recency_days_log_norm-list \\\n",
786-
"6606147 [1.5241553, 1.5238751, 1.5239341, 1.5241631, 1... \n",
787-
"6606148 [-0.5330064, 1.521494] \n",
788-
"6606149 [1.5338266, 1.5355074] \n",
872+
"6606147 [1.5241561, 1.523876, 1.523935, 1.5241641, 1.5... \n",
873+
"6606148 [-0.533007, 1.521495] \n",
874+
"6606149 [1.5338274, 1.5355083] \n",
789875
"\n",
790876
" category-list day_index \n",
791-
"6606147 [4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4] 178 \n",
792-
"6606148 [3, 3] 178 \n",
877+
"6606147 [4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 4, 4] 178 \n",
878+
"6606148 [1, 3] 178 \n",
793879
"6606149 [8, 8] 180 \n"
794880
]
795881
}
@@ -800,15 +886,15 @@
800886
},
801887
{
802888
"cell_type": "code",
803-
"execution_count": 18,
889+
"execution_count": 20,
804890
"id": "5175aeaf",
805891
"metadata": {},
806892
"outputs": [
807893
{
808894
"name": "stderr",
809895
"output_type": "stream",
810896
"text": [
811-
"Creating time-based splits: 100%|██████████| 5/5 [00:02<00:00, 2.37it/s]\n"
897+
"Creating time-based splits: 100%|██████████| 5/5 [00:02<00:00, 2.24it/s]\n"
812898
]
813899
}
814900
],
@@ -823,17 +909,17 @@
823909
},
824910
{
825911
"cell_type": "code",
826-
"execution_count": 19,
912+
"execution_count": 21,
827913
"id": "3bd1bad9",
828914
"metadata": {},
829915
"outputs": [
830916
{
831917
"data": {
832918
"text/plain": [
833-
"583"
919+
"748"
834920
]
835921
},
836-
"execution_count": 19,
922+
"execution_count": 21,
837923
"metadata": {},
838924
"output_type": "execute_result"
839925
}

0 commit comments

Comments
 (0)