Skip to content

Commit fd00d55

Browse files
elephaint and jmoralez authored
fix(pandas): use arrays for values and indices in time_features (#143)
Co-authored-by: José Morales <jmoralz92@gmail.com>
1 parent ebdba72 commit fd00d55

File tree

4 files changed

+45
-27
lines changed

4 files changed

+45
-27
lines changed

nbs/feature_engineering.ipynb

Lines changed: 41 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -849,7 +849,7 @@
849849
" if isinstance(times, pd.DatetimeIndex):\n",
850850
" if feature in (\"week\", \"weekofyear\"):\n",
851851
" times = times.isocalendar()\n",
852-
" feat_vals = getattr(times, feature)\n",
852+
" feat_vals = getattr(times, feature).to_numpy()\n",
853853
" else:\n",
854854
" feat_vals = getattr(times.dt, feature)()\n",
855855
" return feat_name, feat_vals\n",
@@ -864,7 +864,7 @@
864864
" if isinstance(df, pd.DataFrame):\n",
865865
" times = pd.Index(unique_times)\n",
866866
" time2pos = {time: i for i, time in enumerate(times)}\n",
867-
" restore_idxs = df[time_col].map(time2pos)\n",
867+
" restore_idxs = df[time_col].map(time2pos).to_numpy()\n",
868868
" for feature in features:\n",
869869
" name, vals = _compute_time_feature(times, feature)\n",
870870
" df[name] = vals[restore_idxs]\n",
@@ -971,6 +971,7 @@
971971
" <th>y</th>\n",
972972
" <th>month</th>\n",
973973
" <th>day</th>\n",
974+
" <th>week</th>\n",
974975
" </tr>\n",
975976
" </thead>\n",
976977
" <tbody>\n",
@@ -981,6 +982,7 @@
981982
" <td>0.428973</td>\n",
982983
" <td>10</td>\n",
983984
" <td>5</td>\n",
985+
" <td>40</td>\n",
984986
" </tr>\n",
985987
" <tr>\n",
986988
" <th>1</th>\n",
@@ -989,6 +991,7 @@
989991
" <td>1.423626</td>\n",
990992
" <td>10</td>\n",
991993
" <td>6</td>\n",
994+
" <td>40</td>\n",
992995
" </tr>\n",
993996
" <tr>\n",
994997
" <th>2</th>\n",
@@ -997,6 +1000,7 @@
9971000
" <td>2.311782</td>\n",
9981001
" <td>10</td>\n",
9991002
" <td>7</td>\n",
1003+
" <td>40</td>\n",
10001004
" </tr>\n",
10011005
" <tr>\n",
10021006
" <th>3</th>\n",
@@ -1005,6 +1009,7 @@
10051009
" <td>3.192191</td>\n",
10061010
" <td>10</td>\n",
10071011
" <td>8</td>\n",
1012+
" <td>40</td>\n",
10081013
" </tr>\n",
10091014
" <tr>\n",
10101015
" <th>4</th>\n",
@@ -1013,6 +1018,7 @@
10131018
" <td>4.148767</td>\n",
10141019
" <td>10</td>\n",
10151020
" <td>9</td>\n",
1021+
" <td>41</td>\n",
10161022
" </tr>\n",
10171023
" <tr>\n",
10181024
" <th>...</th>\n",
@@ -1021,6 +1027,7 @@
10211027
" <td>...</td>\n",
10221028
" <td>...</td>\n",
10231029
" <td>...</td>\n",
1030+
" <td>...</td>\n",
10241031
" </tr>\n",
10251032
" <tr>\n",
10261033
" <th>1096</th>\n",
@@ -1029,6 +1036,7 @@
10291036
" <td>4.058910</td>\n",
10301037
" <td>5</td>\n",
10311038
" <td>10</td>\n",
1039+
" <td>19</td>\n",
10321040
" </tr>\n",
10331041
" <tr>\n",
10341042
" <th>1097</th>\n",
@@ -1037,6 +1045,7 @@
10371045
" <td>5.178157</td>\n",
10381046
" <td>5</td>\n",
10391047
" <td>11</td>\n",
1048+
" <td>19</td>\n",
10401049
" </tr>\n",
10411050
" <tr>\n",
10421051
" <th>1098</th>\n",
@@ -1045,6 +1054,7 @@
10451054
" <td>6.133142</td>\n",
10461055
" <td>5</td>\n",
10471056
" <td>12</td>\n",
1057+
" <td>19</td>\n",
10481058
" </tr>\n",
10491059
" <tr>\n",
10501060
" <th>1099</th>\n",
@@ -1053,6 +1063,7 @@
10531063
" <td>0.403709</td>\n",
10541064
" <td>5</td>\n",
10551065
" <td>13</td>\n",
1066+
" <td>19</td>\n",
10561067
" </tr>\n",
10571068
" <tr>\n",
10581069
" <th>1100</th>\n",
@@ -1061,27 +1072,28 @@
10611072
" <td>1.081779</td>\n",
10621073
" <td>5</td>\n",
10631074
" <td>14</td>\n",
1075+
" <td>20</td>\n",
10641076
" </tr>\n",
10651077
" </tbody>\n",
10661078
"</table>\n",
1067-
"<p>1101 rows × 5 columns</p>\n",
1079+
"<p>1101 rows × 6 columns</p>\n",
10681080
"</div>"
10691081
],
10701082
"text/plain": [
1071-
" unique_id ds y month day\n",
1072-
"0 0 2000-10-05 0.428973 10 5\n",
1073-
"1 0 2000-10-06 1.423626 10 6\n",
1074-
"2 0 2000-10-07 2.311782 10 7\n",
1075-
"3 0 2000-10-08 3.192191 10 8\n",
1076-
"4 0 2000-10-09 4.148767 10 9\n",
1077-
"... ... ... ... ... ...\n",
1078-
"1096 4 2001-05-10 4.058910 5 10\n",
1079-
"1097 4 2001-05-11 5.178157 5 11\n",
1080-
"1098 4 2001-05-12 6.133142 5 12\n",
1081-
"1099 4 2001-05-13 0.403709 5 13\n",
1082-
"1100 4 2001-05-14 1.081779 5 14\n",
1083+
" unique_id ds y month day week\n",
1084+
"0 0 2000-10-05 0.428973 10 5 40\n",
1085+
"1 0 2000-10-06 1.423626 10 6 40\n",
1086+
"2 0 2000-10-07 2.311782 10 7 40\n",
1087+
"3 0 2000-10-08 3.192191 10 8 40\n",
1088+
"4 0 2000-10-09 4.148767 10 9 41\n",
1089+
"... ... ... ... ... ... ...\n",
1090+
"1096 4 2001-05-10 4.058910 5 10 19\n",
1091+
"1097 4 2001-05-11 5.178157 5 11 19\n",
1092+
"1098 4 2001-05-12 6.133142 5 12 19\n",
1093+
"1099 4 2001-05-13 0.403709 5 13 19\n",
1094+
"1100 4 2001-05-14 1.081779 5 14 20\n",
10831095
"\n",
1084-
"[1101 rows x 5 columns]"
1096+
"[1101 rows x 6 columns]"
10851097
]
10861098
},
10871099
"execution_count": null,
@@ -1090,7 +1102,7 @@
10901102
}
10911103
],
10921104
"source": [
1093-
"transformed_df, future_df = time_features(series, freq='D', features=['month', 'day'], h=1)\n",
1105+
"transformed_df, future_df = time_features(series, freq='D', features=['month', 'day', 'week'], h=1)\n",
10941106
"transformed_df"
10951107
]
10961108
},
@@ -1125,6 +1137,7 @@
11251137
" <th>ds</th>\n",
11261138
" <th>month</th>\n",
11271139
" <th>day</th>\n",
1140+
" <th>week</th>\n",
11281141
" </tr>\n",
11291142
" </thead>\n",
11301143
" <tbody>\n",
@@ -1134,46 +1147,51 @@
11341147
" <td>2001-05-15</td>\n",
11351148
" <td>5</td>\n",
11361149
" <td>15</td>\n",
1150+
" <td>20</td>\n",
11371151
" </tr>\n",
11381152
" <tr>\n",
11391153
" <th>1</th>\n",
11401154
" <td>1</td>\n",
11411155
" <td>2001-05-15</td>\n",
11421156
" <td>5</td>\n",
11431157
" <td>15</td>\n",
1158+
" <td>20</td>\n",
11441159
" </tr>\n",
11451160
" <tr>\n",
11461161
" <th>2</th>\n",
11471162
" <td>2</td>\n",
11481163
" <td>2001-05-15</td>\n",
11491164
" <td>5</td>\n",
11501165
" <td>15</td>\n",
1166+
" <td>20</td>\n",
11511167
" </tr>\n",
11521168
" <tr>\n",
11531169
" <th>3</th>\n",
11541170
" <td>3</td>\n",
11551171
" <td>2001-05-15</td>\n",
11561172
" <td>5</td>\n",
11571173
" <td>15</td>\n",
1174+
" <td>20</td>\n",
11581175
" </tr>\n",
11591176
" <tr>\n",
11601177
" <th>4</th>\n",
11611178
" <td>4</td>\n",
11621179
" <td>2001-05-15</td>\n",
11631180
" <td>5</td>\n",
11641181
" <td>15</td>\n",
1182+
" <td>20</td>\n",
11651183
" </tr>\n",
11661184
" </tbody>\n",
11671185
"</table>\n",
11681186
"</div>"
11691187
],
11701188
"text/plain": [
1171-
" unique_id ds month day\n",
1172-
"0 0 2001-05-15 5 15\n",
1173-
"1 1 2001-05-15 5 15\n",
1174-
"2 2 2001-05-15 5 15\n",
1175-
"3 3 2001-05-15 5 15\n",
1176-
"4 4 2001-05-15 5 15"
1189+
" unique_id ds month day week\n",
1190+
"0 0 2001-05-15 5 15 20\n",
1191+
"1 1 2001-05-15 5 15 20\n",
1192+
"2 2 2001-05-15 5 15 20\n",
1193+
"3 3 2001-05-15 5 15 20\n",
1194+
"4 4 2001-05-15 5 15 20"
11771195
]
11781196
},
11791197
"execution_count": null,

settings.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[DEFAULT]
22
repo = utilsforecast
33
lib_name = utilsforecast
4-
version = 0.2.10
4+
version = 0.2.11
55
min_python = 3.8
66
license = apache2
77
black_formatting = True

utilsforecast/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.2.10"
1+
__version__ = "0.2.11"

utilsforecast/feature_engineering.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ def _compute_time_feature(
212212
if isinstance(times, pd.DatetimeIndex):
213213
if feature in ("week", "weekofyear"):
214214
times = times.isocalendar()
215-
feat_vals = getattr(times, feature)
215+
feat_vals = getattr(times, feature).to_numpy()
216216
else:
217217
feat_vals = getattr(times.dt, feature)()
218218
return feat_name, feat_vals
@@ -228,7 +228,7 @@ def _add_time_features(
228228
if isinstance(df, pd.DataFrame):
229229
times = pd.Index(unique_times)
230230
time2pos = {time: i for i, time in enumerate(times)}
231-
restore_idxs = df[time_col].map(time2pos)
231+
restore_idxs = df[time_col].map(time2pos).to_numpy()
232232
for feature in features:
233233
name, vals = _compute_time_feature(times, feature)
234234
df[name] = vals[restore_idxs]

0 commit comments

Comments
 (0)