@@ -173,7 +173,7 @@ def load_weather_data(time, city_names, data_source_folder):
173173# hour of the day.
174174#
175175# We want to use the `holidays` package to enrich the time range with some calendar
176- # features such as public holidays in France. In addition, we want to use `skrub`
176+ # features such as public holidays in France. In addition, we want to use `skrub`'s
177177# `DatetimeEncoder` to add some features that are useful for time series forecasting
178178# such as the calendar year, month, day, hour, the day of the week and the day of the
179179# year.
@@ -189,12 +189,12 @@ def load_weather_data(time, city_names, data_source_folder):
189189#
190190# Let's first create some calendar features using `skrub`'s `DatetimeEncoder`.
191191#
192- # 1. Create a `DatetimeEncoder` object and by looking at the documentation, make sure
192+ # 1. Create a `DatetimeEncoder` object and, by looking at the documentation, make sure
193193# to add the weekday and the day of the year. Do not add the total seconds since the
194194# Unix epoch. You can refer to this link:
195195# https://skrub-data.org/stable/reference/generated/skrub.DatetimeEncoder.html
196196# 2. As a first operation, we wish to rename the `time` column to `cal` such that
197- # the all columns corresponding to some calendar features will be prefixed with
197+ # all the columns corresponding to some calendar features will be prefixed with
198198# `cal_`. You can simply call the `rename` method (cf.
199199# https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.rename.html)
200200# because `time` can be seen as a polars dataframe.
@@ -230,12 +230,12 @@ def load_weather_data(time, city_names, data_source_folder):
230230# 2. Convert the "time" column to the French/Paris timezone.
231231# 3. Extract the French holidays by calling `holidays.country_holidays`. For this
232232# function, you need to extract the minimum and maximum year from the "time" column.
233- # 4. Finally, you need to if a date in holiday is a French holiday. You can call this
234- # column `cal_is_holiday`.
233+ # 4. Finally, you need to add a feature indicating whether a date is a French
234+ # holiday. You can call this column `cal_is_holiday`.
235235# 5. Apply this function to the `time` `DataOp` and call the resulting variable
236236# `is_french_holiday`.
237- # 6. Finally, we wish to concatenate the `time_encoded` and `is_french_holiday` using
238- # the `.skb.concat` method.
237+ # 6. Finally, we wish to concatenate the `time_encoded` and `is_french_holiday`
238+ # `DataOps` using the `.skb.concat` method.
239239
240240# %%
241241
@@ -259,7 +259,6 @@ def prepare_holidays(time):
259259is_french_holiday = prepare_holidays (time )
260260is_french_holiday
261261
262- # %%
263262calendar = time .skb .concat ([time_encoded , is_french_holiday ], axis = 1 )
264263calendar
265264
@@ -268,7 +267,7 @@ def prepare_holidays(time):
268267#
269268# ## Electricity load data
270269#
271- # Finally we load the electricity load data. This data will both be used as a
270+ # Finally, we load the electricity load data. This data will be used both as a
272271# target variable and to craft some lagged and window-aggregated features.
273272
274273# %%
@@ -316,7 +315,7 @@ def load_electricity_load_data(time, data_source_folder):
316315
317316# %% [markdown]
318317#
319- # So apparently there a few missing measurements. Let's use linear
318+ # So apparently there are a few missing measurements. Let's use linear
320319# interpolation to fill those missing values.
321320
322321# %%
@@ -341,7 +340,8 @@ def load_electricity_load_data(time, data_source_folder):
341340# We will create 3 hourly lagged features, 1 daily lagged feature, and 1 weekly
342341# lagged feature. We will also create a rolling median and inter-quartile
343342# feature over the last 24 hours and over the last 7 days.
344-
343+ # Inter-quartile range features tell us how variable the load is over the
344+ # given window.
345345
346346# %%
347347def iqr (col , * , window_size : int ):
0 commit comments