@@ -57,7 +57,7 @@ second datetime variable in our dataset.
5757
5858 dtfs = DatetimeFeatures(
5959 variables = " var_date2" ,
60- features_to_extract = [" month" , " month_end" , " day_of_the_year " ]
60+ features_to_extract = [" month" , " month_end" , " day_of_year " ]
6161 )
6262
6363 df_transf = dtfs.fit_transform(toy_df)
@@ -66,11 +66,11 @@ second datetime variable in our dataset.
6666
6767 .. code :: python
6868
69- var_date1 var_date2_month var_date2_month_end var_date2_doty
70- 0 May- 1989 6 0 173
71- 1 Dec- 2020 2 0 41
72- 2 Jan- 1999 8 0 215
73- 3 Feb- 2002 10 1 305
69+ var_date1 var_date2_month var_date2_month_end var_date2_day_of_year
70+ 0 May- 1989 6 0 173
71+ 1 Dec- 2020 2 0 41
72+ 2 Jan- 1999 8 0 215
73+ 3 Feb- 2002 10 1 305
7474
7575
7676 With `transform() `, the features extracted from the datetime variable are added to the
@@ -214,25 +214,25 @@ And now we mistakenly extract only date features.
214214.. code :: python
215215
216216 dfts = DatetimeFeatures(
217- features_to_extract = [" year" , " month" , " day_of_the_week " ],
217+ features_to_extract = [" year" , " month" , " day_of_week " ],
218218 )
219219 df_transf = dfts.fit_transform(toy_df)
220220
221221 print (df_transf)
222222
223223 .. code :: python
224224
225- not_a_dt var_time1_year var_time1_month var_time1_dotw var_time2_year \
226- 0 not 2021 12 2 2021
227- 1 a 2021 12 2 2021
228- 2 date 2021 12 2 2021
229- 3 time 2021 12 2 2021
225+ not_a_dt var_time1_year var_time1_month var_time1_day_of_week var_time2_year \
226+ 0 not 2021 12 2 2021
227+ 1 a 2021 12 2 2021
228+ 2 date 2021 12 2 2021
229+ 3 time 2021 12 2 2021
230230
231- var_time2_month var_time2_dotw
232- 0 12 2
233- 1 12 2
234- 2 12 2
235- 3 12 2
231+ var_time2_month var_time2_day_of_week
232+ 0 12 2
233+ 1 12 2
234+ 2 12 2
235+ 3 12 2
236236
237237 The transformer will still create features derived from today's date (the date of
238238creating the docs).
@@ -313,15 +313,15 @@ To do this, we leave the parameter `features_to_extract` to `None`.
313313 1 2018 - 01 - 01 01 :00 :00 12 / 01 / 90 23 :01 :02 02 / 28 / 97 10 :10 :55 1
314314 2 2018 - 01 - 01 02 :00 :00 04 / 25 / 01 11 :59 :21 11 / 11 / 03 17 :30 :00 1
315315
316- var_dt1_year var_dt1_dotw var_dt1_dotm var_dt1_hour var_dt1_minute \
317- 0 2018 0 1 0 0
318- 1 2018 0 1 1 0
319- 2 2018 0 1 2 0
316+ var_dt1_year var_dt1_day_of_week var_dt1_day_of_month var_dt1_hour \
317+ 0 2018 0 1 0
318+ 1 2018 0 1 1
319+ 2 2018 0 1 2
320320
321- var_dt1_second
322- 0 0
323- 1 0
324- 2 0
321+ var_dt1_minute var_dt1_second
322+ 0 0 0
323+ 1 0 0
324+ 2 0 0
325325
326326 Our new dataset contains the original features plus the new variables extracted
327327from them.
@@ -336,8 +336,8 @@ We can find the group of features extracted by the transformer in its attribute.
336336
337337 [' month' ,
338338 ' year' ,
339- ' day_of_the_week ' ,
340- ' day_of_the_month ' ,
339+ ' day_of_week ' ,
340+ ' day_of_month ' ,
341341 ' hour' ,
342342 ' minute' ,
343343 ' second' ]
@@ -368,10 +368,10 @@ We can also extract all supported features automatically.
368368 1 1 1 2018
369369 2 1 1 2018
370370
371- var_dt1_woty var_dt1_dotw ... var_dt1_month_end var_dt1_quarter_start \
372- 0 1 0 ... 0 1
373- 1 1 0 ... 0 1
374- 2 1 0 ... 0 1
371+ var_dt1_week var_dt1_day_of_week ... var_dt1_month_end var_dt1_quarter_start \
372+ 0 1 0 ... 0 1
373+ 1 1 0 ... 0 1
374+ 2 1 0 ... 0 1
375375
376376 var_dt1_quarter_end var_dt1_year_start var_dt1_year_end \
377377 0 0 1 0
@@ -400,10 +400,10 @@ We can find the group of features extracted by the transformer in its attribute.
400400 ' quarter' ,
401401 ' semester' ,
402402 ' year' ,
403- ' week_of_the_year ' ,
404- ' day_of_the_week ' ,
405- ' day_of_the_month ' ,
406- ' day_of_the_year ' ,
403+ ' week ' ,
404+ ' day_of_week ' ,
405+ ' day_of_month ' ,
406+ ' day_of_year ' ,
407407 ' weekend' ,
408408 ' month_start' ,
409409 ' month_end' ,
@@ -467,29 +467,29 @@ from the dataset.
467467
468468 .. code :: python
469469
470- var_date_month var_date_year var_date_dotw var_date_dotm \
471- 0 6 2012 3 21
472- 1 2 1998 1 10
473- 2 8 2010 1 3
474- 3 10 2020 5 31
470+ var_date_month var_date_year var_date_day_of_week var_date_day_of_month \
471+ 0 6 2012 3 21
472+ 1 2 1998 1 10
473+ 2 8 2010 1 3
474+ 3 10 2020 5 31
475475
476476 var_time1_hour var_time1_minute var_time1_second var_dt_month \
477477 0 12 34 45 8
478478 1 23 1 2 12
479479 2 11 59 21 4
480480 3 8 44 23 4
481481
482- var_dt_year var_dt_dotw var_dt_dotm var_dt_hour var_dt_minute \
483- 0 2000 3 31 12 34
484- 1 1990 5 1 23 1
485- 2 2001 2 25 11 59
486- 3 2001 2 25 11 59
482+ var_dt_year var_dt_day_of_week var_dt_day_of_month var_dt_hour \
483+ 0 2000 3 31 12
484+ 1 1990 5 1 23
485+ 2 2001 2 25 11
486+ 3 2001 2 25 11
487487
488- var_dt_second
489- 0 45
490- 1 2
491- 2 21
492- 3 21
488+ var_dt_minute var_dt_second
489+ 0 34 45
490+ 1 1 2
491+ 2 59 21
492+ 3 59 21
493493
494494 As you can see, we do not have the constant features in the transformed dataset.
495495
@@ -562,7 +562,7 @@ timezone.
562562 toy_df = pd.DataFrame({" var_tz" : var_tz})
563563
564564 dfts = DatetimeFeatures(
565- features_to_extract = [" day_of_the_month " , " hour" ],
565+ features_to_extract = [" day_of_month " , " hour" ],
566566 drop_original = False ,
567567 utc = True ,
568568 )
@@ -573,10 +573,10 @@ timezone.
573573
574574 .. code :: python
575575
576- var_tz var_tz_dotm var_tz_hour
577- 0 2000 - 08 - 31 12 :34 :45 - 04 :00 31 16
578- 1 1990 - 12 - 01 23 :01 :02 - 05 :00 2 4
579- 2 2001 - 04 - 25 11 :59 :21 - 04 :00 25 15
576+ var_tz var_tz_day_of_month var_tz_hour
577+ 0 2000 - 08 - 31 12 :34 :45 - 04 :00 31 16
578+ 1 1990 - 12 - 01 23 :01 :02 - 05 :00 2 4
579+ 2 2001 - 04 - 25 11 :59 :21 - 04 :00 25 15
580580
581581
582582 **Case 3 **: given a variable like *var_tz * in the example above, we now want
@@ -587,7 +587,7 @@ is the default option.
587587.. code :: python
588588
589589 dfts = DatetimeFeatures(
590- features_to_extract = [" day_of_the_month " , " hour" ],
590+ features_to_extract = [" day_of_month " , " hour" ],
591591 drop_original = False ,
592592 utc = None ,
593593 )
@@ -598,10 +598,10 @@ is the default option.
598598
599599 .. code :: python
600600
601- var_tz var_tz_dotm var_tz_hour
602- 0 2000 - 08 - 31 12 :34 :45 - 04 :00 31 12
603- 1 1990 - 12 - 01 23 :01 :02 - 05 :00 1 23
604- 2 2001 - 04 - 25 11 :59 :21 - 04 :00 25 11
601+ var_tz var_tz_day_of_month var_tz_hour
602+ 0 2000 - 08 - 31 12 :34 :45 - 04 :00 31 12
603+ 1 1990 - 12 - 01 23 :01 :02 - 05 :00 1 23
604+ 2 2001 - 04 - 25 11 :59 :21 - 04 :00 25 11
605605
606606 Note that the hours extracted from the variable differ in this dataframe with respect to the
607607ones obtained in **Case 2**.
0 commit comments