@@ -65,6 +65,80 @@ class KFold(_BaseKFold):
65
65
Each fold is then used once as a validation set while the k - 1 remaining
66
66
folds form the training set.
67
67
68
+ **Examples:**
69
+
70
+ >>> import bigframes.pandas as bpd
71
+ >>> from bigframes.ml.model_selection import KFold
72
+ >>> bpd.options.display.progress_bar = None
73
+ >>> X = bpd.DataFrame({"feat0": [1, 3, 5], "feat1": [2, 4, 6]})
74
+ >>> y = bpd.DataFrame({"label": [1, 2, 3]})
75
+ >>> kf = KFold(n_splits=3, random_state=42)
76
+ >>> for i, (X_train, X_test, y_train, y_test) in enumerate(kf.split(X, y)):
77
+ ...     print(f"Fold {i}:")
78
+ ...     print(f" X_train: {X_train}")
79
+ ...     print(f" X_test: {X_test}")
80
+ ...     print(f" y_train: {y_train}")
81
+ ...     print(f" y_test: {y_test}")
82
+ ...
83
+ Fold 0:
84
+ X_train: feat0 feat1
85
+ 1 3 4
86
+ 2 5 6
87
+ <BLANKLINE>
88
+ [2 rows x 2 columns]
89
+ X_test: feat0 feat1
90
+ 0 1 2
91
+ <BLANKLINE>
92
+ [1 rows x 2 columns]
93
+ y_train: label
94
+ 1 2
95
+ 2 3
96
+ <BLANKLINE>
97
+ [2 rows x 1 columns]
98
+ y_test: label
99
+ 0 1
100
+ <BLANKLINE>
101
+ [1 rows x 1 columns]
102
+ Fold 1:
103
+ X_train: feat0 feat1
104
+ 0 1 2
105
+ 2 5 6
106
+ <BLANKLINE>
107
+ [2 rows x 2 columns]
108
+ X_test: feat0 feat1
109
+ 1 3 4
110
+ <BLANKLINE>
111
+ [1 rows x 2 columns]
112
+ y_train: label
113
+ 0 1
114
+ 2 3
115
+ <BLANKLINE>
116
+ [2 rows x 1 columns]
117
+ y_test: label
118
+ 1 2
119
+ <BLANKLINE>
120
+ [1 rows x 1 columns]
121
+ Fold 2:
122
+ X_train: feat0 feat1
123
+ 0 1 2
124
+ 1 3 4
125
+ <BLANKLINE>
126
+ [2 rows x 2 columns]
127
+ X_test: feat0 feat1
128
+ 2 5 6
129
+ <BLANKLINE>
130
+ [1 rows x 2 columns]
131
+ y_train: label
132
+ 0 1
133
+ 1 2
134
+ <BLANKLINE>
135
+ [2 rows x 1 columns]
136
+ y_test: label
137
+ 2 3
138
+ <BLANKLINE>
139
+ [1 rows x 1 columns]
140
+
141
+
68
142
Args:
69
143
n_splits (int):
70
144
Number of folds. Must be at least 2. Defaults to 5.
@@ -84,6 +158,41 @@ def train_test_split(
84
158
):
85
159
"""Splits dataframes or series into random train and test subsets.
86
160
161
+ **Examples:**
162
+
163
+ >>> import bigframes.pandas as bpd
164
+ >>> from bigframes.ml.model_selection import train_test_split
165
+ >>> bpd.options.display.progress_bar = None
166
+ >>> X = bpd.DataFrame({"feat0": [0, 2, 4, 6, 8], "feat1": [1, 3, 5, 7, 9]})
167
+ >>> y = bpd.DataFrame({"label": [0, 1, 2, 3, 4]})
168
+ >>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
169
+ >>> X_train
170
+ feat0 feat1
171
+ 0 0 1
172
+ 1 2 3
173
+ 4 8 9
174
+ <BLANKLINE>
175
+ [3 rows x 2 columns]
176
+ >>> y_train
177
+ label
178
+ 0 0
179
+ 1 1
180
+ 4 4
181
+ <BLANKLINE>
182
+ [3 rows x 1 columns]
183
+ >>> X_test
184
+ feat0 feat1
185
+ 2 4 5
186
+ 3 6 7
187
+ <BLANKLINE>
188
+ [2 rows x 2 columns]
189
+ >>> y_test
190
+ label
191
+ 2 2
192
+ 3 3
193
+ <BLANKLINE>
194
+ [2 rows x 1 columns]
195
+
87
196
Args:
88
197
*arrays (bigframes.dataframe.DataFrame or bigframes.series.Series):
89
198
A sequence of BigQuery DataFrames or Series that can be joined on
0 commit comments