@@ -65,6 +65,80 @@ class KFold(_BaseKFold):
6565 Each fold is then used once as a validation set while the k - 1 remaining
6666 folds form the training set.
6767
68+ **Examples:**
69+
70+ >>> import bigframes.pandas as bpd
71+ >>> from bigframes.ml.model_selection import KFold
72+ >>> bpd.options.display.progress_bar = None
73+ >>> X = bpd.DataFrame({"feat0": [1, 3, 5], "feat1": [2, 4, 6]})
74+ >>> y = bpd.DataFrame({"label": [1, 2, 3]})
75+ >>> kf = KFold(n_splits=3, random_state=42)
76+ >>> for i, (X_train, X_test, y_train, y_test) in enumerate(kf.split(X, y)):
77+ ... print(f"Fold {i}:")
78+ ... print(f" X_train: {X_train}")
79+ ... print(f" X_test: {X_test}")
80+ ... print(f" y_train: {y_train}")
81+ ... print(f" y_test: {y_test}")
82+ ...
83+ Fold 0:
84+ X_train: feat0 feat1
85+ 1 3 4
86+ 2 5 6
87+ <BLANKLINE>
88+ [2 rows x 2 columns]
89+ X_test: feat0 feat1
90+ 0 1 2
91+ <BLANKLINE>
92+ [1 rows x 2 columns]
93+ y_train: label
94+ 1 2
95+ 2 3
96+ <BLANKLINE>
97+ [2 rows x 1 columns]
98+ y_test: label
99+ 0 1
100+ <BLANKLINE>
101+ [1 rows x 1 columns]
102+ Fold 1:
103+ X_train: feat0 feat1
104+ 0 1 2
105+ 2 5 6
106+ <BLANKLINE>
107+ [2 rows x 2 columns]
108+ X_test: feat0 feat1
109+ 1 3 4
110+ <BLANKLINE>
111+ [1 rows x 2 columns]
112+ y_train: label
113+ 0 1
114+ 2 3
115+ <BLANKLINE>
116+ [2 rows x 1 columns]
117+ y_test: label
118+ 1 2
119+ <BLANKLINE>
120+ [1 rows x 1 columns]
121+ Fold 2:
122+ X_train: feat0 feat1
123+ 0 1 2
124+ 1 3 4
125+ <BLANKLINE>
126+ [2 rows x 2 columns]
127+ X_test: feat0 feat1
128+ 2 5 6
129+ <BLANKLINE>
130+ [1 rows x 2 columns]
131+ y_train: label
132+ 0 1
133+ 1 2
134+ <BLANKLINE>
135+ [2 rows x 1 columns]
136+ y_test: label
137+ 2 3
138+ <BLANKLINE>
139+ [1 rows x 1 columns]
140+
141+
68142 Args:
69143 n_splits (int):
70144 Number of folds. Must be at least 2. Defaults to 5.
@@ -84,6 +158,41 @@ def train_test_split(
84158):
85159 """Splits dataframes or series into random train and test subsets.
86160
161+ **Examples:**
162+
163+ >>> import bigframes.pandas as bpd
164+ >>> from bigframes.ml.model_selection import train_test_split
165+ >>> bpd.options.display.progress_bar = None
166+ >>> X = bpd.DataFrame({"feat0": [0, 2, 4, 6, 8], "feat1": [1, 3, 5, 7, 9]})
167+ >>> y = bpd.DataFrame({"label": [0, 1, 2, 3, 4]})
168+ >>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
169+ >>> X_train
170+ feat0 feat1
171+ 0 0 1
172+ 1 2 3
173+ 4 8 9
174+ <BLANKLINE>
175+ [3 rows x 2 columns]
176+ >>> y_train
177+ label
178+ 0 0
179+ 1 1
180+ 4 4
181+ <BLANKLINE>
182+ [3 rows x 1 columns]
183+ >>> X_test
184+ feat0 feat1
185+ 2 4 5
186+ 3 6 7
187+ <BLANKLINE>
188+ [2 rows x 2 columns]
189+ >>> y_test
190+ label
191+ 2 2
192+ 3 3
193+ <BLANKLINE>
194+ [2 rows x 1 columns]
195+
87196 Args:
88197 *arrays (bigframes.dataframe.DataFrame or bigframes.series.Series):
89198 A sequence of BigQuery DataFrames or Series that can be joined on
0 commit comments