@@ -101,18 +101,28 @@ def maxmin_train_test_split(
101101 .. [1] `Mark Ashton et al.
102102 "Identification of Diverse Database Subsets using Property-Based and Fragment-Based Molecular Descriptions"
103103 Quant. Struct.-Act. Relat., 21: 598-604
104- <https://onlinelibrary.wiley.com/doi/10.1002/qsar.200290002>_`
104+ <https://onlinelibrary.wiley.com/doi/10.1002/qsar.200290002>`_
105105
106106 .. [2] `Roger Sayle
107107 "Improved RDKit implementation"
108- <https://github.com/rdkit/UGM_2017/blob/master/Presentations/Sayle_RDKitDiversity_Berlin17.pdf>_`
108+ <https://github.com/rdkit/UGM_2017/blob/master/Presentations/Sayle_RDKitDiversity_Berlin17.pdf>`_
109109
110110 .. [3] `Tim Dudgeon
111111 "Revisiting the MaxMinPicker"
112- <https://rdkit.org/docs/cppapi/classRDPickers_1_1MaxMinPicker.html>_`
112+ <https://rdkit.org/docs/cppapi/classRDPickers_1_1MaxMinPicker.html>`_
113113
114114 .. [4] `Squonk - RDKit MaxMin Picker
115- <https://squonk.it/docs/cells/RDKit%20MaxMin%20Picker>_`
115+ <https://squonk.it/docs/cells/RDKit%20MaxMin%20Picker>`_
116+
117+ Examples
118+ --------
119+ >>> from skfp.model_selection.splitters import maxmin_train_test_split
120+ >>> smiles = ['CCO', 'CCN', 'CCC', 'CCCl', 'CCBr', 'CCI', 'CCF', 'CC=O']
121+ >>> train_smiles, test_smiles = maxmin_train_test_split(
122+ ... smiles, train_size=0.75, test_size=0.25, random_state=42
123+ ... )
124+ >>> train_smiles
125+ ['CCO', 'CCN', 'CCCl', 'CCBr', 'CCI', 'CCF']
116126 """
117127 data_size = len (data )
118128 train_size , test_size = validate_train_test_split_sizes (
@@ -249,18 +259,28 @@ def maxmin_train_valid_test_split(
249259 .. [1] `Mark Ashton et al.
250260 "Identification of Diverse Database Subsets using Property-Based and Fragment-Based Molecular Descriptions"
251261 Quant. Struct.-Act. Relat., 21: 598-604
252- <https://onlinelibrary.wiley.com/doi/10.1002/qsar.200290002>_`
262+ <https://onlinelibrary.wiley.com/doi/10.1002/qsar.200290002>`_
253263
254264 .. [2] `Roger Sayle
255265 "Improved RDKit implementation"
256- <https://github.com/rdkit/UGM_2017/blob/master/Presentations/Sayle_RDKitDiversity_Berlin17.pdf>_`
266+ <https://github.com/rdkit/UGM_2017/blob/master/Presentations/Sayle_RDKitDiversity_Berlin17.pdf>`_
257267
258268 .. [3] `Tim Dudgeon
259269 "Revisiting the MaxMinPicker"
260- <https://rdkit.org/docs/cppapi/classRDPickers_1_1MaxMinPicker.html>_`
270+ <https://rdkit.org/docs/cppapi/classRDPickers_1_1MaxMinPicker.html>`_
261271
262272 .. [4] `Squonk - RDKit MaxMin Picker
263- <https://squonk.it/docs/cells/RDKit%20MaxMin%20Picker>_`
273+ <https://squonk.it/docs/cells/RDKit%20MaxMin%20Picker>`_
274+
275+ Examples
276+ --------
277+ >>> from skfp.model_selection.splitters import maxmin_train_valid_test_split
278+ >>> smiles = ['CCO', 'CCN', 'CCC', 'CCCl', 'CCBr', 'CCI', 'CCF', 'CC=O']
279+ >>> train_smiles, valid_smiles, test_smiles = maxmin_train_valid_test_split(
280+ ... smiles, train_size=0.5, valid_size=0.25, test_size=0.25, random_state=42
281+ ... )
282+ >>> train_smiles
283+ ['CCCl', 'CCBr', 'CCI', 'CCF']
264284 """
265285 data_size = len (data )
266286 train_size , valid_size , test_size = validate_train_valid_test_split_sizes (
@@ -405,6 +425,17 @@ def maxmin_stratified_train_test_split(
405425 See Also
406426 --------
407427 :func:`maxmin_train_test_split` : Regular MaxMin split.
428+
429+ Examples
430+ --------
431+ >>> from skfp.model_selection.splitters import maxmin_stratified_train_test_split
432+ >>> smiles = ['CCO', 'CCN', 'CCC', 'CCCl', 'CCBr', 'CCI', 'CCF', 'CC=O']
433+ >>> labels = [0, 0, 1, 1, 0, 1, 0, 1]
434+ >>> train_smiles, test_smiles, train_labels, test_labels = maxmin_stratified_train_test_split(
435+ ... smiles, labels, train_size=0.75, test_size=0.25, random_state=42
436+ ... )
437+ >>> print('Train SMILES:', train_smiles)
438+ Train SMILES: ['CCO', 'CCBr', 'CCF', 'CCC', 'CCI', 'CC=O']
408439 """
409440 data_arr = np .array (data )
410441 labels = np .array (labels , dtype = int )
@@ -561,6 +592,19 @@ def maxmin_stratified_train_valid_test_split(
561592 See Also
562593 --------
563594 :func:`maxmin_train_valid_test_split` : Regular MaxMin split.
595+
596+ Examples
597+ --------
598+ >>> from skfp.model_selection.splitters import maxmin_stratified_train_valid_test_split
599+ >>> smiles = ['CCO', 'CCN', 'CCC', 'CCCl', 'CCBr', 'CCI', 'CCF', 'CC=O']
600+ >>> labels = [0, 0, 1, 1, 0, 1, 0, 1]
601+ >>> train_smiles, valid_smiles, test_smiles, train_labels, valid_labels, test_labels = (
602+ ... maxmin_stratified_train_valid_test_split(
603+ ... smiles, labels, train_size=0.5, valid_size=0.25, test_size=0.25, random_state=42
604+ ... )
605+ ... )
606+ >>> print('Train SMILES:', train_smiles)
607+ Train SMILES: ['CCBr', 'CCF', 'CCC', 'CCI']
564608 """
565609 data_arr = np .array (data )
566610 labels = np .array (labels , dtype = int )
0 commit comments