Skip to content

Commit 897d750

Browse files
Merge pull request #767 from broadinstitute/dm/sklearn_warning
Note difference in rounding behavior between Sklearn and Onnx models
2 parents 879b873 + f758340 commit 897d750

File tree

1 file changed

+11
-0
lines changed

1 file changed

+11
-0
lines changed

gnomad/sample_qc/ancestry.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,10 @@ def apply_sklearn_classification_model(
170170
except TypeError:
171171
raise TypeError("The supplied model is not an sklearn model!")
172172

173+
logger.warning(
174+
"The use of .onnx files and apply_onnx_classification_model is recommended."
175+
)
176+
173177
classification = fit.predict(data_pd)
174178
probs = fit.predict_proba(data_pd)
175179
probs = pd.DataFrame(probs, columns=[f"prob_{p}" for p in fit.classes_])
@@ -194,6 +198,13 @@ def convert_sklearn_rf_to_onnx(
194198
except TypeError:
195199
raise TypeError("The supplied model is not an sklearn model!")
196200

201+
logger.warning(
202+
"sklearn models have different rounding behavior than ONNX models. Use of sklearn "
203+
"rf models rounds probabilities to two decimal places when used in assign_genetic_ancestry_pcs(), "
204+
"while use of onnx rf models does not. This may lead to subtly different assignment results "
205+
"for samples around probability cutoffs."
206+
)
207+
197208
initial_type = [("float_input", FloatTensorType([None, fit.n_features_in_]))]
198209
onx = convert_sklearn(fit, initial_types=initial_type, target_opset=target_opset)
199210

0 commit comments

Comments
 (0)