@@ -23,11 +23,12 @@ class ElectroShapeFingerprint(BaseFingerprintTransformer):
2323
2424 It first computes atomic partial charges, and then uses both conformational
2525 (spatial) structure, and this electric information, to compute reference
26- points (centroids). First three are like in USR, and last two
27- additionally use partial charge in distance calculation. See the original paper
28- [1]_ for details. For each centroid, the distribution of distances between atoms
29- and the centroid is aggregated using the first three moments (mean, standard
30- deviation, cubic root of skewness). This results in 15 features.
26+ points (centroids). First three are similar to USR: centroid, atom farthest from
27+ centroid, and atom farthest from that atom. The last two additionally use partial
28+ charge in distance calculation. See the original paper [1]_ for details. For each
29+ centroid, the distribution of distances between atoms and the centroid is aggregated
30+ using the first three moments (mean, standard deviation, cubic root of skewness).
31+ This results in 15 features.
3132
3233 This is a 3D fingerprint, and requires molecules with ``conf_id`` integer property
3334 set. They can be generated with :class:`~skfp.preprocessing.ConformerGenerator`.
@@ -150,6 +151,40 @@ def __init__(
150151 self .charge_errors = charge_errors
151152 self .errors = errors
152153
def get_feature_names_out(self, input_features=None) -> np.ndarray:  # noqa: ARG002
    """
    Get fingerprint output feature names. They correspond to aggregates
    of atomic distances to 5 centroid-based points.

    Names are ordered by reference point and, within each reference point,
    by aggregate: mean, standard deviation, cubic root of skewness. This
    gives 15 names in total.

    Parameters
    ----------
    input_features : array-like of str or None, default=None
        Unused, kept for scikit-learn compatibility.

    Returns
    -------
    feature_names_out : ndarray of str objects
        ElectroShape feature names.
    """
    reference_points = [
        "centroid_dists",
        "farthest_atom_from_centroid",
        "farthest_atom_from_farthest_to_centroid",
        "centroid_highest_partial_charge",
        "centroid_lowest_partial_charge",
    ]
    aggregates = ["mean", "std", "skewness_cubic_root"]

    # Names are generated from the two lists so that every reference point
    # gets exactly the same aggregate suffixes; a hand-written list previously
    # lost the "skewness_" part for one of the third-moment names.
    feature_names = [
        f"{point}_{aggregate}"
        for point in reference_points
        for aggregate in aggregates
    ]
    return np.asarray(feature_names, dtype=object)
187+
153188 def transform (
154189 self , X : Sequence [str | Mol ], copy : bool = False
155190 ) -> np .ndarray | csr_array :
@@ -280,7 +315,7 @@ def _get_centroid_distances(
280315 else :
281316 vec_c = (norm (vec_a ) / (2 * cross_ab_norm )) * cross_ab
282317
283- # geometric mean centroid moved in the direction of smallest and largest charge
318+ # geometric mean centroid moved in the direction of largest and smallest charge
284319 # note that charges were already scaled before
285320 c4 = np .append (c1 [:3 ] + vec_c , np .max (charges ))
286321 c5 = np .append (c1 [:3 ] + vec_c , np .min (charges ))
0 commit comments