@@ -110,6 +110,8 @@ def validate_map_to_stable_id(
110110 Returns:
111111 Result: Containing a result dataframe with associated metadata.
112112 """
113+ Validator ._ensure_field_valid (self ._field , self ._dataframe )
114+
113115 response : requests .Response = _client ()._post_to_sid_endpoint (
114116 "sid/lookup/batch" ,
115117 self ._dataframe [self ._field ].to_list (),
@@ -142,3 +144,40 @@ def validate_map_to_stable_id(
142144 ],
143145 )
144146 )
147+
@staticmethod
def _ensure_field_valid(field: str, dataframe: pl.DataFrame) -> None:
    """Ensure the identifier column holds only non-null, all-digit strings.

    This is necessary for SID mapping.

    Args:
        field (str): The identifier field.
        dataframe (pl.DataFrame): The dataframe to validate.

    Raises:
        ValueError: If the field does not exist in the dataframe, if it
            contains null values, or if any value is not composed solely
            of ASCII digits (the empty string is rejected as well).
    """
    if field not in dataframe.columns:
        raise ValueError(f"Field '{field}' does not exist in the dataframe.")

    # Nulls are rejected first so the regex mask below never contains nulls
    # (a null mask entry would be silently dropped by `filter`).
    if dataframe.get_column(field).has_nulls():
        raise ValueError(
            f"Field '{field}' contains None/NaN values which are invalid for SID mapping."
        )

    # Use an explicit ASCII class: polars evaluates patterns with the Rust
    # regex engine, where `\d` is Unicode-aware and would also accept
    # non-ASCII decimal digits (e.g. Arabic-Indic numerals).
    # NOTE(review): `.str` needs a string column; a non-string column surfaces
    # polars' own error here rather than ValueError -- confirm that is intended.
    allowed_pattern = r"^[0-9]+$"  # only numeric
    invalid_values = (
        dataframe.filter(~pl.col(field).str.contains(allowed_pattern))
        .get_column(field)
        .to_list()
    )

    if invalid_values:
        raise ValueError(
            f"Field '{field}' contains non-numeric values which are invalid for SID mapping: {invalid_values}"
        )
0 commit comments