@@ -265,6 +265,49 @@ def clean_gh_url(cls, user: dict[str, str]) -> dict[str, str]:
265265
266266 return user
267267
@field_validator(
    "categories",
    mode="before",
)
@classmethod
def clean_categories(cls, categories: list[str]) -> list[str]:
    """Normalize category names to their canonical pyos spelling.

    Each category is matched by prefix against a small table of known
    shorthand or variant spellings. A category that starts with one of
    those prefixes is replaced by the canonical name; every other
    category is kept exactly as given.

    Valid pyos software categories are:
    citation-management-bibliometrics, data-retrieval,
    data-extraction, data-processing-munging, data-deposition,
    data-validation-testing, data-visualization-analysis,
    workflow-automation-versioning, database-interoperability,
    scientific-software-wrapper, geospatial, education

    NOTE(review): the mapping below emits the singular
    "scientific-software-wrapper"; confirm that this is the spelling
    expected by consumers of this field.

    Parameters
    ----------
    categories : list[str]
        List of categories to clean. Because this validator runs in
        "before" mode, the value has not been type-checked yet.

    Returns
    -------
    list[str]
        List of cleaned categories, in the same order as the input.
        ``None`` or a bare string is returned unchanged so that the
        field's regular type validation can accept or reject it.
    """
    # Iterating None would raise TypeError and iterating a bare string
    # would split it into single characters, so hand those back untouched.
    if categories is None or isinstance(categories, str):
        return categories

    # Maps a recognised prefix to the canonical category name.
    canonical_by_prefix = {
        "data-processing": "data-processing-munging",
        "scientific-software": "scientific-software-wrapper",
        "data-validation": "data-validation-testing",
    }

    def _canonical(category):
        """Return the canonical name for one category, or it unchanged."""
        if not isinstance(category, str):
            # Leave non-string items for the field's type validation.
            return category
        return next(
            (
                canonical
                for prefix, canonical in canonical_by_prefix.items()
                if category.startswith(prefix)
            ),
            # No prefix matched: keep the original category.
            category,
        )

    return [_canonical(category) for category in categories]
310+
268311
269312@dataclass
270313class ProcessIssues :
0 commit comments