biopragmatics · cthoyt · Feb 10, 2026 · Feb 10, 2026 · Feb 10, 2026 · Feb 10, 2026
diff --git a/src/pyobo/identifier_utils/api.py b/src/pyobo/identifier_utils/api.py
@@ -3,8 +3,9 @@
 from __future__ import annotations
 
 import logging
+from collections.abc import Callable
 from functools import lru_cache, wraps
-from typing import Annotated, ClassVar
+from typing import Annotated, ClassVar, ParamSpec, TypeVar
 
 import bioregistry
 import click
@@ -266,7 +267,11 @@ def _parse_str_or_curie_or_uri_helper(
         return rv
 
 
-def wrap_norm_prefix(f):
+S = ParamSpec("S")
+T = TypeVar("T")
+
+
+def wrap_norm_prefix(f: Callable[S, T]) -> Callable[S, T]:
     """Decorate a function that take in a prefix to auto-normalize, or return None if it can't be normalized."""
 
     @wraps(f)

diff --git a/src/pyobo/sources/plastchem.py b/src/pyobo/sources/plastchem.py
@@ -0,0 +1,239 @@
+"""Import PlastChem."""
+
+from collections import Counter
+from collections.abc import Iterable
+from typing import Any
+
+import pandas as pd
+from tabulate import tabulate
+from tqdm import tqdm
+
+from pyobo import Obo, get_grounder
+from pyobo.struct import Reference, Term, TypeDef, default_reference
+from pyobo.struct.typedef import (
+    exact_match,
+    has_canonical_smiles,
+    has_inchi,
+    has_isomeric_smiles,
+    has_role,
+)
+from pyobo.utils.path import ensure_path
+
+__all__ = ["PlastChemGetter"]
+
+PREFIX = "plastchem"
+URL = "https://zenodo.org/records/10701706/files/plastchem_db_v1.0.xlsx?download=1"
+VERSION = "1.0"
+
+# See page 45 of the report for explanation; each long comment describes the entry above it
+HAZARD_LISTS: dict[str, str] = {
+    "Red": "chemicals of concern",
+    # The Red List contains the 3651 chemicals of concern that are currently not regulated internationally. These chemicals are hazardous according to well-established criteria (one or more hazard criteria) and should be regulated.
+    "Orange": "less hazardous",
+    # The Orange List covers 1168 chemicals that have been classified as less hazardous (e.g., carcinogenic, mutagenic category 2). They may be further watched, as additional hazard traits may be identified.
+    "Watch": "under assessment",
+    # For the 28 chemicals on the Watch List, a hazard evaluation is currently under development or inconclusive. Similar to the Orange List, it includes chemicals that have potential to become chemicals of concern once fully assessed.
+    "White": "not hazardous",
+    # The chemicals on the White List are classified as not hazardous but their hazard profiles are incomplete. While there is some level of evidence that White List chemicals are not of concern, the incomplete hazard assessment warrants prioritization for further evaluation to provide a complete hazard profile.
+    "Grey": "no hazard data",
+    # The largest list, the Grey List, includes 10 345 plastic chemicals without hazard information. Those chemicals constitute the biggest knowledge gap as their hazard properties are unknown based on the authoritative sources consulted. In the absence of this information, no regulatory action is possible at this point.
+    "MEA": "regulated globally",  # Basel, Stockholm, Minamata
+}
+HAZARD_LIST_REFERENCES = {
+    f"{listn}_list": default_reference(PREFIX, f"{listn}_list") for listn in HAZARD_LISTS
+}
+
+HAZARD_LIST_ROOT = default_reference(PREFIX, "list")
+
+TYPEDEF = TypeDef(reference=default_reference(PREFIX, "onList"))
+
+
+class PlastChemGetter(Obo):
+    """An ontology representation of the PlastChem database of plastic chemicals."""
+
+    ontology = PREFIX
+    static_version = VERSION
+    typedefs = [  # plural: ``typedefs`` is the class attribute that ``Obo`` reads
+        TYPEDEF,
+        has_inchi,
+        has_canonical_smiles,
+        has_isomeric_smiles,
+        exact_match,
+        has_role,
+    ]
+
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology; ``force`` is accepted but not used here."""
+        return get_terms()
+
+
+def get_terms() -> Iterable[Term]:
+    """Yield the hazard list root, each hazard list, then a term per row with a plastchem_ID."""
+    yield Term(reference=HAZARD_LIST_ROOT)
+    for hazard_list_reference in HAZARD_LIST_REFERENCES.values():
+        term = Term(reference=hazard_list_reference)
+        term.append_parent(HAZARD_LIST_ROOT)
+        yield term
+    # Bookkeeping for the diagnostic tables written out after the last term is yielded.
+    echa_counter: Counter[str] = Counter()
+    echa_examples = {}
+    function_counter: Counter[str] = Counter()
+    function_examples = {}
+    chebi_grounder = get_grounder("chebi")
+
+    path = ensure_path(PREFIX, url=URL, version=VERSION)
+    df = pd.read_excel(path, sheet_name="Full database", dtype=str, skiprows=1)
+    # TODO group by CAS number and add alt-id annotations
+    for _, row in df.iterrows():
+        if pd.isna(row["plastchem_ID"]):
+            continue
+
+        name: str | None
+        if pd.notna(row["pubchem_name"]):
+            name = row["pubchem_name"]
+        elif pd.notna(row["iupac_name"]):
+            name = row["iupac_name"]
+        else:
+            name = None
+        term = Term.from_triple(PREFIX, row["plastchem_ID"], name)
+
+        cas = row.pop("cas")
+        cas_fixed = row.pop("cas_fixed")
+        if pd.notna(cas_fixed) and pd.notna(cas):
+            if cas != cas_fixed.lstrip("'"):
+                pass  # NOTE(review): a cas/cas_fixed mismatch is detected but ignored
+            term.append_exact_match(Reference(prefix="cas", identifier=cas))
+
+        if pd.notna(pubchem_id := row.pop("pubchem_cid")):
+            term.append_exact_match(Reference(prefix="pubchem", identifier=pubchem_id))
+
+        if pd.notna(canonical_smiles := row.pop("canonical_smiles")):
+            term.annotate_string(has_canonical_smiles, canonical_smiles)
+        if pd.notna(isomeric_smiles := row.pop("isomeric_smiles")):
+            term.annotate_string(has_isomeric_smiles, isomeric_smiles)
+        if pd.notna(inchi := row.pop("inchi")):
+            term.annotate_string(has_inchi, inchi)
+        if pd.notna(inchikey := row.pop("inchikey")):
+            term.append_exact_match(Reference(prefix="inchikey", identifier=inchikey))
+
+        if pd.notna(echa_grouping := row.pop("ECHA_grouping")):
+            echa_counter[echa_grouping] += 1
+            if echa_grouping not in echa_examples:
+                if match := chebi_grounder.get_best_match(echa_grouping):
+                    echa_examples[echa_grouping] = match.curie, match.name
+                elif match := chebi_grounder.get_best_match(echa_grouping.rstrip("s")):
+                    echa_examples[echa_grouping] = match.curie, match.name
+
+        # TODO add hazard lists?
+
+        # NIAS means non-intentionally added substance
+        for func in _get_sep(row, "Harmonized_functions"):
+            func = func.replace("_", " ").lower()
+            if role := CHEBI_ROLE_MAP.get(func):
+                term.append_relationship(has_role, role)
+            # Count the function; keep one ChEBI grounding of a chemical name as its example.
+            function_counter[func] += 1
+            if func not in function_examples and name is not None:
+                if match := chebi_grounder.get_best_match(name):
+                    if match.curie != "chebi:15702":
+                        function_examples[func] = match.curie, match.name
+                elif match := chebi_grounder.get_best_match(name.rstrip("s")):
+                    if match.curie != "chebi:15702":
+                        function_examples[func] = match.curie, match.name
+
+        # TODO ECHA_grouping
+        # TODO ground to chebi:
+        #  - Harmonized_functions
+        #  - original_function_plasticmap
+        #  - original_function_cpp
+        #  - original_primary_function_aurisano
+        #  - original_other_function_aurisano
+        #  - industrial_sector_plasticmap
+
+        yield term
+    # Runs only once the caller exhausts the generator: write summary tables via tqdm.
+    tqdm.write(
+        tabulate(
+            [
+                (
+                    echa_name,
+                    m.curie if (m := chebi_grounder.get_best_match(echa_name)) else None,
+                    count,
+                )
+                for echa_name, count in echa_counter.most_common()
+            ],
+            headers=["ECHA", "chebi", "count"],
+        )
+    )
+    tqdm.write("")
+
+    rows = [
+        (
+            function_name,
+            function_curie.curie if (function_curie := CHEBI_ROLE_MAP.get(function_name)) else "",
+            *function_examples.get(function_name, (None, None)),
+            count,
+        )
+        for function_name, count in function_counter.most_common()
+    ]
+    rows = [r for r in rows if not r[1]]  # keep only functions without a mapped ChEBI role
+    tqdm.write(
+        tabulate(
+            rows,
+            headers=["function", "chebi", "example_chebi", "example_chebi_name", "count"],
+        )
+    )
+
+
+CHEBI_ROLE_MAP: dict[str, Reference | None] = {
+    "plasticizer": Reference.from_curie("CHEBI:79056", name="plasticiser"),
+    "catalyst": Reference.from_curie("CHEBI:35223", name="catalyst"),
+    "monomer": Reference.from_curie("CHEBI:74236", name="polymerization monomer"),
+    "antioxidant": Reference.from_curie("CHEBI:22586", name="antioxidant"),
+    "flame retardant": Reference.from_curie("CHEBI:79314"),
+    "blowing agent": Reference.from_curie("CHEBI:747328"),
+    "filler": Reference.from_curie("CHEBI:747333"),
+    "stabilizer": Reference.from_curie("CHEBI:747331"),
+    "colorant": Reference.from_curie("CHEBI:37958"),  # TODO add synonym
+    "pigment": Reference.from_curie("CHEBI:37958"),  # TODO add synonym
+    "lubricant": Reference.from_curie("CHEBI:747329"),
+    "biocide": Reference.from_curie("CHEBI:33281"),  # TODO add synonym
+    "solvent": Reference.from_curie("CHEBI:46787"),
+    "emulsifier": Reference.from_curie("CHEBI:63046"),
+    "surfactant": Reference.from_curie("CHEBI:35195"),
+    "anti-fog additive": Reference.from_curie("CHEBI:747327"),
+    "other processing aids": Reference.from_curie(
+        "CHEBI:747334"
+    ),  # this is the super class for processing aid
+    "antistatic agent": Reference.from_curie("CHEBI:747335"),
+    "adhesive": Reference.from_curie("CHEBI:747337"),
+    "unspecified additive": Reference.from_curie("CHEBI:747326"),  # parent class
+    "heat stabilizer": Reference.from_curie("CHEBI:747338"),
+    "light stabilizer": Reference.from_curie("CHEBI:747339"),
+    "viscosity modifier": Reference.from_curie("CHEBI:747340"),
+    "impact modifier": Reference.from_curie("CHEBI:747341"),
+    "initiator": Reference.from_curie("CHEBI:747342"),
+    "crosslinking agent": Reference.from_curie("CHEBI:50684"),
+    "odor agent": Reference.from_curie("CHEBI:747343"),
+    "impurity": Reference.from_curie("CHEBI:143130"),
+    "ultraviolet-absorbing agent": Reference.from_curie("CHEBI:73335"),
+    "polymerization aid": Reference.from_curie("CHEBI:747345"),
+    # Non-intentionally added substances (NIAS)
+    "nias": Reference.from_curie("CHEBI:747346"),
+    # the following are mapped up to the same reference as NIAS
+    "intermediate": Reference.from_curie("CHEBI:747346"),
+    "degradation product": Reference.from_curie("CHEBI:747346"),
+    # TODO not sure how to model this; the other starting substances are initiator
+    #  and monomer. A ``None`` value means get_terms() adds no has_role relationship.
+    "unspecified raw material": None,
+}
+
+
+def _get_sep(row: dict[str, Any], key: str) -> list[str]:
+    """Split the ``;``-delimited value at ``key``; return ``[]`` when it is missing."""
+    value = row[key]
+    return value.split(";") if pd.notna(value) else []
+
+
+if __name__ == "__main__":
+    PlastChemGetter.cli(["--owl", "--obo", "--force"])
diff --git a/src/pyobo/struct/typedef.py b/src/pyobo/struct/typedef.py
@@ -28,6 +28,7 @@
     "from_species",
     "gene_product_enables",
     "gene_product_member_of",
+    "has_canonical_smiles",
     "has_contributor",
     "has_creator",
     "has_curation_status",
@@ -37,6 +38,7 @@
     "has_gene_product",
     "has_homepage",
     "has_inchi",
+    "has_isomeric_smiles",
     "has_mailbox",
     "has_mature",
     "has_member",
@@ -329,6 +331,14 @@
 )
 
 has_smiles = TypeDef(reference=v.has_smiles, is_metadata_tag=True).append_xref(v.debio_has_smiles)
+has_canonical_smiles = TypeDef(reference=v.has_canonical_smiles, is_metadata_tag=True).append_xref(
+    v.debio_has_smiles
+)
+has_isomeric_smiles = TypeDef(reference=v.has_isomeric_smiles, is_metadata_tag=True).append_xref(
+    v.debio_has_smiles
+)
+
+# For has_isomeric_smiles above, see https://chemkg.github.io/chemrof/isomeric_smiles_string/
 
 has_inchi = TypeDef(reference=v.has_inchi, is_metadata_tag=True).append_xref(v.debio_has_inchi)
 

diff --git a/src/pyobo/struct/vocabulary.py b/src/pyobo/struct/vocabulary.py
@@ -84,6 +84,8 @@ def _c(c: curies.Reference) -> Reference:
 
 debio_has_smiles = Reference(prefix="debio", identifier="0000022", name="has SMILES")
 has_smiles = Reference(prefix="chemrof", identifier="smiles_string")
+has_canonical_smiles = Reference(prefix="chemrof", identifier="canonical_smiles_string")
+has_isomeric_smiles = Reference(prefix="chemrof", identifier="isomeric_smiles_string")
 
 is_mentioned_by = Reference(prefix="mito", identifier="isMentionedBy", name="is mentioned by")
 mentions = Reference(prefix="mito", identifier="mentions", name="mentions")