janosh
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
Lines changed: 2 additions & 2 deletions
diff --git a/assets/scripts/cluster/composition/cluster_compositions_matbench.py b/assets/scripts/cluster/composition/cluster_compositions_matbench.py
Lines changed: 31 additions & 26 deletions
diff --git a/assets/scripts/ptable/ptable_heatmap_splits_plotly.py b/assets/scripts/ptable/ptable_heatmap_splits_plotly.py
Lines changed: 3 additions & 3 deletions
diff --git a/assets/scripts/ptable/ptable_scatter_plotly.py b/assets/scripts/ptable/ptable_scatter_plotly.py
Lines changed: 3 additions & 3 deletions
diff --git a/assets/scripts/track_pymatviz_citations.py b/assets/scripts/track_pymatviz_citations.py
Lines changed: 4 additions & 1 deletion
diff --git a/assets/scripts/treemap/py_pkg_treemap.py b/assets/scripts/treemap/py_pkg_treemap.py
Lines changed: 7 additions & 6 deletions
diff --git a/pymatviz/brillouin.py b/pymatviz/brillouin.py
Lines changed: 2 additions & 2 deletions
diff --git a/pymatviz/classify/confusion_matrix.py b/pymatviz/classify/confusion_matrix.py
Lines changed: 4 additions & 4 deletions
diff --git a/pymatviz/classify/curves.py b/pymatviz/classify/curves.py
Lines changed: 7 additions & 1 deletion
diff --git a/pymatviz/cluster/composition/__init__.py b/pymatviz/cluster/composition/__init__.py
Lines changed: 1 addition & 0 deletions
@@ -8,7 +8,7 @@ default_install_hook_types: [pre-commit, commit-msg]
 
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.14.3
+    rev: v0.14.4
     hooks:
       - id: ruff-check
         args: [--fix]
@@ -80,7 +80,7 @@ repos:
           - --fix
 
   - repo: https://github.com/pre-commit/mirrors-eslint
-    rev: v9.39.0
+    rev: v9.39.1
     hooks:
       - id: eslint
         types: [file]
 
@@ -25,14 +25,14 @@
 
 import pymatviz as pmv
 import pymatviz.cluster.composition as pcc
+from pymatviz.cluster.composition import EmbeddingMethod as Embed
+from pymatviz.cluster.composition import ProjectionMethod as Project
 from pymatviz.enums import Key
 
 
 if TYPE_CHECKING:
     import plotly.graph_objects as go
 
-    from pymatviz.cluster.composition import ProjectionMethod
-
 
 pmv.set_plotly_template("pymatviz_white")
 module_dir = os.path.dirname(__file__)
@@ -61,7 +61,7 @@ def process_dataset(
     target_label: str,
     target_symbol: str,
     embed_method: pcc.EmbeddingMethod,
-    projection: ProjectionMethod,
+    projection: pcc.ProjectionMethod,
     n_components: int,
     **kwargs: Any,
 ) -> go.Figure:
@@ -110,7 +110,7 @@ def process_dataset(
         # Create embeddings
         if embed_method == "one-hot":
             embeddings = pcc.one_hot_encode(compositions)
-        elif embed_method in ["magpie", "matscholar_el"]:
+        elif embed_method in (Embed.magpie, Embed.matscholar_el):
             embeddings = pcc.matminer_featurize(compositions, preset=embed_method)
         else:
             raise ValueError(f"Unknown {embed_method=}")
@@ -144,14 +144,15 @@ def annotate_top_points(row: pd.Series) -> dict[str, Any] | None:
         text = f"{comp_str}<br>{prop_val}"
         return dict(text=text, font_size=11, bgcolor="rgba(240, 240, 240, 0.5)")
 
-    if "embeddings" not in df_plot:
-        df_plot["embeddings"] = [embeddings_dict.get(comp) for comp in compositions]
+    embed_col = "embeddings"
+    if embed_col not in df_plot:
+        df_plot[embed_col] = [embeddings_dict.get(comp) for comp in compositions]
 
     fig = pmv.cluster_compositions(
         df_in=df_plot,
         composition_col="composition",
         prop_name=target_label,
-        embedding_method="embeddings",
+        embedding_method=embed_col,
         projection=projection,
         n_components=n_components,
         marker_size=8,
@@ -185,68 +186,72 @@ def annotate_top_points(row: pd.Series) -> dict[str, Any] | None:
     return fig
 
 
-mb_jdft2d = (
+mb_jdft2d: tuple[str, str, str, str] = (
     "matbench_jdft2d",
     "exfoliation_en",
     "Exfoliation Energy (meV/atom)",
     "E<sub>ex</sub>",
 )
-mb_steels = (
+mb_steels: tuple[str, str, str, str] = (
     "matbench_steels",
     "yield strength",
     "Yield Strength (MPa)",
     "σ",
 )
-mb_dielectric = (
+mb_dielectric: tuple[str, str, str, str] = (
     "matbench_dielectric",
     "n",
     "Refractive index",
     "n",
 )
-mb_perovskites = (
+mb_perovskites: tuple[str, str, str, str] = (
     "matbench_perovskites",
     "e_form",
     "Formation energy (eV/atom)",
     "E<sub>f</sub>",
 )
-mb_phonons = (
+mb_phonons: tuple[str, str, str, str] = (
     "matbench_phonons",
     "last phdos peak",
     "Max Phonon Peak (cm⁻¹)",
     "ν<sub>max</sub>",
 )
-mb_bulk_modulus = (
+mb_bulk_modulus: tuple[str, str, str, str] = (
     "matbench_log_kvrh",
     "log10(K_VRH)",
     "Bulk Modulus (GPa)",
     "K<sub>VRH</sub>",
 )
 plot_combinations: list[  # type: ignore[invalid-assignment]
-    tuple[
-        str, str, str, str, pcc.EmbeddingMethod, ProjectionMethod, int, dict[str, Any]
-    ]
+    tuple[str, str, str, str, Embed, Project, int, dict[str, Any]]
 ] = [
     # 1. Steels with PCA (2D) - shows clear linear trends
-    (*mb_steels, "magpie", "pca", 2, dict(x=0.01, xanchor="left")),
+    (*mb_steels, Embed.magpie, Project.pca, 2, dict(x=0.01, xanchor="left")),
     # 2. Steels with t-SNE (2D) - shows non-linear clustering
-    (*mb_steels, "magpie", "tsne", 2, dict(x=0.01, xanchor="left")),
+    (*mb_steels, Embed.magpie, Project.tsne, 2, dict(x=0.01, xanchor="left")),
     # TODO umap-learn seemingly not installed by uv run in CI, fix later
     # 3. JDFT2D with UMAP (2D) - shows modern non-linear projection
-    # (*mb_jdft2d, "magpie", "umap", 2, dict(x=0.01, xanchor="left")),
+    # (*mb_jdft2d, Embed.magpie, Project.umap, 2, dict(x=0.01, xanchor="left")),
     # 4. JDFT2D with one-hot encoding and PCA (3D) - shows raw element relationships
-    (*mb_jdft2d, "one-hot", "pca", 3, dict()),
+    (*mb_jdft2d, Embed.one_hot, Project.pca, 3, dict()),
     # 5. Steels with Matscholar embedding and t-SNE (3D) - shows advanced embedding
-    (*mb_steels, "matscholar_el", "tsne", 3, dict(x=0.5, y=0.8)),
+    (*mb_steels, Embed.matscholar_el, Project.tsne, 3, dict(x=0.5, y=0.8)),
     # 6. Dielectric with PCA (2D) - shows clear linear trends
-    (*mb_dielectric, "magpie", "pca", 2, dict(x=0.01, xanchor="left")),
+    (*mb_dielectric, Embed.magpie, Project.pca, 2, dict(x=0.01, xanchor="left")),
     # 7. Perovskites with PCA (2D) - shows clear linear trends
-    (*mb_perovskites, "magpie", "pca", 2, dict(x=0.01, xanchor="left")),
+    (*mb_perovskites, Embed.magpie, Project.pca, 2, dict(x=0.01, xanchor="left")),
     # 8. Phonons with PCA (2D) - shows clear linear trends
-    (*mb_phonons, "magpie", "pca", 2, dict(x=0.01, xanchor="left")),
+    (*mb_phonons, Embed.magpie, Project.pca, 2, dict(x=0.01, xanchor="left")),
     # 9. Bulk Modulus with PCA (2D) - shows clear linear trends
-    (*mb_bulk_modulus, "magpie", "pca", 2, dict(x=0.99, y=0.96, yanchor="top")),
+    (
+        *mb_bulk_modulus,
+        Embed.magpie,
+        Project.pca,
+        2,
+        dict(x=0.99, y=0.96, yanchor="top"),
+    ),
     # 10. Perovskites with t-SNE (3D) - shows raw element relationships
-    (*mb_perovskites, "magpie", "tsne", 3, dict()),
+    (*mb_perovskites, Embed.magpie, Project.tsne, 3, dict()),
 ]
 
 for (
 
@@ -31,7 +31,7 @@
     cbar_title = f"Periodic Table Heatmap with {n_splits}-fold split"
     fig = pmv.ptable_heatmap_splits_plotly(
         data=data_dict,
-        orientation=orientation,  # type: ignore[arg-type]
+        orientation=orientation,
         colorscale="RdYlBu",  # Single colorscale will be used for all splits
         colorbar=dict(title=cbar_title),
     )
@@ -44,7 +44,7 @@
     ]
     fig = pmv.ptable_heatmap_splits_plotly(
         data=data_dict,
-        orientation=orientation,  # type: ignore[arg-type]
+        orientation=orientation,
         colorscale=colorscales,
         colorbar=colorbars,
     )
@@ -63,7 +63,7 @@
     ]
     fig = pmv.ptable_heatmap_splits_plotly(
         data=data_dict,
-        orientation=orientation,  # type: ignore[arg-type]
+        orientation=orientation,
         colorscale=sequential_colors,
         colorbar=colorbars,
     )
 
@@ -70,10 +70,10 @@
     ),
 ]:
     fig = pmv.ptable_scatter_plotly(
-        elem_data_dict,  # type: ignore[arg-type]
+        elem_data_dict,
         mode=mode,  # type: ignore[arg-type]
-        line_kwargs=line_kwargs,  # type: ignore[arg-type]
-        color_elem_strategy=color_strategy,  # type: ignore[arg-type]
+        line_kwargs=line_kwargs,
+        color_elem_strategy=color_strategy,
         scale=1.2,
         marker_kwargs=marker_kwargs,
         symbol_kwargs=symbol_kwargs,
 
@@ -238,7 +238,10 @@ def update_readme(
     """Update readme with papers sorted by citations and year."""
     sorted_papers = sorted(
         papers,
-        key=lambda p: (-p["citations"], -p.get("year") or float("inf")),  # type: ignore[operator]
+        key=lambda p: (
+            -p["citations"],
+            float("inf") if p.get("year") is None else -p["year"],
+        ),
     )
 
     with open(readme_path, encoding="utf-8") as file:
 
@@ -54,7 +54,7 @@
 }.items():
     fig = pmv.py_pkg_treemap(
         "pymatviz",
-        group_by=group_by,  # type: ignore[arg-type]
+        group_by=group_by,
         show_counts="value",
         color_discrete_sequence=clr_scheme,
         # Only include files with at least 20 lines
@@ -124,7 +124,7 @@ def custom_module_formatter(module: str, count: int, _total: int) -> str:
 coverage_data_file = "https://github.com/user-attachments/files/21545087/2025-07-31-pymatgen-coverage.json"
 # coverage_data_file = f"{pmv.ROOT}/tmp/2025-07-31-pymatgen-coverage.json"
 
-fig_coverage_range = pmv.py_pkg_treemap(
+fig_cvrg_range = pmv.py_pkg_treemap(
     pymatgen,
     color_by="coverage",
     coverage_data_file=coverage_data_file,  # Use existing coverage data
@@ -133,7 +133,8 @@ def custom_module_formatter(module: str, count: int, _total: int) -> str:
     show_counts="value",
     color_continuous_scale="RdYlGn",  # Red-Yellow-Green scale for coverage
 )
-title_range = "pymatgen: Coverage Heatmap with Manual Range (0-100%)"
-fig_coverage_range.layout.title.update(text=title_range, x=0.5, y=0.97, font_size=18)
-fig_coverage_range.show()
-pmv.io.save_and_compress_svg(fig_coverage_range, "py-pkg-treemap-pymatgen-coverage")
+# title_range = "pymatgen: Coverage Heatmap with Manual Range (0-100%)"
+# fig_cvrg_range.layout.title.update(text=title_range, x=0.5, y=0.97, font_size=18)
+fig_cvrg_range.layout.margin = dict(l=0, r=0, b=0, t=0)
+fig_cvrg_range.show()
+pmv.io.save_and_compress_svg(fig_cvrg_range, "py-pkg-treemap-pymatgen-coverage")
@@ -300,9 +300,9 @@ def brillouin_zone_3d(
         if subplot_title is not False:
             title_func = subplot_title or get_subplot_title
             if title_func is get_subplot_title:
-                anno = title_func(structure, struct_key, idx, subplot_title)  # type: ignore[call-arg]
+                anno = title_func(structure, struct_key, idx, subplot_title)
             else:
-                anno = title_func(structure, struct_key)  # type: ignore[call-arg]
+                anno = title_func(structure, struct_key)
                 if not isinstance(anno, (str, dict)):
                     raise TypeError("Subplot title must be a string or dict")
                 if isinstance(anno, str):
 
@@ -10,8 +10,8 @@
 def confusion_matrix(
     conf_mat: Sequence[Sequence[int]] | np.ndarray | None = None,
     *,
-    y_true: Sequence[str | int] | None = None,
-    y_pred: Sequence[str | int] | None = None,
+    y_true: Sequence[str | int] | np.ndarray | None = None,
+    y_pred: Sequence[str | int] | np.ndarray | None = None,
     x_labels: tuple[str, ...] | None = None,
     y_labels: tuple[str, ...] | None = None,
     annotations: Sequence[Sequence[str]]
@@ -227,9 +227,9 @@ def confusion_matrix(
             font_size=22,
         ) | (metrics_kwargs or {})
         fig.add_annotation(**metrics_defaults)
-        if metrics_defaults.get("y", 0) >= 1:  # type: ignore[operator]
+        if metrics_defaults.get("y", 0) >= 1:
             fig.layout.margin.t = 60
-        if metrics_defaults.get("y", 0) <= 0:  # type: ignore[operator]
+        if metrics_defaults.get("y", 0) <= 0:
             fig.layout.margin.b = 60
 
     # Update axes formatting
 
@@ -44,7 +44,13 @@ def _standardize_input(
                 f"when passing a DataFrame, targets must be a column name, got "
                 f"{type(targets).__name__}"
             )
-        targets, probs_positive = df_to_arrays(df, targets, probs_positive)  # type: ignore[arg-type]
+        if isinstance(probs_positive, dict):
+            raise TypeError(
+                f"when passing a DataFrame, probs_positive must be a column name "
+                f"(str) or array, not dict. Got {type(probs_positive).__name__}. "
+                f"Pass df=None to use dict of predictions."
+            )
+        targets, probs_positive = df_to_arrays(df, targets, probs_positive)
 
     if isinstance(probs_positive, dict):
         # Convert array values to dicts if needed
 
@@ -7,6 +7,7 @@
 from pymatviz.cluster.composition.embed import matminer_featurize, one_hot_encode
 from pymatviz.cluster.composition.plot import (
     EmbeddingMethod,
+    ProjectionCallable,
     ProjectionMethod,
     cluster_compositions,
 )
Original file line number	Diff line number	Diff line change
`@@ -31,7 +31,7 @@`
`31`	`31`	`cbar_title = f"Periodic Table Heatmap with {n_splits}-fold split"`
`32`	`32`	`fig = pmv.ptable_heatmap_splits_plotly(`
`33`	`33`	`data=data_dict,`
`34`		`- orientation=orientation, # type: ignore[arg-type]`
	`34`	`+ orientation=orientation,`
`35`	`35`	`colorscale="RdYlBu", # Single colorscale will be used for all splits`
`36`	`36`	`colorbar=dict(title=cbar_title),`
`37`	`37`	`)`
`@@ -44,7 +44,7 @@`
`44`	`44`	`]`
`45`	`45`	`fig = pmv.ptable_heatmap_splits_plotly(`
`46`	`46`	`data=data_dict,`
`47`		`- orientation=orientation, # type: ignore[arg-type]`
	`47`	`+ orientation=orientation,`
`48`	`48`	`colorscale=colorscales,`
`49`	`49`	`colorbar=colorbars,`
`50`	`50`	`)`
`@@ -63,7 +63,7 @@`
`63`	`63`	`]`
`64`	`64`	`fig = pmv.ptable_heatmap_splits_plotly(`
`65`	`65`	`data=data_dict,`
`66`		`- orientation=orientation, # type: ignore[arg-type]`
	`66`	`+ orientation=orientation,`
`67`	`67`	`colorscale=sequential_colors,`
`68`	`68`	`colorbar=colorbars,`
`69`	`69`	`)`
Original file line number	Diff line number	Diff line change
`@@ -7,6 +7,7 @@`
`7`	`7`	`from pymatviz.cluster.composition.embed import matminer_featurize, one_hot_encode`
`8`	`8`	`from pymatviz.cluster.composition.plot import (`
`9`	`9`	`EmbeddingMethod,`
	`10`	`+ ProjectionCallable,`
`10`	`11`	`ProjectionMethod,`
`11`	`12`	`cluster_compositions,`
`12`	`13`	`)`