Merge pull request #5460 from plotly/fix-docs-build

LiamConnors · web-flow · commit 7c3774961d85 · 2026-01-08T09:57:59.000-05:00
Fix failing docs build
diff --git a/doc/python/dendrogram.md b/doc/python/dendrogram.md
@@ -79,7 +79,7 @@ fig.show()
 
 #### Plot a Dendrogram with a Heatmap
 
-See also the [Dash Bio demo](https://dash-bio.plotly.host/dash-clustergram/).
+This example uses randomly generated sample data to demonstrate how to plot a dendrogram with a heatmap.
 
 ```python
 import plotly.graph_objects as go
@@ -89,12 +89,11 @@ import numpy as np
 from scipy.spatial.distance import pdist, squareform
 
 
-# get data
-data = np.genfromtxt("http://files.figshare.com/2133304/ExpRawData_E_TABM_84_A_AFFY_44.tab",
-                     names=True,usecols=tuple(range(1,30)),dtype=float, delimiter="\t")
-data_array = data.view((float, len(data.dtype.names)))
-data_array = data_array.transpose()
-labels = data.dtype.names
+# Generate sample data
+np.random.seed(1)
+X = np.random.rand(15, 15)
+labels = [f'Sample_{i}' for i in range(15)]
+data_array = X
 
 # Initialize figure by creating upper dendrogram
 fig = ff.create_dendrogram(data_array, orientation='bottom', labels=labels)
diff --git a/doc/python/ml-pca.md b/doc/python/ml-pca.md
@@ -105,16 +105,16 @@ fig.show()
 
 When you have too many features to visualize, you might be interested in visualizing only the most relevant components. Those components often capture a majority of the [explained variance](https://en.wikipedia.org/wiki/Explained_variation), which is a good way to tell whether they are sufficient for modelling this dataset.
 
-In the example below, our dataset contains 8 features, but we only select the first 2 components.
+In the example below, our dataset contains 10 features, but we only select the first 2 components.
 
 ```python
 import pandas as pd
 import plotly.express as px
 from sklearn.decomposition import PCA
-from sklearn.datasets import fetch_california_housing
+from sklearn.datasets import load_diabetes
 
-housing = fetch_california_housing(as_frame=True)
-df = housing.data
+diabetes = load_diabetes()
+df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
 n_components = 2
 
 pca = PCA(n_components=n_components)
@@ -123,11 +123,11 @@ components = pca.fit_transform(df)
 total_var = pca.explained_variance_ratio_.sum() * 100
 
 labels = {str(i): f"PC {i+1}" for i in range(n_components)}
-labels['color'] = 'Median Price'
+labels['color'] = 'Disease Progression'
 
 fig = px.scatter_matrix(
     components,
-    color=housing.target,
+    color=diabetes.target,
     dimensions=range(n_components),
     labels=labels,
     title=f'Total Explained Variance: {total_var:.2f}%',