diff --git a/doc/python/dendrogram.md b/doc/python/dendrogram.md index 0fe8968f20..994ac0e408 100644 --- a/doc/python/dendrogram.md +++ b/doc/python/dendrogram.md @@ -79,7 +79,7 @@ fig.show() #### Plot a Dendrogram with a Heatmap -See also the [Dash Bio demo](https://dash-bio.plotly.host/dash-clustergram/). +This example uses randomly generated sample data to demonstrate how to plot a dendrogram with a heatmap. ```python import plotly.graph_objects as go @@ -89,12 +89,11 @@ import numpy as np from scipy.spatial.distance import pdist, squareform -# get data -data = np.genfromtxt("http://files.figshare.com/2133304/ExpRawData_E_TABM_84_A_AFFY_44.tab", - names=True,usecols=tuple(range(1,30)),dtype=float, delimiter="\t") -data_array = data.view((float, len(data.dtype.names))) -data_array = data_array.transpose() -labels = data.dtype.names +# Generate sample data +np.random.seed(1) +X = np.random.rand(15, 15) +labels = [f'Sample_{i}' for i in range(15)] +data_array = X # Initialize figure by creating upper dendrogram fig = ff.create_dendrogram(data_array, orientation='bottom', labels=labels) diff --git a/doc/python/ml-pca.md b/doc/python/ml-pca.md index 1776d3be39..fc96eaf7aa 100644 --- a/doc/python/ml-pca.md +++ b/doc/python/ml-pca.md @@ -105,16 +105,16 @@ fig.show() When you will have too many features to visualize, you might be interested in only visualizing the most relevant components. Those components often capture a majority of the [explained variance](https://en.wikipedia.org/wiki/Explained_variation), which is a good way to tell if those components are sufficient for modelling this dataset. -In the example below, our dataset contains 8 features, but we only select the first 2 components. +In the example below, our dataset contains 10 features, but we only select the first 2 components. ```python import pandas as pd import plotly.express as px from sklearn.decomposition import PCA -from sklearn.datasets import fetch_california_housing +from sklearn.datasets import load_diabetes -housing = fetch_california_housing(as_frame=True) -df = housing.data +diabetes = load_diabetes() +df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names) n_components = 2 pca = PCA(n_components=n_components) @@ -123,11 +123,11 @@ components = pca.fit_transform(df) total_var = pca.explained_variance_ratio_.sum() * 100 labels = {str(i): f"PC {i+1}" for i in range(n_components)} -labels['color'] = 'Median Price' +labels['color'] = 'Disease Progression' fig = px.scatter_matrix( components, - color=housing.target, + color=diabetes.target, dimensions=range(n_components), labels=labels, title=f'Total Explained Variance: {total_var:.2f}%',