Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -390,5 +390,4 @@ def get_heatmap_df(agg_df: pd.DataFrame, xbins: int, nprocs: int) -> pd.DataFram
cats = cats.mul(agg_df["length"], axis=0)
cats.index = agg_df["rank"]
hmap_df = cats.groupby("rank").sum()
hmap_df = hmap_df.reindex(index=range(nprocs), fill_value=0.0)
return hmap_df
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def get_x_axis_tick_labels(
return x_ticklabels


def get_y_axis_ticks(ax: Any, n_ylabels: int = 6) -> npt.NDArray[np.float64]:
def get_y_axis_ticks(ax: Any, n_procs: Optional[int] = None, n_ylabels: int = 6) -> npt.NDArray[np.float64]:
"""
Creates the y-axis tick mark locations.

Expand All @@ -150,7 +150,10 @@ def get_y_axis_ticks(ax: Any, n_ylabels: int = 6) -> npt.NDArray[np.float64]:

"""
# get the original y-axis tick locations
initial_yticks = ax.get_yticks()
if n_procs is None:
initial_yticks = ax.get_yticks()
else:
initial_yticks = np.arange(0, n_procs) + 0.5
if len(initial_yticks) < n_ylabels:
# if there are less tick marks available than requested,
# use the original tick mark locations
Expand Down Expand Up @@ -246,7 +249,7 @@ def set_x_axis_ticks_and_labels(
jointgrid.ax_joint.set_xticklabels(xticklabels, minor=False)


def set_y_axis_ticks_and_labels(jointgrid: Any, n_ylabels: int = 6):
def set_y_axis_ticks_and_labels(jointgrid: Any, n_procs: int, n_ylabels: int = 6):
"""
Sets the y-axis tick mark locations and labels.

Expand All @@ -257,9 +260,8 @@ def set_y_axis_ticks_and_labels(jointgrid: Any, n_ylabels: int = 6):
n_ylabels: The number of y-axis tick mark labels to create. Default is 6.

"""
# retrieve the y-axis tick mark locations and labels
yticks = get_y_axis_ticks(ax=jointgrid.ax_joint, n_ylabels=n_ylabels)
yticklabels = get_y_axis_tick_labels(ax=jointgrid.ax_joint, n_ylabels=n_ylabels)
yticks = get_y_axis_ticks(ax=jointgrid.ax_joint, n_procs=n_procs, n_ylabels=n_ylabels)
yticklabels = yticks - 0.5
# set the new y-axis tick locations and labels
jointgrid.ax_joint.set_yticks(yticks)
jointgrid.ax_joint.set_yticklabels(yticklabels, minor=False)
Expand Down Expand Up @@ -383,7 +385,6 @@ def plot_heatmap(
hmap_df = report.heatmaps[submodule].to_df(ops=ops)
# mirror the DXT approach to heatmaps by
# adding all-zero rows for inactive ranks
hmap_df = hmap_df.reindex(index=range(nprocs), fill_value=0.0)
xbins = hmap_df.shape[1]

# build the joint plot with marginal histograms
Expand All @@ -397,14 +398,14 @@ def plot_heatmap(
colorbar_kws = {"label": colorbar_label}
# create the heatmap object using the heatmap data,
# and assign it to the jointplot main figure
hmap = sns.heatmap(
hmap_df,
ax=jgrid.ax_joint,
# choose a color map that is not white at any value
cmap="YlOrRd",
norm=LogNorm(),
cbar_kws=colorbar_kws,
)

x, y = np.meshgrid(np.arange(xbins),
np.asarray(hmap_df.index))
# x and y both have shape (active_ranks, xbins)
# rather than (nprocs, xbins)
hmap = jgrid.ax_joint.scatter(x, y, c=hmap_df, cmap="YlOrRd", norm=LogNorm(), marker="s")
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Either for my own reference, or in case, e.g., a student comes back to look at this later--one idea might be a custom rectangle/marker, if we can control its dimensions with sufficient granularity. Since the number of ranks and the bin widths are fixed for a given plot, I believe we should only have to do the size calculation once and could then reuse it for each "scatter marker."

Maybe something similar to: https://stackoverflow.com/a/58552620/2942522

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In case it is not clear: if we can sort that out, I think it is worth it, since we would potentially avoid storing thousands of float64 zeros (the all-zero rows that are otherwise added for inactive ranks).

jgrid.ax_joint.set_ylim(0, nprocs)
jgrid.fig.colorbar(hmap, ax=jgrid.ax_joint, orientation='vertical')

# add text for x-axis bin count
xbin_label = f"Time bins: {xbins}"
Expand All @@ -419,14 +420,14 @@ def plot_heatmap(
)

# make the heatmap border visible
for _, spine in hmap.spines.items():
for _, spine in jgrid.ax_joint.spines.items():
spine.set_visible(True)

# if there is more than 1 process,
# create the horizontal bar graph
if nprocs > 1:
jgrid.ax_marg_y.barh(
y=np.arange(nprocs),
y=hmap_df.index,
width=hmap_df.sum(axis=1),
align="edge",
facecolor="black",
Expand All @@ -450,7 +451,7 @@ def plot_heatmap(
jgrid.ax_joint.set_xlim(0.0, xbin_max)
# set the x and y tick locations and labels using the runtime
set_x_axis_ticks_and_labels(jointgrid=jgrid, tmax=runtime, bin_max=xbin_max, n_xlabels=4)
set_y_axis_ticks_and_labels(jointgrid=jgrid, n_ylabels=6)
set_y_axis_ticks_and_labels(jointgrid=jgrid, n_procs=nprocs, n_ylabels=6)

# cleanup the marginal bar graph ticks and tick labels
remove_marginal_graph_ticks_and_labels(
Expand All @@ -470,9 +471,6 @@ def plot_heatmap(
# so set the subplot dimensions to fill the space
adjust_for_colorbar(jointgrid=jgrid, fig_right=0.92, cbar_x0=0.82)

# invert the y-axis so rank values are increasing
jgrid.ax_joint.invert_yaxis()

# set the axis labels
jgrid.ax_joint.set_xlabel("Time (s)")
jgrid.ax_joint.set_ylabel("Rank")
Expand Down
9 changes: 2 additions & 7 deletions darshan-util/pydarshan/darshan/tests/test_heatmap_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,24 +332,19 @@ def test_get_aggregate_data(log_file, expected_agg_data, mod, ops):
"sample-dxt-simple.darshan",
1,
["read", "write"],
np.array([[4040, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0 , 0 ,0]]).reshape(16, 1),
np.array([[4040]]),
),
(
"sample-dxt-simple.darshan",
4,
["read", "write"],
np.vstack((
np.array([[0, 0, 0, 4040]]),
np.zeros((15, 4)))),
),
(
"sample-dxt-simple.darshan",
10,
["read", "write"],
np.vstack((
np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 4040]]),
np.zeros((15, 10)))),
),
# `dxt.darshan` is complex enough to warrant changing the
# selected operations
Expand Down Expand Up @@ -571,7 +566,7 @@ def test_get_heatmap_df(
# check the data is conserved
assert actual_hmap_data.values.sum() == 4040
# make sure the output array is the correct shape
assert actual_hmap_data.shape == (16, xbins)
assert actual_hmap_data.shape == (1, xbins)
# make sure the output data contains identical values
assert_allclose(actual_hmap_data.values, expected_hmap_data)

Expand Down
8 changes: 5 additions & 3 deletions darshan-util/pydarshan/darshan/tests/test_plot_dxt_heatmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,11 +266,13 @@ def test_set_y_axis_ticks_and_labels(

# generate a joint plot object, then add the heatmap to it
jointgrid = sns.jointplot(kind="hist", bins=[xbins, nprocs])
sns.heatmap(data, ax=jointgrid.ax_joint)
x, y = np.meshgrid(np.arange(xbins),
np.asarray(data.index))
jointgrid.ax_joint.scatter(x, y, c=data)

# set the x-axis ticks and tick labels
# set the y-axis ticks and tick labels
plot_dxt_heatmap.set_y_axis_ticks_and_labels(
jointgrid=jointgrid, n_ylabels=n_ylabels
jointgrid=jointgrid, n_ylabels=n_ylabels, n_procs=nprocs,
)

# collect the actual x-axis tick labels
Expand Down