Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
83a15bb
changes in functions generate_samples and generate_counts to allow ge…
ennanco Feb 24, 2022
5dab608
Added unit tests for the generate_samples and generate_counts functions
ennanco Feb 28, 2022
3eb9c65
Repaired problems with some tests
ennanco Mar 2, 2022
e40d6b9
Repaired several examples due to the inclusion of the new generate_s…
ennanco Mar 2, 2022
7f5f918
Change the string format to make it compatible with Python v2
ennanco Mar 2, 2022
e0d9df0
Adding compatibility in generate_samples for python v2
ennanco Mar 2, 2022
4e18668
Adding adaptations to make it backward-compatible with the examples
ennanco Mar 19, 2022
b0c9c7b
Fixing style
ennanco Mar 19, 2022
3374d0d
Fixing test_data.py according to python style sheet
ennanco Mar 21, 2022
9c546e0
Fixing indentation
ennanco Mar 21, 2022
7805d3f
Fixing indentation
ennanco Mar 21, 2022
4cab536
Fixing docstring in generate_counts and changing generate_samples for …
ennanco Mar 22, 2022
b1d0ec6
Fixing spacing style in some comments
ennanco Mar 22, 2022
f64fb19
Adding unitary test for generate_data
ennanco Mar 22, 2022
5443acb
Adding unitary test for generate_data
ennanco Mar 22, 2022
5731bc6
Adding unitary test for generate_data
ennanco Mar 22, 2022
ede49b5
Merge branch 'jnothman:master' into master
ennanco Jun 20, 2022
062e337
Update upsetplot/data.py
ennanco Jan 2, 2023
3d884c4
Update upsetplot/data.py
ennanco Jan 2, 2023
b000f15
Update upsetplot/data.py
ennanco Jan 2, 2023
684be8c
Update upsetplot/data.py
ennanco Jan 2, 2023
ce55bd0
Update examples/plot_vertical.py
ennanco Jan 2, 2023
35ef9bf
Merge branch 'jnothman:master' into master
ennanco Jan 3, 2023
746f679
Merge branch 'jnothman:master' into master
ennanco Jan 11, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions examples/plot_vertical.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,16 @@
plot(example, orientation='vertical', show_counts='%d', show_percentages=True)
plt.suptitle('With counts and percentages shown')
plt.show()

#########################################################################

from upsetplot import plotting

# A vertical UpSet plot with additional category plots and some tuned visual parameters
fig = plotting.UpSet(example, orientation='vertical', show_counts=True, facecolor="grey", element_size=75)
fig.add_catplot('swarm', 'value', palette='colorblind')
fig.add_catplot('swarm', 'value1', palette='colorblind')
fig.add_catplot('swarm', 'value2', palette='colorblind')
fig.plot()
plt.show()

25 changes: 15 additions & 10 deletions upsetplot/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy as np


def generate_samples(seed=0, n_samples=10000, n_categories=3):
def generate_samples(seed=0, n_samples=10000, n_categories=3, len_samples=1):
"""Generate artificial samples assigned to set intersections

Parameters
Expand All @@ -25,6 +25,7 @@ def generate_samples(seed=0, n_samples=10000, n_categories=3):
DataFrame
Field 'value' is a weight or score for each element.
Field 'index' is a unique id for each element.
Field(s) 'value{i}' (i >= 1) hold additional values when len_samples > 1.
Index includes a boolean indicator mask for each category.

Note: Further fields may be added in future versions.
Expand All @@ -35,18 +36,21 @@ def generate_samples(seed=0, n_samples=10000, n_categories=3):
corresponding to these samples.
"""
rng = np.random.RandomState(seed)
df = pd.DataFrame({'value': np.zeros(n_samples)})
df = pd.DataFrame(np.zeros((n_samples, len_samples)))
valuename_lst = [f'value{i}' if i >0 else 'value' for i in range(len_samples)]
df.columns = valuename_lst

for i in range(n_categories):
r = rng.rand(n_samples)
df['cat%d' % i] = r > rng.rand()
df['value'] += r
r = rng.rand(n_samples, len_samples)
df[f'cat{i}'] = r[:,0] > rng.rand()
df[valuename_lst] += r

df.reset_index(inplace=True)
df.set_index(['cat%d' % i for i in range(n_categories)], inplace=True)
df.set_index([f'cat{i}' for i in range(n_categories)], inplace=True)
return df


def generate_counts(seed=0, n_samples=10000, n_categories=3):
def generate_counts(seed=0, n_samples=10000, n_categories=3, len_samples=1):
"""Generate artificial counts corresponding to set intersections

Parameters
Expand All @@ -69,8 +73,9 @@ def generate_counts(seed=0, n_samples=10000, n_categories=3):
derived from.
"""
df = generate_samples(seed=seed, n_samples=n_samples,
n_categories=n_categories)
return df.value.groupby(level=list(range(n_categories))).count()
n_categories=n_categories, len_samples=len_samples)
df.drop('index', axis=1, inplace=True)
return df.groupby(level=list(range(n_categories))).count()
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think counting is meaningful for the extra columns. Maybe we should use a different aggregate?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or maybe we shouldn't offer this functionality in generate_counts, making things somewhat simpler.



def generate_data(seed=0, n_samples=10000, n_sets=3, aggregated=False):
Expand All @@ -79,7 +84,7 @@ def generate_data(seed=0, n_samples=10000, n_sets=3, aggregated=False):
DeprecationWarning)
if aggregated:
return generate_counts(seed=seed, n_samples=n_samples,
n_categories=n_sets)
n_categories=n_sets)['value']
else:
return generate_samples(seed=seed, n_samples=n_samples,
n_categories=n_sets)['value']
Expand Down