Skip to content

Commit ca7acfa

Browse files
committed
adding example of plotting
1 parent c8e1455 commit ca7acfa

File tree

7 files changed

+67
-131
lines changed

7 files changed

+67
-131
lines changed

examples/package_tree/calculate_similarity.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
###############################################################################
3333

3434
# Option 1: Get a family manually
35-
package_set1 = get_packages(family='docker-os')
35+
package_set1 = get_packages(family='docker-library')
3636

3737
# Option 2: Specify your own package directory (arg is packages=packages)
3838
package_directory = '%s/examples/package_image/packages' %(base)
@@ -46,7 +46,7 @@
4646
###############################################################################
4747

4848
# Option 1: specify another (same or different) family of packages
49-
package_set2 = get_packages(family='docker-library')
49+
package_set2 = get_packages(family='docker-os')
5050

5151
# Option 2: Same as above
5252
# Option 3: Don't specify any packages, use defaults
126 KB
Loading
Lines changed: 5 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -1,127 +1,10 @@
1-
from scipy.spatial.distance import pdist, squareform
2-
from plotly.tools import FigureFactory as FF
3-
from plotly.graph_objs import *
4-
import plotly.plotly as py
5-
from glob import glob
6-
import numpy as np
7-
import pickle
1+
# A quick example of making a package tree with data derived from calculate_similarity.py
82

9-
base = '/home/vanessa/Documents/Dropbox/Code/singularity/singularity-python'
10-
analysis_directory = "%s/examples/package_tree" %(base)
11-
12-
13-
# From https://plot.ly/python/dendrogram/, thanks plotly! :)
3+
from singularity.views.trees import make_package_tree
144

155
data = pickle.load(open('comparisons.pkl','rb'))['files.txt']
16-
17-
#lookup = {x:x.split("-")[0] for x in data.index.tolist()}
18-
# labels = [lookup[x] for x in data.index.tolist()]
19-
20-
labels = data.index.tolist()
21-
# Initialize figure by creating upper dendrogram
22-
figure = FF.create_dendrogram(data, orientation='bottom', labels=labels)
23-
for i in range(len(figure['data'])):
24-
figure['data'][i]['yaxis'] = 'y2'
25-
26-
# Create Side Dendrogram
27-
dendro_side = FF.create_dendrogram(data, orientation='right')
28-
for i in range(len(dendro_side['data'])):
29-
dendro_side['data'][i]['xaxis'] = 'x2'
30-
31-
# Add Side Dendrogram Data to Figure
32-
figure['data'].extend(dendro_side['data'])
33-
34-
# Create Heatmap
35-
dendro_leaves = dendro_side['layout']['yaxis']['ticktext']
36-
dendro_leaves = list(map(int, dendro_leaves))
37-
data_dist = pdist(data)
38-
heat_data = squareform(data_dist)
39-
heat_data = heat_data[dendro_leaves,:]
40-
heat_data = heat_data[:,dendro_leaves]
41-
42-
heatmap = Data([
43-
Heatmap(
44-
x = dendro_leaves,
45-
y = dendro_leaves,
46-
z = heat_data,
47-
colorscale = 'YIGnBu'
48-
)
49-
])
50-
51-
heatmap[0]['x'] = figure['layout']['xaxis']['tickvals']
52-
heatmap[0]['y'] = dendro_side['layout']['yaxis']['tickvals']
53-
54-
# Add Heatmap Data to Figure
55-
figure['data'].extend(Data(heatmap))
56-
57-
# Edit Layout
58-
figure['layout'].update({'width':800, 'height':800,
59-
'showlegend':False, 'hovermode': 'closest',
60-
})
61-
62-
# Edit xaxis
63-
figure['layout']['xaxis'].update({'domain': [.15, 1],
64-
'mirror': False,
65-
'showgrid': False,
66-
'showline': False,
67-
'zeroline': False,
68-
'ticks':""})
69-
# Edit xaxis2
70-
figure['layout'].update({'xaxis2': {'domain': [0, .15],
71-
'mirror': False,
72-
'showgrid': False,
73-
'showline': False,
74-
'zeroline': False,
75-
'showticklabels': False,
76-
'ticks':""}})
77-
78-
# Edit yaxis
79-
figure['layout']['yaxis'].update({'domain': [0, .85],
80-
'mirror': False,
81-
'showgrid': False,
82-
'showline': False,
83-
'zeroline': False,
84-
'showticklabels': False,
85-
'ticks': ""})
86-
# Edit yaxis2
87-
figure['layout'].update({'yaxis2':{'domain':[.825, .975],
88-
'mirror': False,
89-
'showgrid': False,
90-
'showline': False,
91-
'zeroline': False,
92-
'showticklabels': False,
93-
'ticks':""}})
94-
95-
# Plot!
96-
py.iplot(figure, filename='dendrogram_with_heatmap')
97-
98-
99-
# AND TREE
100-
from matplotlib import pyplot as plt
101-
from scipy.cluster.hierarchy import dendrogram, linkage
102-
import numpy as np
103-
Z = linkage(data, 'ward')
104-
105-
from scipy.cluster.hierarchy import cophenet
106-
from scipy.spatial.distance import pdist
107-
108-
c, coph_dists = cophenet(Z, pdist(data))
109-
c
110-
111-
112-
plt.figure(figsize=(10, 8))
113-
plt.scatter(Z[:,0], Z[:,1]) # plot all points
6+
plt = make_package_tree(matrix=data)
1147
plt.show()
1158

116-
labels = [x.replace(':latest','') for x in data.index.tolist()]
117-
plt.figure(figsize=(25, 10))
118-
plt.title('Docker Library Similarity')
119-
plt.xlabel('image index')
120-
plt.ylabel('distance')
121-
dendrogram(
122-
Z,
123-
leaf_rotation=90., # rotates the x axis labels
124-
leaf_font_size=8., # font size for the x axis labels
125-
labels=labels
126-
)
127-
plt.show()
9+
# or save to file
10+
plt.savefig('examples/package_tree/docker-library.png')

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ pandas
55
requests
66
selenium
77
simplejson
8+
scikit-learn
89
pygments
910
oauth2client
1011
google-api-python-client

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
description="Command line tool for working with singularity-hub and packaging singularity containers.",
2828
keywords='singularity containers hub reproducibility package science',
2929

30-
install_requires = ['Flask','gitpython','flask-restful','selenium','simplejson','pygments',
30+
install_requires = ['Flask','gitpython','flask-restful','selenium','simplejson','scikit-learn','pygments',
3131
'requests','oauth2client','google-api-python-client','pandas'],
3232

3333
entry_points = {

singularity/analysis/compare.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,9 @@ def compare_packages(packages_set1=None,packages_set2=None,by=None):
125125
package_folder = "%s/analysis/packages" %get_installdir()
126126

127127
if packages_set1 == None:
128-
package_set1 = glob("%s/docker-os/*.zip" %(package_folder))
128+
package_set1 = glob("%s/docker-library/*.zip" %(package_folder))
129129
if packages_set2 == None:
130-
package_set2 = glob("%s/docker-library/*.zip" %(package_folder))
130+
package_set2 = glob("%s/docker-os/*.zip" %(package_folder))
131131

132132
if by == None:
133133
by = ['files.txt']

singularity/views/trees.py

Lines changed: 56 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,22 +6,36 @@
66
'''
77

88
import json
9-
import os
10-
import re
11-
import requests
9+
10+
from matplotlib import pyplot as plt
11+
from scipy.cluster.hierarchy import (
12+
dendrogram,
13+
linkage
14+
)
15+
16+
from scipy.cluster.hierarchy import cophenet
17+
from scipy.spatial.distance import pdist
18+
import numpy as np
19+
1220
from singularity.logman import bot
1321

1422
from singularity.views.utils import get_container_contents
1523
from singularity.analysis.compare import (
24+
calculate_similarity,
1625
compare_containers,
17-
calculate_similarity
26+
compare_packages
1827
)
1928

29+
2030
from singularity.package import (
2131
load_package,
2232
package
2333
)
2434

35+
import os
36+
import pandas
37+
import re
38+
import requests
2539
import shutil
2640
import sys
2741
import tempfile
@@ -176,3 +190,41 @@ def make_container_tree(folders,files,path_delim="/",parse_files=True):
176190
result['files'] = file_lookup
177191

178192
return result
193+
194+
195+
###################################################################################
196+
# DENDROGRAM
197+
###################################################################################
198+
199+
200+
def make_package_tree(matrix=None,labels=None,width=25,height=10,title=None):
    '''make package tree will make a dendrogram comparing a matrix of packages
    :param matrix: a pandas df of packages, with names in index and columns.
    If anything other than a pandas DataFrame is given (including None),
    the default comparison, compare_packages()['files.txt'], is used.
    :param labels: a list of labels corresponding to row names, will be
    pulled from the matrix index if not defined
    :param width: figure width, passed to pyplot.figure as figsize[0]
    :param height: figure height, passed to pyplot.figure as figsize[1]
    :param title: a title for the plot, if not defined, will be left out
    (unless the default matrix is used, which gets a default title).
    :returns: the matplotlib pyplot module with the dendrogram drawn on a
    new figure, so the caller can call show() or savefig() on it
    '''
    if not isinstance(matrix,pandas.DataFrame):
        bot.log.info("No pandas DataFrame (matrix) of similarities defined, will use default.")
        matrix = compare_packages()['files.txt']
        # Only apply the default title when the caller did not provide one,
        # so an explicit title is never silently overwritten
        if title is None:
            title = 'Docker Library Similarity to Base OS'

    # NOTE(review): the DataFrame is handed to linkage as-is; scipy treats a
    # 2-D input as observation vectors (one row per package) - confirm this
    # is the intended reading of the similarity matrix.
    Z = linkage(matrix, 'ward')

    # Identity check: "labels == None" compares element-wise for numpy arrays
    # and pandas indexes, which makes the truth test raise
    if labels is None:
        labels = matrix.index.tolist()

    plt.figure(figsize=(width, height))

    if title is not None:
        plt.title(title)

    plt.xlabel('image index')
    plt.ylabel('distance')
    dendrogram(Z,
               leaf_rotation=90.,  # rotates the x axis labels
               leaf_font_size=8.,  # font size for the x axis labels
               labels=labels)
    return plt

0 commit comments

Comments
 (0)