Skip to content

Commit 49e2861

Browse files
committed
updating scripts for cameron to see
1 parent ba04516 commit 49e2861

File tree

8 files changed

+388
-189
lines changed

8 files changed

+388
-189
lines changed

examples/run_singularity/singularity_client.py

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,37 +4,43 @@
44

55
from singularity.cli import Singularity
66

7-
# The default will ask for your sudo password, and then not ask again to
8-
# run commands. It is not stored anywhere, however you should not save / pickle
9-
# the object as it will expose your password.
7+
# Create a client
108
S = Singularity()
119

1210
# Get general help:
1311
S.help()
1412

1513
# These are the defaults, which can be specified
16-
S = Singularity(sudo=True,verbose=False)
17-
18-
# Let's define a path to an image
19-
# wget http://www.vbmis.com/bmi/project/singularity/package_image/ubuntu:latest-2016-04-06.img
20-
image_path = 'ubuntu:latest-2016-04-06.img'
21-
22-
# Run singularity --exec
23-
S.execute(image_path=image_path,command='ls')
24-
# $'docker2singularity.sh\nget_docker_container_id.sh\nget_docker_meta.py\nmakeBases.py\nsingularity\nubuntu:latest-2016-04-06.img\n'
25-
# These are the defaults, which can be specified
14+
S = Singularity(sudo=False,sudopw=None,debug=False)
15+
16+
# Create an image
17+
image = S.create('myimage.img')
18+
19+
# Import into it
20+
S.importcmd(image,'docker://ubuntu:latest')
21+
22+
# Execute command to container
23+
result = S.execute(image,command='cat /singularity')
24+
print(result)
25+
'''
26+
#!/bin/sh
27+
28+
if test -x /bin/bash; then
29+
exec /bin/bash "$@"
30+
elif test -x /bin/sh; then
31+
exec /bin/sh "$@"
32+
else
33+
echo "ERROR: No valid shell within container"
34+
exit 255
35+
fi
36+
'''
2637

2738
# For any function you can get the docs:
2839
S.help(command="exec")
2940

30-
# or return as string
31-
help = S.help(command="exec",stdout=False)
32-
33-
# export an image, default export_type="tar" , pipe=False , output_file = None will produce file in tmp
34-
tmptar = S.export(image_path=image_path)
35-
36-
# create an empty image
37-
S.create(image_path='test.img')
41+
# export an image as a byte array
42+
byte_array = S.export(image,pipe=True)
3843

39-
# import a docker image
40-
S.importcmd(image_path,input_source='docker://ubuntu:latest')
44+
# Get an in-memory tar
45+
from singularity.reproduce import get_memory_tar
46+
tar = get_memory_tar(image)

examples/singularity_hub/compare_builds.py

Lines changed: 151 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,17 @@
1818
# Let's keep images in a temporary folder
1919
base = "/home/vanessa/Documents/Work/singularity/hub"
2020
storage = "%s/containers" %base
21-
if not os.path.exists(storage):
22-
os.mkdir(storage)
23-
os.chdir(storage)
21+
clones = "%s/clones" %storage # same image downloaded multiple times
22+
replicates = "%s/replicates" %storage # these are quasi replicates
23+
# had to change runscripts to commit
24+
replication = "%s/quasi_replicates" %storage # these are exact replicates, from same
25+
hub = "%s/collections" %storage
26+
27+
# Create all folders for images
28+
paths = [storage,replicates,clones,replication,hub]
29+
for pathy in paths:
30+
if not os.path.exists(pathy):
31+
os.mkdir(pathy)
2432

2533
# We will keep a table of information
2634
columns = ['name','build_time_seconds','size','commit','estimated_os']
@@ -32,10 +40,13 @@ def get_top_os(x):
3240
return sorted(x.items(), key=lambda x: (x[1],x[0]), reverse=True)[0][0]
3341

3442
#############################################################################
35-
# Task 1: Download the containers and metadata! (different images)
43+
# Task 1: Get Containers
3644
#############################################################################
3745

46+
# SINGULARITY HUB HAS QUASI REPLICATES, complete metadata
47+
3848
# Retrieve the container based on the name
49+
os.chdir(replicates)
3950
collection = shub.get_collection(container_name)
4051
results['repo_name'] = container_name
4152
results['collection'] = collection
@@ -65,91 +76,179 @@ def get_top_os(x):
6576
pickle.dump(results,open(result_file),'wb'))
6677

6778

79+
# IDENTICAL
80+
81+
os.chdir(clones)
82+
chosen_one = results['df'].index[10]
83+
manifest = results['containers'][chosen_one]
84+
for num in range(0,100):
85+
clone_name = "%s-%s" %(manifest['name'].replace('/','-'),num)
86+
image = shub.pull_container(manifest,
87+
download_folder=clones,
88+
name="%s.img.gz" %(clone_name))
89+
90+
91+
# EXACT REPLICATES
92+
93+
runscript = '''Bootstrap:docker
94+
From: ubuntu:latest
95+
96+
%runscript
97+
exec "Hello World!"
98+
'''
99+
100+
os.chdir(replication)
101+
with open('Singularity','w') as filey:
102+
filey.writelines(runscript)
103+
104+
from singularity.cli import Singularity
105+
cli = Singularity()
106+
107+
for num in range(0,100):
108+
container_name = 'ubuntu-hello-world-%s.img' %(num)
109+
cli.create(container_name)
110+
cli.bootstrap(container_name,'Singularity')
111+
112+
container_uri = '%s-%s' %(container_name,manifest['version'])
113+
containers[container_uri] = manifest
114+
115+
116+
# ALL SINGULARITY HUB
117+
containers = shub.get_containers()
118+
os.chdir(hub)
119+
for container_name,container in containers.items():
120+
for branch, manifest in container.items():
121+
name = manifest['name'].replace('/','-')
122+
image = shub.pull_container(manifest,
123+
download_folder=hub,
124+
name="%s-%s.img.gz" %(name,branch))
125+
126+
pickle.dump(containers,open('%s/container_manifests.pkl' %(hub),'wb'))
127+
128+
68129
#############################################################################
69130
# Task 2: Develop levels of reproducibility
70131
#############################################################################
71132

72133
from singularity.reproduce import (
134+
assess_differences,
73135
get_content_hashes,
74136
get_image_hash,
75137
get_levels
76138
)
77139

78-
levels = get_levels(version=2.2)
140+
levels = get_levels()
79141
result_file = '%s/results-%s.pkl' %(base,container_name.replace('/','-'))
80142
results = pickle.load(open(result_file,'rb'))
81143

82-
os.chdir(storage)
83-
image_files = glob("*.img")
84144

145+
# Let's assess what files are identical across pairs of images in different sets
85146

86-
# Let's assess what files are identical across the images. We can use this to develop
87-
# our subsequent levels.
88-
# Here we will use the 100 files in the folder, and find files/folders consistent across
89-
# we will not include the runscript, since we know this was changed.
90-
identical_across = get_content_hashes(image_files[0],level='IDENTICAL',version=2.2)
91-
image_files.pop(0)
92-
not_identical = []
147+
# Quasi Replicate: meaning same base os, different build host, slightly different runscript
148+
os.chdir(replication)
149+
image_files = glob('*.img')
150+
diffs = assess_differences(image_files[0],image_files[1],levels=levels)
151+
pickle.dump(diffs,open('%s/diff_quasi_replicate_pair.pkl' %base,'wb'))
93152

94-
for image_file in image_files:
95-
hashes = get_content_hashes(image_file,level='IDENTICAL',version=2.2)
96-
for hash_path,hash_val in hashes.items():
97-
if hash_path in identical_across:
98-
if not identical_across[hash_path] == hashes[hash_path]:
99-
del identical_across[hash_path]
100-
not_identical.append(hash_path)
101-
102-
# From the above we learn that all files are identical except for those
103-
# in:
104-
105-
#['./.run',
106-
# './etc/hosts',
107-
# './singularity',
108-
# './etc/mtab',
109-
# './.exec',
110-
# './etc/resolv.conf',
111-
# './.shell',
112-
# './environment']
113-
114-
# Since we know that the images were produced by way of changing the runscript,
115-
# and this influences the singularity metadata folders, we can conclude that we would
116-
# see differences for REPLICATE in /etc/hosts and /etc/mtab and /etc/resolv.conf
117-
118-
# Identical: logically, if we compare an image to itself, all files are the same
119153
# Replicate: if we produce an equivalent image at a different time, we might have
120154
# variance in package directories (anything involving variable with mirrors, etc)
121-
# Environment/Runscript/Labels: these are logical to compare, we compare the hash of
122-
# just a few specific files in the image
123155

156+
os.chdir(replicates)
157+
image_files = glob('*.img')
158+
diffs = assess_differences(image_files[0],image_files[1],levels=levels)
159+
pickle.dump(diffs,open('%s/diff_replicate_pair.pkl' %base,'wb'))
160+
161+
# Identical: all files are the same
162+
163+
os.chdir(clones)
164+
image_files = glob('*.img')
165+
diffs = assess_differences(image_files[0],image_files[1],levels=levels)
166+
pickle.dump(diffs,open('%s/diff_clone_pair.pkl' %base,'wb'))
167+
168+
# Different images, same OS
124169

125170
#############################################################################
126171
# Task 3: Assess levels of reproducibility
127172
#############################################################################
128173

129174
# The first thing we want to do is evaluate our metrics for reproducibility.
175+
dfs = dict()
130176

131-
# Question 1: What files are consistent across the same image?
132-
# LEVEL IDENTICAL
133-
# Here we will download the same image 10 times, create a sha1 sum of the files,
134-
# and determine which sets of files should be consistent for the same image file
177+
# ASSESS IDENTICAL IMAGES ACROSS ALL LEVELS
178+
179+
os.chdir(clones)
180+
image_files = glob("*.img")
181+
levels = get_levels(version=2.2)
135182

183+
hashes = pandas.DataFrame(columns=list(levels.keys()))
136184

137-
# Question 2: What files are consistent across the same image, different downloads?
138-
# LEVEL REPLICATE
185+
for image_file in image_files:
186+
print('Processing %s' %(image_file))
187+
hashy = get_image_hashes(image_file,levels=levels)
188+
hashes.loc[image_file,:] = hashy
189+
190+
191+
dfs['IDENTICAL'] = hashes
192+
for col in hashes.columns.tolist():
193+
print("%s: %s" %(col,hashes[col].unique().tolist()))
194+
195+
# IDENTICAL: ['364715054c17c29338787bd231e58d90caff154b']
196+
# RUNSCRIPT: ['da39a3ee5e6b4b0d3255bfef95601890afd80709']
197+
# ENVIRONMENT: ['22ff3c5c5fa63d3f08a48669d90fcb1459e6e74b']
198+
# RECIPE: ['0e0efcb05fb4727f77b999d135c8a58a8ce468d5']
199+
200+
201+
# Question 2: What files are consistent across the same image, different builds?
139202
# An image that is a replicate should be assessed as identical using the "REPLICATE"
140-
# criteria.
203+
# criteria, but not under the "IDENTICAL" criteria.
204+
205+
# RECIPES
206+
207+
os.chdir(replication)
208+
image_files = glob('*.img')
209+
hashes = pandas.DataFrame(columns=list(levels.keys()))
210+
211+
for image_file in image_files:
212+
print('Processing %s' %(image_file))
213+
hashy = get_image_hashes(image_file,levels=levels)
214+
hashes.loc[image_file,:] = hashy
215+
216+
217+
dfs['RECIPES'] = hashes
218+
for col in hashes.columns.tolist():
219+
print("%s: %s" %(col,len(hashes[col].unique().tolist())))
220+
221+
222+
223+
# QUASI REPLICATES
224+
# These have the same base, but different metadata folders.
141225

226+
os.chdir(replicates)
142227
image_files = glob("*.img")
228+
levels = get_levels(version=2.2)
229+
230+
hashes = pandas.DataFrame(columns=list(levels.keys()))
231+
232+
for image_file in image_files:
233+
print('Processing %s' %(image_file))
234+
hashy = get_image_hashes(image_file,levels=levels)
235+
hashes.loc[image_file,:] = hashy
236+
237+
dfs['QUASI_REPLICATE'] = hashes
238+
for col in hashes.columns.tolist():
239+
print("%s: %s" %(col,len(hashes[col].unique().tolist())))
240+
241+
242+
243+
pickle.dump(dfs,open('reproducibility_dfs.pkl','wb'))
244+
143245

144246
# Let's assess what files are identical across the images. We can use this to develop
145247
# our subsequent levels.
146248
# Here we will use the 100 files in the folder, and find files/folders consistent across them;
147249
# we will not include the runscript, since we know this was changed.
148-
level_names = ['IDENTICAL',
149-
'REPLICATE',
150-
'RUNSCRIPT']
151250

152-
dfs = dict()
251+
153252

154253
def generate_replication_df(level_name,image_files,version,skip_files=None):
155254

@@ -168,20 +267,6 @@ def generate_replication_df(level_name,image_files,version,skip_files=None):
168267
dfs['IDENTICAL'] = generate_replication_df('IDENTICAL',image_files,version=2.2)
169268
dfs['REPLICATE'] = generate_replication_df('REPLICATE',image_files,version=2.2, skip_files=['/singularity'])
170269

171-
# Finally, if we compare runscripts only, we should see two container versions
172-
hashes = []
173-
174-
for image_file in image_files:
175-
hashy = get_image_hash(image_file,level="RUNSCRIPT",version=2.2)
176-
hashes.append(hashy)
177-
178-
uniques = dict()
179-
for hashy in hashes:
180-
if hashy in uniques:
181-
uniques[hashy] +=1
182-
else:
183-
uniques[hashy] = 1
184-
185270

186271
# Outputs:
187272
# A function that exports, reads tarfile into memory (or disk?) and generates a list of

0 commit comments

Comments
 (0)