# Let's keep images in a temporary folder
base = "/home/vanessa/Documents/Work/singularity/hub"
storage = "%s/containers" % base
-if not os.path.exists(storage):
-    os.mkdir(storage)
-os.chdir(storage)
+clones = "%s/clones" % storage          # same image downloaded multiple times
+replicates = "%s/replicates" % storage  # these are quasi replicates
+                                        # (had to change runscripts to commit)
+replication = "%s/quasi_replicates" % storage  # these are exact replicates, from the same recipe
+hub = "%s/collections" % storage
+
+# Create all folders for images
+paths = [storage, replicates, clones, replication, hub]
+for pathy in paths:
+    if not os.path.exists(pathy):
+        os.mkdir(pathy)

# We will keep a table of information
columns = ['name', 'build_time_seconds', 'size', 'commit', 'estimated_os']
@@ -32,10 +40,13 @@ def get_top_os(x):
    return sorted(x.items(), key=lambda x: (x[1], x[0]), reverse=True)[0][0]

#############################################################################
-# Task 1: Download the containers and metadata! (different images)
+# Task 1: Get Containers
#############################################################################

+# SINGULARITY HUB HAS QUASI REPLICATES, complete metadata
+
# Retrieve the container based on the name
+os.chdir(replicates)
collection = shub.get_collection(container_name)
results['repo_name'] = container_name
results['collection'] = collection
@@ -65,91 +76,179 @@ def get_top_os(x):
pickle.dump(results, open(result_file, 'wb'))


+# IDENTICAL
+
+os.chdir(clones)
+chosen_one = results['df'].index[10]
+manifest = results['containers'][chosen_one]
+for num in range(0, 100):
+    clone_name = "%s-%s" % (manifest['name'].replace('/', '-'), num)
+    image = shub.pull_container(manifest,
+                                download_folder=clones,
+                                name="%s.img.gz" % clone_name)
+
+
+# EXACT REPLICATES
+
+runscript = '''Bootstrap: docker
+From: ubuntu:latest
+
+%runscript
+exec echo "Hello World!"
+'''
+
+os.chdir(replication)
+with open('Singularity', 'w') as filey:
+    filey.write(runscript)
+
+from singularity.cli import Singularity
+cli = Singularity()
+
+containers = dict()
+for num in range(0, 100):
+    container_name = 'ubuntu-hello-world-%s.img' % num
+    cli.create(container_name)
+    cli.bootstrap(container_name, 'Singularity')
+
+    # keep a manifest entry per replicate (manifest is the clone's, from above)
+    container_uri = '%s-%s' % (container_name, manifest['version'])
+    containers[container_uri] = manifest
+
+
+# ALL SINGULARITY HUB
+containers = shub.get_containers()
+os.chdir(hub)
+for container_name, container in containers.items():
+    for branch, manifest in container.items():
+        name = manifest['name'].replace('/', '-')
+        image = shub.pull_container(manifest,
+                                    download_folder=hub,
+                                    name="%s-%s.img.gz" % (name, branch))
+
+pickle.dump(containers, open('%s/container_manifests.pkl' % hub, 'wb'))
+
+
#############################################################################
# Task 2: Develop levels of reproducibility
#############################################################################

from singularity.reproduce import (
+    assess_differences,
    get_content_hashes,
    get_image_hash,
+    get_image_hashes,
    get_levels
)

-levels = get_levels(version=2.2)
+levels = get_levels()
result_file = '%s/results-%s.pkl' % (base, container_name.replace('/', '-'))
results = pickle.load(open(result_file, 'rb'))

-os.chdir(storage)
-image_files = glob("*.img")

+# Let's assess what files are identical across pairs of images in different sets

-# Let's assess what files are identical across the images. We can use this to develop
-# our subsequent levels.
-# Here we will use the 100 files in the folder, and find files/folders consistent across them;
-# we will not include the runscript, since we know this was changed.
-identical_across = get_content_hashes(image_files[0], level='IDENTICAL', version=2.2)
-image_files.pop(0)
-not_identical = []
+# Quasi replicate: same base OS, different build host, slightly different runscript
+os.chdir(replication)
+image_files = glob('*.img')
+diffs = assess_differences(image_files[0], image_files[1], levels=levels)
+pickle.dump(diffs, open('%s/diff_quasi_replicate_pair.pkl' % base, 'wb'))

-for image_file in image_files:
-    hashes = get_content_hashes(image_file, level='IDENTICAL', version=2.2)
-    for hash_path, hash_val in hashes.items():
-        if hash_path in identical_across:
-            if not identical_across[hash_path] == hashes[hash_path]:
-                del identical_across[hash_path]
-                not_identical.append(hash_path)
-
-# From the above we learn that all files are identical except for those in:
-
-# ['./.run',
-#  './etc/hosts',
-#  './singularity',
-#  './etc/mtab',
-#  './.exec',
-#  './etc/resolv.conf',
-#  './.shell',
-#  './environment']
-
-# Since we know that the images were produced by way of changing the runscript,
-# and this influences the singularity metadata folders, we can conclude that we would
-# see differences for REPLICATE in /etc/hosts, /etc/mtab, and /etc/resolv.conf.
-
-# Identical: logically, if we compare an image to itself, all files are the same
# Replicate: if we produce an equivalent image at a different time, we might have
# variance in package directories (anything involving mirrors or other variable sources)
-# Environment/Runscript/Labels: these are logical to compare; we compare the hash of
-# just a few specific files in the image

+os.chdir(replicates)
+image_files = glob('*.img')
+diffs = assess_differences(image_files[0], image_files[1], levels=levels)
+pickle.dump(diffs, open('%s/diff_replicate_pair.pkl' % base, 'wb'))
+
+# Identical: all files are the same
+
+os.chdir(clones)
+image_files = glob('*.img')
+diffs = assess_differences(image_files[0], image_files[1], levels=levels)
+pickle.dump(diffs, open('%s/diff_clone_pair.pkl' % base, 'wb'))
+
+# Different images, same OS
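+
+# Aside: a "level" boils down to a rule for which file paths count toward an
+# image's hash. The helper below is an illustrative sketch of that idea only
+# (a hypothetical function, not the singularity.reproduce implementation):
+# hash the sorted (path, sha1) pairs, skipping paths that match a skip pattern.
+import re
+import hashlib
+
+def sketch_level_hash(content_hashes, skip_pattern=None):
+    '''content_hashes: dict of {file_path: sha1}, e.g. from get_content_hashes'''
+    digest = hashlib.sha1()
+    for path in sorted(content_hashes):
+        if skip_pattern is not None and re.search(skip_pattern, path):
+            continue  # e.g. skip '/etc/mtab|/etc/hosts' for a REPLICATE level
+        digest.update(content_hashes[path].encode('utf-8'))
+    return digest.hexdigest()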

#############################################################################
# Task 3: Assess levels of reproducibility
#############################################################################

# The first thing we want to do is evaluate our metrics for reproducibility.
+dfs = dict()

-# Question 1: What files are consistent across the same image?
-# LEVEL IDENTICAL
-# Here we will download the same image 10 times, create a sha1 sum of the files,
-# and determine which sets of files should be consistent for the same image file
+# ASSESS IDENTICAL IMAGES ACROSS ALL LEVELS
+
+os.chdir(clones)
+image_files = glob("*.img")
+levels = get_levels(version=2.2)

+hashes = pandas.DataFrame(columns=list(levels.keys()))

-# Question 2: What files are consistent across the same image, different downloads?
-# LEVEL REPLICATE
+for image_file in image_files:
+    print('Processing %s' % image_file)
+    hashy = get_image_hashes(image_file, levels=levels)
+    hashes.loc[image_file, :] = hashy
+
+
+dfs['IDENTICAL'] = hashes
+for col in hashes.columns.tolist():
+    print("%s: %s" % (col, hashes[col].unique().tolist()))
+
+# IDENTICAL:   ['364715054c17c29338787bd231e58d90caff154b']
+# RUNSCRIPT:   ['da39a3ee5e6b4b0d3255bfef95601890afd80709']
+# ENVIRONMENT: ['22ff3c5c5fa63d3f08a48669d90fcb1459e6e74b']
+# RECIPE:      ['0e0efcb05fb4727f77b999d135c8a58a8ce468d5']
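+
+# Quick sanity check of the claim above (an illustrative one-liner using
+# pandas' nunique): every level should collapse to exactly one hash here.
+assert all(hashes[col].nunique() == 1 for col in hashes.columns)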
+
+
+# Question 2: What files are consistent across the same image, different builds?
# An image that is a replicate should be assessed as identical using the "REPLICATE"
-# criteria.
+# criteria, but not using the "IDENTICAL" criteria
+
+# RECIPES
+
+os.chdir(replication)
+image_files = glob('*.img')
+hashes = pandas.DataFrame(columns=list(levels.keys()))
+
+for image_file in image_files:
+    print('Processing %s' % image_file)
+    hashy = get_image_hashes(image_file, levels=levels)
+    hashes.loc[image_file, :] = hashy
+
+
+dfs['RECIPES'] = hashes
+for col in hashes.columns.tolist():
+    print("%s: %s" % (col, len(hashes[col].unique().tolist())))
+
+
+# QUASI REPLICATES
+# These have the same base, but different metadata folders.

+os.chdir(replicates)
image_files = glob("*.img")
+levels = get_levels(version=2.2)
+
+hashes = pandas.DataFrame(columns=list(levels.keys()))
+
+for image_file in image_files:
+    print('Processing %s' % image_file)
+    hashy = get_image_hashes(image_file, levels=levels)
+    hashes.loc[image_file, :] = hashy
+
+dfs['QUASI_REPLICATE'] = hashes
+for col in hashes.columns.tolist():
+    print("%s: %s" % (col, len(hashes[col].unique().tolist())))
+
+
+pickle.dump(dfs, open('reproducibility_dfs.pkl', 'wb'))
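+
+# A compact way to compare the three sets side by side (illustrative sketch):
+# one row per set, one column per level, counting distinct hashes per level.
+summary = pandas.DataFrame({name: df.nunique() for name, df in dfs.items()}).T
+print(summary)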
+

# Let's assess what files are identical across the images. We can use this to develop
# our subsequent levels.
# Here we will use the 100 files in the folder, and find files/folders consistent across them;
# we will not include the runscript, since we know this was changed.
-level_names = ['IDENTICAL',
-               'REPLICATE',
-               'RUNSCRIPT']

-dfs = dict()
+

def generate_replication_df(level_name, image_files, version, skip_files=None):

@@ -168,20 +267,6 @@ def generate_replication_df(level_name,image_files,version,skip_files=None):
dfs['IDENTICAL'] = generate_replication_df('IDENTICAL', image_files, version=2.2)
dfs['REPLICATE'] = generate_replication_df('REPLICATE', image_files, version=2.2, skip_files=['/singularity'])

-# Finally, if we compare runscripts only, we should see two container versions
-hashes = []
-
-for image_file in image_files:
-    hashy = get_image_hash(image_file, level="RUNSCRIPT", version=2.2)
-    hashes.append(hashy)
-
-uniques = dict()
-for hashy in hashes:
-    if hashy in uniques:
-        uniques[hashy] += 1
-    else:
-        uniques[hashy] = 1
-

# Outputs:
# A function that exports, reads tarfile into memory (or disk?) and generates a list of
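
# One possible shape of that function (an illustrative sketch, assuming the
# Singularity 2.x "singularity export" command, which writes a tar to stdout):

import subprocess
import tarfile

def sketch_export_hashes(image_path, tar_path='/tmp/image.tar'):
    '''Export image_path to a tarfile and return a list of (member, sha1).'''
    with open(tar_path, 'wb') as tar_file:
        subprocess.check_call(['singularity', 'export', image_path],
                              stdout=tar_file)
    member_hashes = []
    with tarfile.open(tar_path) as tar:
        for member in tar.getmembers():
            if member.isfile():
                content = tar.extractfile(member).read()
                member_hashes.append((member.name,
                                      hashlib.sha1(content).hexdigest()))
    return member_hashes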