1818)
1919from singularity .analysis .utils import get_package_base
2020from singularity .package import package as make_package
21- from singularity .utils import get_installdir
21+ from singularity .utils import (
22+ get_installdir ,
23+ update_dict ,
24+ update_dict_sum
25+ )
26+
2227from singularity .views .utils import get_container_contents
2328
2429from singularity .package import (
3540
3641
3742
38- def get_diff (container = None ,package = None ,sudopw = None ):
43+ def get_diff (container = None ,image_package = None ,sudopw = None ):
3944 '''get diff will return a dictionary of folder paths and files that
4045 are in an image or package vs. all standard operating systems. The
4146 algorithm is explained below.
4247 :param container: if provided, will use container as image. Can also provide
43- :param package : if provided, can be used instead of container
48+ :param image_package : if provided, can be used instead of container
4449 :param sudopw: needed if a package isn't provided (will prompt user)
4550
4651 ::notes
@@ -51,14 +56,14 @@ def get_diff(container=None,package=None,sudopw=None):
5156 3) organize custom files into dict based on folder name
5257
5358 '''
54- if package == None :
55- package = make_package (container ,remove_image = True ,sudopw = sudopw )
59+ if image_package == None :
60+ image_package = make_package (container ,remove_image = True ,sudopw = sudopw )
5661
5762 # Find the most similar os
58- most_similar = estimate_os (package = package ,sudopw = sudopw )
63+ most_similar = estimate_os (image_package = image_package ,sudopw = sudopw )
5964 similar_package = "%s/docker-os/%s.img.zip" % (get_package_base (),most_similar )
6065
61- comparison = compare_containers (image_package1 = package ,
66+ comparison = compare_containers (image_package1 = image_package ,
6267 image_package2 = similar_package ,
6368 by = 'files.txt' )['files.txt' ]
6469
@@ -86,30 +91,34 @@ def get_diff(container=None,package=None,sudopw=None):
8691###################################################################################
8792
8893
def estimate_os(container=None, image_package=None, sudopw=None, return_top=True):
    '''estimate os will compare a package to singularity python's database of
    operating system images, and return the docker image most similar
    :param container: image handed to make_package when image_package is not given
    :param image_package: the package created from the image to estimate.
    :param sudopw: forwarded to make_package when the package has to be built
    :param return_top: return only the most similar (estimated os) default True
    :returns: the label of the highest scoring entry when return_top is set,
    otherwise the full comparison table with a single 'SCORE' column
    '''
    # Only build a package when the caller did not already supply one
    if image_package is None:
        image_package = make_package(container, remove_image=True, sudopw=sudopw)

    # Transposed so that the single resulting column can be labelled 'SCORE'
    # (presumably one row per OS image in the database; confirm against compare_packages)
    comparison = compare_packages(packages_set1=[image_package])['files.txt'].transpose()
    comparison.columns = ['SCORE']
    most_similar = comparison['SCORE'].idxmax()
    print("Most similar OS found to be ", most_similar)
    if return_top:
        return most_similar
    return comparison
105110
106111
107- def get_tags (container = None ,package = None ,sudopw = None ,search_folders = None ):
112+ def get_tags (container = None ,image_package = None ,sudopw = None ,search_folders = None ,diff = None ,
113+ return_unique = True ):
108114 '''get tags will return a list of tags that describe the software in an image,
109115 meaning inside of a particular folder. If search_folder is not defined, uses bin
110116 :param container: if provided, will use container as image. Can also provide
111- :param package : if provided, can be used instead of container
117+ :param image_package : if provided, can be used instead of container
112118 :param search_folders: specify one or more folders to look for tags
119+ :param diff: the difference between a container and its parent OS from get_diff
120+ if None, will be derived.
121+ :param return_unique: return unique files in folders. Default True.
113122 Default is 'bin'
114123
115124 ::notes
@@ -120,9 +129,10 @@ def get_tags(container=None,package=None,sudopw=None,search_folders=None):
120129 3) organize custom files into dict based on folder name
121130 4) return search_folders as tags
122131 '''
123- folders = get_diff (container = container ,
124- package = package ,
125- sudopw = sudopw )
132+ if diff == None :
133+ diff = get_diff (container = container ,
134+ image_package = image_package ,
135+ sudopw = sudopw )
126136
127137 if search_folders == None :
128138 search_folders = 'bin'
@@ -132,10 +142,77 @@ def get_tags(container=None,package=None,sudopw=None,search_folders=None):
132142
133143 tags = []
134144 for search_folder in search_folders :
135- if search_folder in folders :
145+ if search_folder in diff :
136146 bot .logger .info ("Adding tags for folder %s" ,search_folder )
137- tags = tags + folders [search_folder ]
147+ tags = tags + diff [search_folder ]
138148 else :
139149 bot .logger .info ("Did not find folder %s in difference." ,search_folder )
140- tags = numpy .unique (tags ).tolist ()
150+
151+ if return_unique == True :
152+ tags = numpy .unique (tags ).tolist ()
141153 return tags
154+
155+
156+ ###################################################################################
157+ # COUNTING ########################################################################
158+ ###################################################################################
159+
160+
def file_counts(container=None, patterns=None, image_package=None, sudopw=None, diff=None):
    '''file counts will return the number of files in a diff that match one or
    more regular expressions. If no patterns are defined, a default of readme
    is used. Matching is case insensitive.
    :param container: passed to get_diff when diff is not provided
    :param image_package: if provided, can be used instead of container
    :param patterns: one or more patterns (str or list) of files to search for.
    :param sudopw: passed to get_diff when diff is not provided
    :param diff: the difference between a container and its parent OS from get_diff
    if not provided, will be generated.
    :returns: integer count of matches, summed over every folder and every
    pattern (a file that matches two patterns contributes two to the total)
    '''
    if diff is None:
        diff = get_diff(container=container,
                        image_package=image_package,
                        sudopw=sudopw)

    if patterns is None:
        patterns = 'readme'

    if not isinstance(patterns, list):
        patterns = [patterns]

    # Use IGNORECASE instead of lowercasing the pattern text: calling lower()
    # on a regular expression turns escapes such as \S, \D or \W into their
    # opposite classes and silently changes what the pattern matches.
    regexes = [re.compile(pattern, re.IGNORECASE) for pattern in patterns]

    count = 0
    for items in diff.values():
        for regex in regexes:
            count += sum(1 for item in items if regex.search(item))
    bot.logger.info("Total files matching patterns is %s", count)
    return count
188+
189+
def extension_counts(container=None, image_package=None, sudopw=None, diff=None, return_counts=True):
    '''extension counts will return a dictionary keyed by file extension for
    the files in an image diff.
    :param container: passed to get_diff when diff is not provided
    :param image_package: if provided, can be used instead of container
    :param sudopw: passed to get_diff when diff is not provided
    :param diff: the difference between a container and its parent OS from get_diff
    if not provided, will be generated.
    :param return_counts: return counts over dict with files. Default True
    :returns: dictionary built by update_dict_sum (return_counts True) or by
    update_dict (return_counts False); keys are extensions as returned by
    os.path.splitext (leading dot included), or 'no-extension' for files
    without one
    '''
    if diff is None:
        diff = get_diff(container=container,
                        image_package=image_package,
                        sudopw=sudopw)

    extensions = dict()
    for items in diff.values():
        for item in items:
            ext = os.path.splitext(item)[1]
            # Files without an extension are grouped under one shared key
            key = ext if ext != '' else 'no-extension'
            if return_counts:
                # presumably increments the tally stored under key; confirm in singularity.utils
                extensions = update_dict_sum(extensions, key)
            else:
                # presumably collects the file path under key; confirm in singularity.utils
                extensions = update_dict(extensions, key, item)

    return extensions
0 commit comments