@@ -51,20 +51,17 @@ def worker(work_queue, done_queue):
5151 spinner = spinning_cursor ()
5252 p = current_process ()
5353 for nif_path in iter (work_queue .get , 'STOP' ):
54- sys .stdout .write ("\033 [K" )
55- sys .stdout .write (next (spinner ))
56- sys .stdout .write (" [{0}][{1}][{2}]" .format (work_queue .qsize (), p .name , nif_path ))
54+ sys .stdout .write ("\r \b \033 [K{0} [{1}][{2}][{3}]" .format (
55+ next (spinner ), work_queue .qsize (), p .name , nif_path ))
5756 sys .stdout .flush ()
58- sys .stdout .write ('\r \b ' )
5957 assets = []
6058 try :
6159 # assets.append('DEADBEEF')
6260 assets = retrieve_assets_from_nif (nif_path )
6361 except Exception :
6462 pass
65- done_queue .put ((p . name , nif_path , assets ))
63+ done_queue .put ((nif_path , assets ))
6664 done_queue .put ('STOP' )
67- print ("{} finished." .format (p .name ))
6865 return True
6966
7067
@@ -75,6 +72,7 @@ def main():
7572 spinner = spinning_cursor ()
7673 manifest = []
7774 additional_assets = []
75+ additional_assets_nif = {}
7876
7977 # setup multi-processing job
8078 workers = 8
@@ -86,11 +84,8 @@ def main():
8684 for row in uixr_data :
8785 row_license = row .get ('license' ).lower ()
8886 if row_license == 'cc0' or row_license == 'cc-by' or row_license == 'cc-by-nc' :
89- sys .stdout .write ("\033 [K" )
90- sys .stdout .write (next (spinner ))
91- sys .stdout .write (" [{0}]" .format (row .get ('asset' )))
87+ sys .stdout .write ("\r \b \033 [K{0} [{1}]" .format (next (spinner ), row .get ('asset' )))
9288 sys .stdout .flush ()
93- sys .stdout .write ('\r \b ' )
9489 file_path = os .path .join (ASSET_PATH , row .get ('asset' ))
9590 if not os .path .exists (file_path ):
9691 print ("WARNING: asset not found -> {0}" .format (row .get ('asset' )))
@@ -101,14 +96,13 @@ def main():
10196 work_queue .put (file_path )
10297
10398 # let multiprocessing parse the nifs
104- print ("Parsing NIFs for additional sub-assets: " )
99+ print ("\n \n Parsing NIFs for additional sub-assets: " )
105100 for i in range (workers ):
106101 p = Process (target = worker , args = (work_queue , done_queue ))
107102 processes .append (p )
108103 work_queue .put ('STOP' )
109104 p .start ()
110105
111- print ("Building assets to validate." )
112106 stops = 0
113107 while True :
114108 item = done_queue .get ()
@@ -118,10 +112,11 @@ def main():
118112 break
119113 else :
120114 # for worker_name, nif_name, nif_assets in iter(done_queue.get, 'STOP'):
121- _ , _ , nif_assets = item
115+ nif_path , nif_assets = item
122116 additional_assets += nif_assets
117+ additional_assets_nif [nif_path ] = nif_assets
123118
124- print ("Filtering assets." )
119+ print ("\n \n Filtering assets." )
125120 # remove duplicates
126121 additional_assets = set (additional_assets )
127122
@@ -135,9 +130,19 @@ def main():
135130 if filename in additional_assets :
136131 additional_assets .remove (filename )
137132
138- print ("Gathering sub-assets: " )
133+ # match assets back to NIF
134+ additional_assets = list (additional_assets )
135+ for i in range (len (additional_assets )):
136+ asset = additional_assets [i ]
137+ nifs_found = []
138+ for nif , assets in additional_assets_nif .items ():
139+ if asset in assets :
140+ nifs_found .append (nif )
141+ additional_assets [i ] = (asset , [nif .replace (ASSET_PATH , '' ) for nif in nifs_found ])
142+
143+ print ("\n \n Gathering sub-assets: " )
139144 # iterate through all sub assets
140- for nif_asset in additional_assets :
145+ for nif_asset , nifs in additional_assets :
141146 found = False
142147 na_filename , _ = os .path .splitext (nif_asset .lower ())
143148 na_filename += '.*'
@@ -147,11 +152,8 @@ def main():
147152 relative_asset_path = os .path .relpath (os .path .realpath (asset_path ), ASSET_PATH )
148153 if relative_asset_path in tar_ball .getnames ():
149154 break # file already exists, skip
150- sys .stdout .write ("\033 [K" )
151- sys .stdout .write (next (spinner ))
152- sys .stdout .write (" [{0}]" .format (relative_asset_path ))
155+ sys .stdout .write ("\r \b \033 [K{0} [{1}]" .format (next (spinner ), relative_asset_path ))
153156 sys .stdout .flush ()
154- sys .stdout .write ('\r \b ' )
155157 f .seek (0 ) # reset to beginning of csv file
156158 for row in uixr_data :
157159 if find_match (row .get ('asset' ), relative_asset_path ):
@@ -163,12 +165,10 @@ def main():
163165 elif row_license == 'cc-by-nc' :
164166 break # good to go, break
165167 else :
166- print ("WARNING: Non-CC license asset -> {0}" .format (relative_asset_path ))
167- found = False
168- tar_ball .add (asset_path , relative_asset_path )
168+ print ("\n WARNING: Non-CC license asset -> {0} ({1})\n " .format (
169+ relative_asset_path , nifs ))
169170 if not found :
170- print ("WARNING: sub-asset not found -> {0}" .format (nif_asset ))
171-
171+ print ("\n WARNING: sub-asset not found -> {0} ({1})\n " .format (nif_asset , nifs ))
172172 tar_ball .close ()
173173
174174
0 commit comments