@@ -29,8 +29,8 @@ def parse_args():
2929 parser .add_argument ('--n_test' ,type = int ,help = 'Maximum number of samples to use for testing (None=all)' ,default = None )
3030 parser .add_argument ('--n_chunks' , type = int , help = 'Number of chucks to use (increase if memory limited)' ,default = 20 )
3131 parser .add_argument ('--split_date' , type = str , help = 'Day (yymmdd) to split train and test' ,default = '190601' )
32+ parser .add_argument ('--append' ,action = 'store_true' ,help = 'Wrtie chunks into one single file instead of individual files' )
3233
33-
3434 args = parser .parse_args ()
3535 return args
3636
@@ -56,19 +56,23 @@ def main(args):
5656
5757 logger .info ('Reading/writing training data to %s' % ('%s/nowcast_training.h5' % args .output_location ))
5858 read_write_chunks ('%s/nowcast_training.h5' % args .output_location ,trn_generator ,args .n_chunks ,
59- args .input_types , args .output_types )
59+ args .input_types , args .output_types , append = args . append )
6060 logger .info ('Reading/writing testing data to %s' % ('%s/nowcast_testing.h5' % args .output_location ))
6161 read_write_chunks ('%s/nowcast_testing.h5' % args .output_location ,tst_generator ,args .n_chunks ,
62- args .input_types , args .output_types )
62+ args .input_types , args .output_types , append = args . append )
6363
6464
def read_write_chunks(out_filename, generator, n_chunks, input_types, output_types, append=False):
    """Load the generator's data chunk by chunk and write each chunk to HDF5.

    Inputs are stored under the keys ``IN_<input type>`` and outputs under
    ``OUT_<output type>``.

    Parameters
    ----------
    out_filename : str
        Target HDF5 path.  With ``append=True`` every chunk goes into this
        file.  Otherwise chunk ``c`` is written to its own file, named by
        inserting ``_<c, zero-padded to 3 digits>`` before the extension
        (``name.h5`` -> ``name_000.h5``, ``name_001.h5``, ...).
    generator
        Object supporting ``len()`` and
        ``load_batches(n_batches=..., offset=..., progress_bar=True)``, which
        must return a pair ``(X, Y)`` of sequences of arrays.
        NOTE(review): ``len(generator)`` is presumably the number of batches
        -- confirm against the generator class.
    n_chunks : int
        Target number of chunks; the chunk size is
        ``len(generator) // n_chunks`` (at least 1).  One additional, shorter
        chunk is written when the length is not evenly divisible.
    input_types, output_types : sequence of str
        Names used to build the dataset keys, indexed in the same order as
        the arrays in ``X`` and ``Y`` respectively.
    append : bool, optional
        If True, grow the datasets of a single file along axis 0 instead of
        writing one file per chunk.
    """
    logger = logging.getLogger(__name__)
    total = len(generator)
    # Never let the chunk size reach 0 (fewer items than chunks), otherwise
    # nothing at all would be read.
    chunksize = max(1, total // n_chunks)
    base, ext = os.path.splitext(out_filename)

    # Walk the offsets until the generator is exhausted, so no trailing items
    # are dropped and no empty trailing chunk is produced.  An empty generator
    # still yields one (zero-length) chunk so the output file gets created.
    offsets = list(range(0, total, chunksize)) or [0]
    for c, offset in enumerate(offsets):
        n_batches = min(chunksize, total - offset)
        logger.info('Gathering chunk %d/%s:' % (c, n_chunks))
        X, Y = generator.load_batches(n_batches=n_batches, offset=offset, progress_bar=True)

        named_arrays = [('IN_%s' % input_types[i], x) for i, x in enumerate(X)]
        named_arrays += [('OUT_%s' % output_types[i], y) for i, y in enumerate(Y)]

        if append:
            filename = out_filename
        else:
            filename = base + '_%.3d' % c + ext

        if append and c > 0:
            _extend_datasets(filename, named_arrays)
        else:
            _create_datasets(filename, named_arrays)


def _create_datasets(filename, named_arrays):
    """Create (or overwrite) ``filename`` with one resizable dataset per array."""
    # Open once in 'w' mode: reopening with 'w' per dataset would truncate the
    # file and keep only the last dataset written.
    with h5py.File(filename, 'w') as hf:
        for key, data in named_arrays:
            # Leading axis is unlimited so later chunks can be appended.
            hf.create_dataset(key, data=data, maxshape=(None,) + tuple(data.shape[1:]))


def _extend_datasets(filename, named_arrays):
    """Append each array to the existing dataset of the same key along axis 0."""
    with h5py.File(filename, 'a') as hf:
        for key, data in named_arrays:
            n_new = data.shape[0]
            if n_new == 0:
                # A `[-0:]` slice would address the whole dataset, not an
                # empty tail, so there is nothing safe to assign.
                continue
            dset = hf[key]
            dset.resize(dset.shape[0] + n_new, axis=0)
            dset[-n_new:] = data
110+
96111
97112
98113class NowcastGenerator (SEVIRGenerator ):
0 commit comments