Skip to content

Commit 97236db

Browse files
author
Mark Veillette
committed
added append option to make_dataset
1 parent 49c0a2e commit 97236db

File tree

1 file changed

+29
-14
lines changed

1 file changed

+29
-14
lines changed

radar_nowcasting/make_dataset.py

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ def parse_args():
2929
parser.add_argument('--n_test',type=int,help='Maximum number of samples to use for testing (None=all)',default=None)
3030
# n_chunks sets how many pieces the generator is split into when reading/writing
parser.add_argument('--n_chunks', type=int, help='Number of chunks to use (increase if memory limited)',default=20)
3131
parser.add_argument('--split_date', type=str, help='Day (yymmdd) to split train and test',default='190601')
32+
# --append: grow one HDF5 file per split instead of writing a separate file per chunk
parser.add_argument('--append',action='store_true',help='Write chunks into one single file instead of individual files')
3233

33-
3434
args = parser.parse_args()
3535
return args
3636

@@ -56,19 +56,23 @@ def main(args):
5656

5757
logger.info('Reading/writing training data to %s' % ('%s/nowcast_training.h5' % args.output_location))
5858
read_write_chunks('%s/nowcast_training.h5' % args.output_location,trn_generator,args.n_chunks,
59-
args.input_types, args.output_types)
59+
args.input_types, args.output_types,append=args.append)
6060
logger.info('Reading/writing testing data to %s' % ('%s/nowcast_testing.h5' % args.output_location))
6161
read_write_chunks('%s/nowcast_testing.h5' % args.output_location,tst_generator,args.n_chunks,
62-
args.input_types, args.output_types)
62+
args.input_types, args.output_types,append=args.append)
6363

6464

65-
def read_write_chunks( filename, generator, n_chunks, input_types, output_types ):
65+
def read_write_chunks( out_filename, generator, n_chunks, input_types, output_types, append=False ):
6666
logger = logging.getLogger(__name__)
6767
chunksize = len(generator)//n_chunks
6868
# get first chunk
6969
logger.info('Gathering chunk 0/%s:' % n_chunks)
7070
X,Y=generator.load_batches(n_batches=chunksize,offset=0,progress_bar=True)
71+
7172
# Create datasets
73+
fn,ext=os.path.splitext(out_filename)
74+
cs = '' if append else '_000'
75+
filename=fn+cs+ext
7276
for i,x in enumerate(X):
7377
with h5py.File(filename, 'w') as hf:
7478
hf.create_dataset('IN_%s' % input_types[i], data=x, maxshape=(None,x.shape[1],x.shape[2],x.shape[3]))
@@ -77,22 +81,33 @@ def read_write_chunks( filename, generator, n_chunks, input_types, output_types
7781
hf.create_dataset('OUT_%s' % output_types[i], data=y, maxshape=(None,y.shape[1],y.shape[2],y.shape[3]))
7882
# Gather other chunks
7983
for c in range(1,n_chunks+1):
84+
cs = '' if append else '_%.3d' % c
85+
filename=fn+cs+ext  # use the formatted suffix cs; c is an int and str+int raises TypeError
8086
offset = c*chunksize
8187
n_batches = min(chunksize,len(generator)-offset)
8288
if n_batches<0: # all done
8389
break
8490
logger.info('Gathering chunk %d/%s:' % (c,n_chunks))
8591
X,Y=generator.load_batches(n_batches=n_batches,offset=offset,progress_bar=True)
86-
for i,x in enumerate(X):
87-
with h5py.File(filename, 'a') as hf:
88-
k='IN_%s' % input_types[i]
89-
hf[k].resize((hf[k].shape[0] + x.shape[0]), axis = 0)
90-
hf[k][-x.shape[0]:] = x
91-
for i,y in enumerate(Y):
92-
with h5py.File(filename, 'a') as hf:
93-
k='OUT_%s' % output_types[i]
94-
hf[k].resize((hf[k].shape[0] + y.shape[0]), axis = 0)
95-
hf[k][-y.shape[0]:] = y
92+
if append:
93+
for i,x in enumerate(X):
94+
with h5py.File(filename, 'a') as hf:
95+
k='IN_%s' % input_types[i]
96+
hf[k].resize((hf[k].shape[0] + x.shape[0]), axis = 0)
97+
hf[k][-x.shape[0]:] = x
98+
for i,y in enumerate(Y):
99+
with h5py.File(filename, 'a') as hf:
100+
k='OUT_%s' % output_types[i]
101+
hf[k].resize((hf[k].shape[0] + y.shape[0]), axis = 0)
102+
hf[k][-y.shape[0]:] = y
103+
else: # write to a new file
104+
for i,x in enumerate(X):
105+
with h5py.File(filename, 'w') as hf:
106+
hf.create_dataset('IN_%s' % input_types[i], data=x, maxshape=(None,x.shape[1],x.shape[2],x.shape[3]))
107+
for i,y in enumerate(Y):
108+
with h5py.File(filename, 'a') as hf:
109+
hf.create_dataset('OUT_%s' % output_types[i], data=y, maxshape=(None,y.shape[1],y.shape[2],y.shape[3]))
110+
96111

97112

98113
class NowcastGenerator(SEVIRGenerator):

0 commit comments

Comments
 (0)