-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathFUN_nc_copy_with_limit.m
More file actions
403 lines (337 loc) · 16.1 KB
/
FUN_nc_copy_with_limit.m
File metadata and controls
403 lines (337 loc) · 16.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
function FUN_nc_copy_with_limit( filename0, filename1, dim_limit_name, dim_limit_val, is_compressed_output, varargin )
% Copy a netcdf file, keeping only a selected range along the given dimensions.
%
% For complex cases, "FUN_nc_OpenDAP_with_limit" is recommended.
% -------------------------------------------------------------------------
% INPUT:
%      filename0       [char]: source netcdf file
%      filename1       [char]: name of the output netcdf file (created as NETCDF4)
%      dim_limit_name  [cell]: names of the dimensions to be limited
%      dim_limit_val   [cell]: the limit for each dimension listed in
%                              dim_limit_name (same order). It is passed
%                              as-is to FUN_nc_varget_sub_genStartCount.
%      is_compressed_output  : true (default): variables in the output file
%                              are compressed (shuffle + deflate level 1).
%                              Non-dimensional variables are never
%                              compressed, even if this is set to true.
%
%      More optional parameters (name-value pairs given after
%      is_compressed_output) are listed in the "optional parameters"
%      section below. Unknown parameters raise an error.
%
% -------------------------------------------------------------------------
% Output: None
%
% Notice: To recognize an axis correctly, by default there must be one
%         variable named after each limited dimension. For example, if a
%         dimension is named "x", then there must be a variable named "x"
%         in the netcdf file. See "dim_varname" below for alternatives.
%
% -------------------------------------------------------------------------
% Example:
%     dim_limit_name = {'lon','lat'};
%     dim_limit_val  = {[90 180],[0 50]};
%
%     filename0 = 'sss_binned_L3_MON_SCI_V3.0_CAP_2013.nc';
%     filename1 = ['part' datestr(now,'HH_MM_SS') '.nc'];
%
%     FUN_nc_copy_with_limit( filename0, filename1, dim_limit_name, dim_limit_val )
% -------------------------------------------------------------------------
%
% v1.26 by L. Chi (helped by Codex):
%       Support non-contiguous 1-based index lists in dim_limit_val, using
%       the same discrete-index behavior as FUN_nc_varget_enhanced_region_2_multifile.
%
% v1.25 by L. Chi: use onCleanup to make sure the netcdf files will be closed
%                  even if an error occurs
% v1.24 by L. Chi: Output warning information for out of range dimension.
%
% v1.23 by L. Chi: support copying variables by indexes at specific dimensions, see "dim_varname" below
% v1.22 by L. Chi: support rare data types
% V1.21 by L. Chi:
%       It is possible to specify a dimension whose chunksize will be
%       forced to 1. See optional parameter "chunksize1_dim_name".
%
% V1.20 by L. Chi:
%       Estimate the chunksize automatically.
%       (Please note that the chunksize in the source (filename0) will not be used in the destination (filename1).
%        This may be included in a future version.)
% V1.10 by L. Chi:
%       Support non-dimensional variables.
% V1.00
% By L.Chi V1.00 2016-10-24 (L.Chi.Ocean@outlook.com)
% -------------------------------------------------------------------------

% ---- default values --------------------------------------------------
if ~exist('is_compressed_output','var') || isempty( is_compressed_output )
    is_compressed_output = true;
end

% ---- optional parameters --------------------------------------------------
% dim_varname [cell, optional]: name of the variable defining the axis at each dimension.
%      + by default, each axis is defined by a variable sharing the same name as the dimension.
%      + "dim_varname{1} = nan" will force the dimension to be associated
%        with a vector defined as 1, 2, 3, ... Nx, where Nx is the length
%        of the dimension, ignoring the variable sharing the same name
%        with this dimension (if it exists).
%      + dim_varname can also contain arrays to set the longitude,
%        latitude, time, etc, manually instead of reading them from the
%        netcdf file. E.g., dim_varname = { [-82:1/4:-55], [26:1/4:45]};
[dim_varname, varargin] = FUN_codetools_read_from_varargin( varargin, 'dim_varname', dim_limit_name );

% is_auto_chunksize: replace the default setting for chunksize by a customized equation in Easy_NetCDF
[is_auto_chunksize, varargin] = FUN_codetools_read_from_varargin( varargin, 'is_auto_chunksize', false );

% chunksize1_dim_name: name of a dimension whose chunksize is forced to 1.
[chunksize1_dim_name, varargin] = FUN_codetools_read_from_varargin( varargin, 'chunksize1_dim_name', [] );

% is_add_preset_att: add some preset attributes in the output files, like "Copy Source", "Copy Date", "Copy Range".
[is_add_preset_att, varargin] = FUN_codetools_read_from_varargin( varargin, 'is_add_preset_att', true );

% is_skip_with_any_empty_dim: skip the whole file (with a warning) if any
% dimension ends up with zero length. When it is false (default), the copy
% is attempted anyway.
[is_skip_with_any_empty_dim, varargin] = FUN_codetools_read_from_varargin( varargin, 'is_skip_with_any_empty_dim', false );

% var_included: variables to be included (case-insensitive). Empty => all variables.
[var_included, varargin] = FUN_codetools_read_from_varargin( varargin, 'var_included', {} );

% var_excluded: variables to be excluded (case-insensitive).
[var_excluded, varargin] = FUN_codetools_read_from_varargin( varargin, 'var_excluded', {} );

if ~isempty( varargin )
    error('Unkown parameters found!')
end

%% Load the original data
info0 = ncinfo(filename0);
ncid0 = netcdf.open( filename0, 'NOWRITE' );
cleanup_ncid0 = onCleanup(@() netcdf.close(ncid0));

%% prepare dimensions
% Pre-create an empty struct array so that a file without any dimension
% (scalar variables only) does not fail on the later uses of info1.Dim.
info1.Dim = struct( 'Name', {}, 'Length', {}, 'MatInd', {}, 'start', {}, 'count', {}, 'ind', {}, 'is_seleted', {} );

for ii = 1:length(info0.Dimensions)

    % decide whether this dimension should be loaded partly.
    dim_cmp_loc = strcmp( info0.Dimensions(ii).Name, dim_limit_name );

    if any( dim_cmp_loc )
        % ---- this dimension is limited ----
        ij = find( dim_cmp_loc ); % position in dim_limit_name / dim_limit_val / dim_varname
        dim_name_now    = dim_limit_name{ij};
        dim_varname_now = dim_varname{ij};

        % determine the values of the axis
        if ischar(dim_varname_now) || isstring(dim_varname_now)
            % read the axis from a variable in the source file
            varid_now = netcdf.inqVarID( ncid0, dim_varname_now );
            var_now   = netcdf.getVar( ncid0, varid_now );

        elseif isnumeric(dim_varname_now) && ~isempty(dim_varname_now) && all( isnan( dim_varname_now(:) ) )
            % nan: use 1, 2, ..., N as the axis, so that limits are indexes.
            % (isnumeric is checked first so that an unsupported type
            %  reaches the error message below instead of failing in isnan)
            dimid_now   = netcdf.inqDimID( ncid0, dim_name_now );
            [~, dimlen] = netcdf.inqDim( ncid0, dimid_now );
            var_now     = 1:dimlen;

        elseif isnumeric(dim_varname_now)
            % the axis is given manually
            var_now = dim_varname_now;

        else
            error('dim_varname can only be char, nan, or numeric array!')
        end

        [start, count, ind] = FUN_nc_varget_sub_genStartCount( var_now, dim_limit_val{ij} );

        if count == 0
            % dim_limit_val{ij} is not guaranteed to have two elements
            % (e.g., an index list), so do not index (1)/(2) blindly.
            limit_now = dim_limit_val{ij};
            if numel( limit_now ) >= 2
                limit_str = [ num2str( limit_now(1) ) ' - ' num2str( limit_now(2) ) ];
            else
                limit_str = num2str( limit_now );
            end
            warning([' Dim: ' dim_name_now ' (min: ' num2str(min(var_now)) ' - max: ' num2str(max(var_now)) ') out of the required range: ' limit_str ])
        end

        info1.Dim(ii).Name       = dim_name_now;
        info1.Dim(ii).Length     = count;
        info1.Dim(ii).MatInd     = ii; % location of this dimension in info1.Dim
        info1.Dim(ii).start      = start;
        info1.Dim(ii).count      = count;
        info1.Dim(ii).ind        = ind;
        info1.Dim(ii).is_seleted = true;

    else
        % ---- this dimension is copied entirely ----
        info1.Dim(ii).Name       = info0.Dimensions(ii).Name;
        info1.Dim(ii).Length     = info0.Dimensions(ii).Length;
        info1.Dim(ii).MatInd     = ii;
        info1.Dim(ii).start      = 0; % netcdf library indexes are 0-based
        info1.Dim(ii).count      = info1.Dim(ii).Length;
        info1.Dim(ii).ind        = 1:info1.Dim(ii).Length;
        info1.Dim(ii).is_seleted = false;
    end
end

if is_skip_with_any_empty_dim && any( [info1.Dim(:).count] == 0 )
    warning(' dimension with zero count found, skip copying file!')
    return
end

%% open new file and write dimensions
ncid1 = netcdf.create(filename1,'NETCDF4');
cleanup_ncid1 = onCleanup(@() netcdf.close(ncid1));

% A newly created file is in define mode. Track the mode explicitly so that
% netcdf.reDef is only called after a matching netcdf.endDef, even when the
% first variable(s) are skipped by var_included / var_excluded.
is_in_define_mode = true;

dimID1 = zeros( 1, length( info1.Dim ) );
for ii = 1:length( info1.Dim )
    dimID1(ii) = netcdf.defDim( ncid1, info1.Dim(ii).Name, info1.Dim(ii).Length );
end

% set global ATT
id_global = netcdf.getConstant('NC_GLOBAL');
for ii = 1:length(info0.Attributes)
    netcdf.putAtt( ncid1, id_global, info0.Attributes(ii).Name, info0.Attributes(ii).Value );
end

% A NaN "start" with a non-zero count marks a dimension selected by a
% non-contiguous index list.
is_copy_with_incontinuous_dim = any( isnan([info1.Dim.start]) & [info1.Dim.count] > 0 );

if is_add_preset_att
    netcdf.putAtt( ncid1, id_global, 'Copy Source', filename0 );
    netcdf.putAtt( ncid1, id_global, 'Copy Date', datestr(now) );

    % "Copy Range" is only written for contiguous copies.
    if ~is_copy_with_incontinuous_dim
        for ii = 1:length( dim_limit_name )
            % (:).' makes sure num2str returns one row, even if the limit
            % is given as a column vector.
            netcdf.putAtt( ncid1, id_global, ['Copy Range-' num2str(ii)], [dim_limit_name{ii} ' ' num2str( dim_limit_val{ii}(:).' )] );
        end
    end
end

%% load/write variable
for iv = 1:length(info0.Variables)

    var_name_now = info0.Variables(iv).Name;

    if ~isempty( var_included ) && ~any( strcmpi( var_included, var_name_now ) )
        fprintf(' var %s is skipped since it is not listed in "var_included" \n', var_name_now)
        continue
    end

    if ~isempty( var_excluded ) && any( strcmpi( var_excluded, var_name_now ) )
        fprintf(' var %s is skipped since it is listed in "var_excluded" \n', var_name_now)
        continue
    end

    % Prepare for variables ------------------------------------------------
    VarDim_now = info0.Variables(iv).Dimensions;

    % A variable can be defined without any dimensional info.
    is_var_with_dim = ~isempty( VarDim_now );

    if is_var_with_dim
        VarDimIND_now = zeros( 1, length( VarDim_now ) );
        for id = 1:length( VarDim_now )
            VarDimIND_now(id) = FUN_struct_value_for_specific_name( info1.Dim, 'Name', VarDim_now(id).Name, 'MatInd' );
        end
    else
        VarDimIND_now = [];
    end

    % start/count/stride of this variable, following its own dimension order
    dims_now = info1.Dim( VarDimIND_now );
    start = [ dims_now.start ];
    count = [ dims_now.count ];
    strid = ones( 1, length( VarDimIND_now ) );

    % Define Variable -----------------------------------------------------
    if ~is_in_define_mode
        netcdf.reDef(ncid1)
        is_in_define_mode = true;
    end

    [var_type, is_dv_success] = FUN_nc_defVar_datatypeconvert( info0.Variables(iv).Datatype );

    % fall back: search the variable type from netcdf.getConstantNames
    if ~is_dv_success
        disp('finding data type by searching netcdf.getConstantNames')
        var_type = FUN_nc_get_var_type_by_name( ncid0, var_name_now );
        disp(['datatype for var [' var_name_now '] is [' var_type ']']);
    end

    varID1 = netcdf.defVar( ncid1, var_name_now, var_type, dimID1( VarDimIND_now ) );

    % set compression level
    if is_compressed_output && is_var_with_dim
        % It is not necessary to compress a variable without dimensions.
        netcdf.defVarDeflate( ncid1, varID1, true, true, 1 ); % shuffle on, deflate on, level 1
    end

    % set chunk size (not necessary for non-dimensional var)
    if ~isempty( chunksize1_dim_name )
        % Guarded by is_var_with_dim: VarDim_now is empty for a
        % non-dimensional variable and must not be dot-indexed.
        if is_var_with_dim
            ind_dim_chunk1 = find( strcmpi( {VarDim_now.Name}, chunksize1_dim_name ) );
        else
            ind_dim_chunk1 = [];
        end

        % A variable whose only dimension is chunksize1_dim_name is skipped.
        if ~isempty( ind_dim_chunk1 ) && length( VarDim_now ) > 1
            tmp_chunksize = count;
            tmp_chunksize(ind_dim_chunk1) = 1;
            netcdf.defVarChunking( ncid1, varID1, 'CHUNKED', tmp_chunksize );
        end

    elseif is_auto_chunksize && is_var_with_dim
        tmp_bytes_per_val = FUN_nc_internal_bytes_per_value( info0.Variables(iv).Datatype );
        tmp_chunksize     = FUN_nc_internal_calc_chunk( count, tmp_bytes_per_val );
        netcdf.defVarChunking( ncid1, varID1, 'CHUNKED', tmp_chunksize );
    end

    % Add attributes -------------------------------------------------------
    var_att_now = info0.Variables(iv).Attributes;
    for ii = 1:length( var_att_now )
        if strcmp( var_att_now(ii).Name, '_FillValue' )
            % _FillValue can only be written by a specific command.
            netcdf.defVarFill( ncid1, varID1, false, var_att_now(ii).Value )
        else
            netcdf.putAtt( ncid1, varID1, var_att_now(ii).Name, var_att_now(ii).Value );
        end
    end

    netcdf.endDef(ncid1)
    is_in_define_mode = false;

    % write variable -------------------------------------------------------
    varID0 = netcdf.inqVarID( ncid0, var_name_now );

    if is_var_with_dim
        if is_copy_with_incontinuous_dim
            % At least one dimension is selected by a non-contiguous index
            % list: read through FUN_nc_varget_from_vardiminfo, which
            % receives the per-dimension start/count/ind.
            var_dim_info = repmat( struct('Name', [], 'start', 0, 'count', 0, 'ind', []), 1, length(VarDimIND_now) );
            for id = 1:length(VarDimIND_now)
                dim_info_now = info1.Dim( VarDimIND_now(id) );
                var_dim_info(id).Name  = dim_info_now.Name;
                var_dim_info(id).start = dim_info_now.start;
                var_dim_info(id).count = dim_info_now.count;
                var_dim_info(id).ind   = dim_info_now.ind;
            end
            var_value = FUN_nc_varget_from_vardiminfo( filename0, var_name_now, var_dim_info );
        else
            var_value = netcdf.getVar( ncid0, varID0, start, count, strid );
        end
    else
        var_value = netcdf.getVar( ncid0, varID0 );
    end

    netcdf.putVar( ncid1, varID1, var_value );
    netcdf.sync( ncid1 );

    clear VarDim_now VarDimIND_now varID1 varID0 var_value
end
% this is unnecessary since the files will be closed by onCleanup
% netcdf.close(ncid0);
% netcdf.close(ncid1);
%% test =========================================================
% % % clear all
% % % close all
% % % clc
% % %
% % % %%
% % % dim_limit_name = {'lon','lat'};
% % % dim_limit_val = {[ -90 -50 ]+360, [ 20 70 ]};
% % % filename0 = 'EN.4.1.1.f.analysis.g10.201412.nc';
% % % filename1 = 'EN4201412_seleted6.nc';
% % % FUN_nc_copy_with_limit( filename0, filename1, dim_limit_name, dim_limit_val );
% % %
% % %
% % % %%
% % % lon1 = FUN_nc_varget( filename1,'lon');
% % % lat1 = FUN_nc_varget( filename1,'lat');
% % % depth1 = FUN_nc_varget( filename1,'depth');
% % % t1 = FUN_nc_varget_enhanced( filename1,'temperature');
% % %
% % % %%
% % % figure('position',[100 86 523 862])
% % % subplot(3,1,1)
% % % q_pcolor(lon1,lat1, squeeze( t1(:,:,1) )');
% % % title('t1')
% % %
% % % [ out_dim, t2 ] = FUN_NC_varget_enhanced_region_2( filename0, 'temperature', {'lon','lat','depth','time'}, {[ -90 -50 ]+360, [ 20 70 ],[-inf inf],[-inf inf] });
% % %
% % % subplot(3,1,2)
% % % q_pcolor(lon1,lat1, squeeze( t2(:,:,1) )');
% % % title('t2')
% % %
% % % subplot(3,1,3)
% % % q_pcolor(lon1,lat1, squeeze( t1(:,:,1) - t2(:,:,1) )');
% % % title('t2')
%% test2 ==================================================================
% % % % % For non-contiguous 1-based index lists
% % % %
% % % % filelist = dir('W:\Data_climate\CMIP6\history_AWI-CM-MR_mon\zos_*.nc');
% % % % filelist = filelist(1)
% % % %
% % % %
% % % % lon = FUN_nc_varget_enhanced( fullfile(filelist(1).folder, filelist(1).name), 'lon');
% % % % lat = FUN_nc_varget_enhanced( fullfile(filelist(1).folder, filelist(1).name), 'lat');
% % % %
% % % % ind = find(lon>= 120 & lon <= 125 & lat >= 35 & lat <= 40);
% % % %
% % % %
% % % % % filelist = dir('Demo_*.nc');
% % % % varname = 'zos';
% % % % dim_name = { 'ncells' }; % In this file, the dimension to be subset is named "ncells".
% % % % dim_limit = { ind };
% % % % merge_dim_name = 'time'; % merge data in "time" dimension.
% % % % time_var_name = 'time'; % convert values in "time" to matlab units (days since 0000-01-00 00:00).
% % % % dim_varname = {nan}; % nan forces the axis of "ncells" to be 1, 2, ..., N, so that dim_limit is read as 1-based indexes.
% % % %
% % % % [ out_dim, data ] = FUN_nc_varget_enhanced_region_2_multifile( filelist, varname, dim_name, dim_limit, merge_dim_name, time_var_name, dim_varname );
% % % %
% % % % [ ~, lon ] = FUN_nc_varget_enhanced_region_2_multifile( filelist, 'lon', dim_name, dim_limit, merge_dim_name, time_var_name, dim_varname );
% % % % [ ~, lat ] = FUN_nc_varget_enhanced_region_2_multifile( filelist, 'lat', dim_name, dim_limit, merge_dim_name, time_var_name, dim_varname );
% % % %
% % % %
% % % % FUN_nc_copy_with_limit( fullfile(filelist(1).folder, filelist(1).name), 'test1.nc', {'ncells'}, {ind}, false, 'dim_varname',{nan} )
% % % %
% % % %
% % % % lon2 = FUN_nc_varget_enhanced( 'test1.nc','lon');
% % % % lat2 = FUN_nc_varget_enhanced( 'test1.nc','lat');
% % % % data2 = FUN_nc_varget_enhanced( 'test1.nc','zos');
% % % %
% % % %
% % % % isequal(data,data2)