|
8 | 8 | # -------------------------------------------------------------------------------------------------- |
9 | 9 |
|
10 | 10 | import isodate |
| 11 | +import netCDF4 as nc |
11 | 12 | import numpy as np |
12 | 13 | import os |
13 | 14 | import r2d2 |
14 | | -import netCDF4 as nc |
| 15 | +import shutil |
15 | 16 | from typing import Union |
16 | 17 |
|
17 | 18 | from datetime import timedelta, datetime as dt |
@@ -180,8 +181,22 @@ def execute(self) -> None: |
180 | 181 | try: |
181 | 182 | r2d2.fetch(**fetch_criteria) |
182 | 183 | self.logger.info(f"Successfully fetched {target_file}") |
183 | | - except Exception as e: |
184 | | - self.logger.info(f"Failed to fetch {target_file}: {str(e)}") |
| 184 | + except Exception: |
| 185 | + self.logger.info( |
| 186 | + f"Failed to fetch {target_file}. " |
| 187 | + "Fetch empty observation instead." |
| 188 | + ) |
| 189 | + |
| 190 | + # fetch empty obs |
| 191 | + r2d2.fetch( |
| 192 | + item='observation', |
| 193 | + provider='empty_provider', |
| 194 | + observation_type='empty_type', |
| 195 | + file_extension='nc4', |
| 196 | + window_start='19700101T030000Z', |
| 197 | + window_length='PT6H', |
| 198 | + target_file=target_file, |
| 199 | + ) |
185 | 200 |
|
186 | 201 | # Check how many of the combine_input_files exist in the cycle directory. |
187 | 202 | # If all of them are missing proceed without creating an observation input |
@@ -441,85 +456,106 @@ def read_and_combine(self, input_filenames: list, output_filename: str) -> None: |
441 | 456 | existing_files = [f for f in input_filenames if os.path.exists(f)] |
442 | 457 | input_filenames = existing_files |
443 | 458 |
|
444 | | - # Loop through the input files and get the total dimension size for each dimension |
445 | | - # Location requires special handling to get the cumulative sum of the dimension size |
446 | | - # --------------------------------------------------------------------------------- |
447 | | - out_dim_size = {'Location': 0} |
448 | | - for input_filename in input_filenames: |
449 | | - with nc.Dataset(input_filename, 'r') as ds: |
450 | | - for dim_name, dim in ds.dimensions.items(): |
451 | | - if dim_name == 'Location': |
452 | | - out_dim_size[dim_name] += dim.size |
| 459 | + # Remove empty files from input_filenames |
| 460 | + # ------------------------------------------------------------- |
| 461 | + valid_files = [] |
| 462 | + |
| 463 | + for fname in input_filenames: |
| 464 | + try: |
| 465 | + with nc.Dataset(fname, 'r') as ds: |
| 466 | + if 'Location' in ds.dimensions and ds.dimensions['Location'].size > 0: |
| 467 | + valid_files.append(fname) |
453 | 468 | else: |
454 | | - out_dim_size[dim_name] = dim.size |
455 | | - |
456 | | - with nc.Dataset(output_filename, 'w') as out_ds: |
457 | | - # Open the input NetCDF files for reading |
458 | | - # --------------------------------------- |
459 | | - self.logger.info(f"Combining files {input_filenames} ") |
460 | | - |
461 | | - # Create an output file template based on the first input file |
462 | | - # ------------------------------------------------------------ |
463 | | - with nc.Dataset(input_filenames[0], 'r') as ds: |
464 | | - # Access groups and create dimensions |
465 | | - # ----------------------------------- |
466 | | - input_groups = ds.groups.keys() |
467 | | - |
468 | | - for dim_name, dim in ds.dimensions.items(): |
469 | | - out_ds.createDimension(dim_name, out_dim_size[dim_name]) |
470 | | - |
471 | | - # Loop through groups and process variables |
472 | | - # ----------------------------------------- |
473 | | - for group_name in input_groups: |
474 | | - group = ds[group_name] |
475 | | - |
476 | | - # Create the groups in output file |
477 | | - # -------------------------------- |
478 | | - out_group = out_ds.createGroup(group_name) |
479 | | - |
480 | | - # Access variables within a group |
481 | | - # ------------------------------- |
482 | | - variables_in_group = group.variables.keys() |
483 | | - |
484 | | - # Loop over variables from input files, combine, and write to the new file |
485 | | - # ------------------------------------------------------------------------ |
486 | | - for var_name in variables_in_group: |
487 | | - list_data = [] |
488 | | - |
489 | | - # Get the dimensions of the variable |
490 | | - # ---------------------------------- |
491 | | - var_dims = group[var_name].dimensions |
492 | | - |
493 | | - # Loop over all the files and combine the variable data into a list |
494 | | - # Channel dimensions remain the same, so we can break the loop |
495 | | - # ---------------------------------------------------------------- |
496 | | - for input_file in input_filenames: |
497 | | - list_data.append(self.get_data(input_file, group_name, var_name)) |
498 | | - # Only break if the first dimension is Channel |
499 | | - if var_dims[0] == 'Channel': |
500 | | - break |
501 | | - |
502 | | - # Concatenate the masked arrays along the first dimension |
503 | | - # -------------------------------------------------------- |
504 | | - variable_data = np.ma.concatenate(list_data, axis=0) |
505 | | - |
506 | | - # Fill value needs to be assigned while creating variables |
507 | | - # -------------------------------------------------------- |
508 | | - subset_var = out_group.createVariable( |
509 | | - var_name, |
510 | | - variable_data.dtype, |
511 | | - var_dims, |
512 | | - fill_value=group[var_name].getncattr('_FillValue') |
513 | | - ) |
514 | | - for attr_name in group[var_name].ncattrs(): |
515 | | - if attr_name == '_FillValue': |
516 | | - continue |
517 | | - subset_var.setncattr( |
518 | | - attr_name, group[var_name].getncattr(attr_name) |
519 | | - ) |
| 469 | + empty_template = fname |
| 470 | + except OSError: |
| 471 | + continue |
520 | 472 |
|
521 | | - # Write subset data to the new file |
| 473 | + input_filenames = valid_files |
| 474 | + |
| 475 | + if input_filenames: |
| 476 | + # Loop through the input files and get the total dimension size for each dimension |
| 477 | + # Location requires special handling to get the cumulative sum of the dimension size |
| 478 | + # --------------------------------------------------------------------------------- |
| 479 | + out_dim_size = {'Location': 0} |
| 480 | + for input_filename in input_filenames: |
| 481 | + with nc.Dataset(input_filename, 'r') as ds: |
| 482 | + for dim_name, dim in ds.dimensions.items(): |
| 483 | + if dim_name == 'Location': |
| 484 | + out_dim_size[dim_name] += dim.size |
| 485 | + else: |
| 486 | + out_dim_size[dim_name] = dim.size |
| 487 | + |
| 488 | + with nc.Dataset(output_filename, 'w') as out_ds: |
| 489 | + # Open the input NetCDF files for reading |
| 490 | + # --------------------------------------- |
| 491 | + self.logger.info(f"Combining files {input_filenames} ") |
| 492 | + |
| 493 | + # Create an output file template based on the first input file |
| 494 | + # ------------------------------------------------------------ |
| 495 | + with nc.Dataset(input_filenames[0], 'r') as ds: |
| 496 | + # Access groups and create dimensions |
| 497 | + # ----------------------------------- |
| 498 | + input_groups = ds.groups.keys() |
| 499 | + |
| 500 | + for dim_name, dim in ds.dimensions.items(): |
| 501 | + out_ds.createDimension(dim_name, out_dim_size[dim_name]) |
| 502 | + |
| 503 | + # Loop through groups and process variables |
| 504 | + # ----------------------------------------- |
| 505 | + for group_name in input_groups: |
| 506 | + group = ds[group_name] |
| 507 | + |
| 508 | + # Create the groups in output file |
522 | 509 | # -------------------------------- |
523 | | - subset_var[:] = variable_data |
| 510 | + out_group = out_ds.createGroup(group_name) |
| 511 | + |
| 512 | + # Access variables within a group |
| 513 | + # ------------------------------- |
| 514 | + variables_in_group = group.variables.keys() |
| 515 | + |
| 516 | + # Loop over variables from input files, combine, and write to the new file |
| 517 | + # ------------------------------------------------------------------------ |
| 518 | + for var_name in variables_in_group: |
| 519 | + list_data = [] |
| 520 | + |
| 521 | + # Get the dimensions of the variable |
| 522 | + # ---------------------------------- |
| 523 | + var_dims = group[var_name].dimensions |
| 524 | + |
| 525 | + # Loop over all the files and combine the variable data into a list |
| 526 | + # Channel dimensions remain the same, so we can break the loop |
| 527 | + # ---------------------------------------------------------------- |
| 528 | + for input_file in input_filenames: |
| 529 | + list_data.append(self.get_data(input_file, group_name, var_name)) |
| 530 | + # Only break if the first dimension is Channel |
| 531 | + if var_dims[0] == 'Channel': |
| 532 | + break |
| 533 | + |
| 534 | + # Concatenate the masked arrays along the first dimension |
| 535 | + # -------------------------------------------------------- |
| 536 | + variable_data = np.ma.concatenate(list_data, axis=0) |
| 537 | + |
| 538 | + # Fill value needs to be assigned while creating variables |
| 539 | + # -------------------------------------------------------- |
| 540 | + subset_var = out_group.createVariable( |
| 541 | + var_name, |
| 542 | + variable_data.dtype, |
| 543 | + var_dims, |
| 544 | + fill_value=group[var_name].getncattr('_FillValue') |
| 545 | + ) |
| 546 | + for attr_name in group[var_name].ncattrs(): |
| 547 | + if attr_name == '_FillValue': |
| 548 | + continue |
| 549 | + subset_var.setncattr( |
| 550 | + attr_name, group[var_name].getncattr(attr_name) |
| 551 | + ) |
| 552 | + |
| 553 | + # Write subset data to the new file |
| 554 | + # -------------------------------- |
| 555 | + subset_var[:] = variable_data |
| 556 | + |
| 557 | + else: |
524 | 558 |
|
| 559 | + # If all the files are empty, copy one of them as the output file |
| 560 | + shutil.copyfile(empty_template, output_filename) |
525 | 561 | # ---------------------------------------------------------------------------------------------- |
0 commit comments