PaddlePaddle
diff --git a/‎examples/data_efficient_nopt/config/operators_poisson.yaml
Lines changed: 18 additions & 16 deletions b/‎examples/data_efficient_nopt/config/operators_poisson.yaml
Lines changed: 18 additions & 16 deletions
diff --git a/‎examples/data_efficient_nopt/config/vmae_config_pretrain.yaml
Lines changed: 105 additions & 0 deletions b/‎examples/data_efficient_nopt/config/vmae_config_pretrain.yaml
Lines changed: 105 additions & 0 deletions
diff --git a/‎examples/data_efficient_nopt/data_utils/datasets.py
Lines changed: 215 additions & 0 deletions b/‎examples/data_efficient_nopt/data_utils/datasets.py
Lines changed: 215 additions & 0 deletions
@@ -12,10 +12,12 @@ default: &DEFAULT
   nx: 256
   ny: 256
   # optimization
+  loss_style: 'mean'
+  loss_func: 'mse'
   optimizer: 'adam'
   scheduler: 'none'
   learning_rate: !!float 1.0
-  max_epochs: 500
+  max_epochs: 2
   scheduler_epochs: 500
   weight_decay: 0
   batch_size: 25
@@ -76,11 +78,11 @@ poisson: &poisson
 
 poisson-64-scale-e5_15: &poisson_64_e5_15
   <<: *poisson
-  train_path:  '/path/to/poisson_64_e5_15_train.h5'
-  val_path:    '/path/to/poisson_64_e5_15_val.h5'
-  test_path:   '/path/to/poisson_64_e5_15_test.h5'
-  scales_path: '/path/to/poisson_64_e5_15_train_scale.npy'
-  train_rand_idx_path: '/path/to/train_rand_idx.npy'
+  train_path:  '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e5_15_train.h5'
+  val_path:    '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e5_15_val.h5'
+  test_path:   '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e5_15_test.h5'
+  scales_path: '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e5_15_train_scale.npy'
+  train_rand_idx_path: '/home/aistudio/data_efficient_nopt/data/possion_64/train_rand_idx.npy'
   batch_size: 128
   log_to_wandb: !!bool True
   learning_rate: 1E-3
@@ -99,11 +101,11 @@ poisson-64-scale-e5_15: &poisson_64_e5_15
 
 pois-64-pretrain-e1_20: &pois_64_e1_20_pt
   <<: *poisson
-  train_path:   '/path/to/poisson_64_e1_20_train.h5'
-  val_path:     '/path/to/poisson_64_e1_20_val.h5'
-  test_path:    '/path/to/poisson_64_e1_20_test.h5'
-  scales_path:  '/path/to/poisson_64_e1_20_train_scale.npy'
-  train_rand_idx_path: '/path/to/train_rand_idx.npy'
+  train_path:   '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e1_20_train.h5'
+  val_path:     '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e1_20_val.h5'
+  test_path:    '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e1_20_test.h5'
+  scales_path:  '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e1_20_train_scale.npy'
+  train_rand_idx_path: '/home/aistudio/data_efficient_nopt/data/possion_64/train_rand_idx.npy'
   batch_size: 128
   log_to_wandb: !!bool True
   mode_cut: 32
@@ -120,11 +122,11 @@ pois-64-pretrain-e1_20: &pois_64_e1_20_pt
 
 pois-64-finetune-e5_15: &pois_64_e5_15_ft
   <<: *poisson
-  train_path:   '/path/to/poisson_64_e5_15_train.h5'
-  val_path:     '/path/to/poisson_64_e5_15_val.h5'
-  test_path:    '/path/to/poisson_64_e5_15_test.h5'
-  scales_path:  '/path/to/poisson_64_e5_15_train_scale.npy'
-  train_rand_idx_path: '/path/to/train_rand_idx.npy'
+  train_path:   '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e5_15_train.h5'
+  val_path:     '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e5_15_val.h5'
+  test_path:    '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e5_15_test.h5'
+  scales_path:  '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e5_15_train_scale.npy'
+  train_rand_idx_path: '/home/aistudio/data_efficient_nopt/data/possion_64/train_rand_idx.npy'
   batch_size: 128
   log_to_wandb: !!bool True
   mode_cut: 32
 
@@ -0,0 +1,105 @@
+basic_config: &basic_config
+  # Run settings
+  log_to_wandb: !!bool True # Use wandb integration
+  log_to_screen: !!bool True # Log progress to screen.
+  save_checkpoint: !!bool True # Save checkpoints
+  checkpoint_save_interval: 100 # Save every # epochs - also saves "best" according to val loss
+  debug_grad: !!bool True # Compute gradient/step_sizes/etc. for debugging
+  true_time: !!bool False # Debugging setting - sets num workers to zero and activates syncs
+  num_data_workers: 12 # Generally pulling 8 cpu per process, so using 6 for DL - not sure if best ratio
+  enable_amp: !!bool False # Use automatic mixed precision - blows up with low variance fields right now
+  compile: !!bool False # Compile model - Does not currently work
+  gradient_checkpointing: !!bool False # Whether to use gradient checkpointing - Slow, but lower memory
+  exp_dir: './exp' # Output path
+  log_interval: 1 # How often to log - Don't think this is actually implemented
+  pretrained: False # Whether to load a pretrained model
+  vmae_pretrained: False # Whether to load a pretrained model
+  # wandb settings
+  project: 'proj_name'
+  group: 'ns_incomp'
+  entity: 'entity_name'
+  # Training settings ################################
+  mask_ratio: 0. # TODO: % of INvisible tokens: None, 0., >0.
+  blur: [0, 0] # TODO: range of blur sigma
+  drop_path: 0.1
+  batch_size: 4
+  accum_grad: 2 # Real batch size is accum * batch_size, real steps/"epoch" is epoch_size / accum
+  scheduler_epochs: -1
+  pretrain_train: [.9, .1] # TODO:
+  train_subsample: 1. # TODO:
+  max_epochs: 500
+  # epoch_size: 2000 # TODO: Artificial epoch size
+  epoch_size: 200 # TODO: Artificial epoch size
+  rescale_gradients: !!bool False # Activate hook that scales block gradients to norm 1
+  optimizer: 'adan' # adam, adan, whatever else i end up adding - adan did better on HP sweep
+  scheduler: 'cosine' # Only cosine implemented
+  warmup_steps: 100 # TODO: Warmup when not using DAdapt
+  ######################################################
+  learning_rate: -1 # -1 means use DAdapt
+  weight_decay: 1e-3
+  n_states: 12  # TODO: Number of state variables across the datasets - Can be larger than real number and things will just go unused
+  state_names: ['Pressure', 'Vx', 'Vy', 'Density',  'Vx', 'Vy', 'Density', 'Pressure'] # TODO: Should be sorted
+  dt: 1 # TODO: Striding of data - Not currently implemented > 1
+  n_steps: 1 # TODO: Length of history to include in input
+  enforce_max_steps: !!bool False # If false and n_steps > dataset steps, use dataset steps. Otherwise, raise Exception.
+  # Model settings ####################################
+  model_type: 'vmae' # vit_small_patch16_224
+  encoder_embed_dim: 384 # Dimension of internal representation - 192/384/768/1024 for Ti/S/B/L
+  decoder_embed_dim: 192
+  encoder_num_heads: 6 # Number of heads for attention - 3/6/12/16 for Ti/S/B/L
+  decoder_num_heads: 3
+  decoder_depth: 4
+  decoder_num_classes: 768
+  tubelet_size: 1
+  ######################################################
+  input_size: 512
+  drop_path_rate: 0.1
+  init_scale: 0.001
+  # --num_frames 16 \
+  # --opt adamw \
+  # --lr 5e-4 \
+  # --opt_betas 0.9 0.999 \
+  # --weight_decay 0.05 \
+  # --dist_eval \
+  # --test_num_segment 2 \
+  # --test_num_crop 3 \
+  # block_type: 'axial' # Which type of block to use - if axial, next two fields must be set to define axial ops
+  # time_type: 'attention' # Conditional on block type
+  # space_type: 'axial_attention' # Conditional on block type
+  tie_fields: !!bool False # Whether to use 1 embedding per field per data
+  processor_blocks: 12 # Number of transformer blocks in the backbone - 12/12/12/24 for Ti/S/B/L
+  patch_size: 16 # Actually currently hardcoded at 16
+  bias_type: 'rel'  # Options rel, continuous, none
+  # Data settings
+  train_val_test: [.8, .1, .1]
+  augmentation: !!bool False # Augmentation not implemented
+  use_all_fields: !!bool True # Prepopulate the field metadata dictionary from dictionary in datasets
+  tie_batches: !!bool False # Force everything in batch to come from one dset
+  extended_names: !!bool False # Whether to use extended names - not currently implemented
+  embedding_offset: 0  # Use when adding extra finetuning fields
+  train_data_paths: [
+              ['/path/to/data/PDEBench/2D/NS_incom', 'incompNS', ''],
+              ]
+  valid_data_paths: [
+              ['/path/to/data/PDEBench/2D/NS_incom', 'incompNS', ''],
+              ]
+
+
+finetune: &finetune
+  <<: *basic_config
+  max_epochs: 500
+  train_val_test: [.8, .1, .1]
+  accum_grad: 1
+  pretrained: !!bool True
+  group: 'debugging'
+  pretrained_ckpt_path: '/path/to/training_checkpoints/ckpt.tar'
+  train_data_paths: [
+              ['/PDEBench/2D/CFD/2D_Train_Turb', 'compNS', 'M1.0'],
+              ]
+  valid_data_paths: [  # These are the same for all configs - uses split according to train_val_test
+               ['/PDEBench/2D/CFD/2D_Train_Turb', 'compNS', 'M1.0'],
+              ]
+  embedding_offset: 0 # Number of fields in original model - FT fields start after this
+  freeze_middle: !!bool False # Whether to freeze the middle layers of the model
+  freeze_processor: !!bool False
+  append_datasets: [] # List of datasets to append to the input/output projections for finetuning
@@ -0,0 +1,215 @@
+"""
+Remember to parameterize the file paths eventually
+"""
+import os
+
+import numpy as np
+from paddle.io import DataLoader
+from paddle.io import Dataset
+from paddle.io import DistributedBatchSampler
+from paddle.io import RandomSampler
+
+from .hdf5_datasets import DiffRe2DDataset
+from .hdf5_datasets import IncompNSDataset
+from .masking_generator import TubeMaskingGenerator
+from .mixed_dset_sampler import MultisetSampler
+
+# Module-level list; it is not referenced anywhere else in the visible part of this module.
+broken_paths = list()
+# Registry: dataset-type string (second element of each path entry) -> dataset class.
+# IF YOU ADD A NEW DSET MAKE SURE TO UPDATE THIS MAPPING SO MIXED DSET KNOWS HOW TO USE IT
+DSET_NAME_TO_OBJECT = dict(
+    incompNS=IncompNSDataset,
+    diffre2d=DiffRe2DDataset,
+)
+
+
+def get_data_loader(params, paths, distributed, split="train", rank=0, train_offset=0):
+    """Build the mixed dataset, its multi-set sampler and a DataLoader.
+
+    Args:
+        params: Config object; the attributes read here are ``train_val_test``,
+            ``pretrain_train``, ``train_subsample``, ``n_steps``, ``tie_fields``,
+            ``use_all_fields``, ``enforce_max_steps``, ``batch_size``,
+            ``epoch_size``, ``num_data_workers`` and, optionally, ``masking``,
+            ``blur`` and ``rollout``.
+        paths: Sequence of ``(path, dataset_type, include_string)`` entries
+            handed to ``MixedDataset``.
+        distributed: Selects ``DistributedBatchSampler`` instead of
+            ``RandomSampler`` as the base sampler class.
+        split: ``"pretrain"``, ``"train"`` or any other split name understood
+            by the sub-datasets. ``"pretrain"`` is mapped back to ``"train"``
+            after its fractions are computed.
+        rank: Forwarded to ``MultisetSampler``.
+        train_offset: Forwarded to ``MixedDataset``.
+
+    Returns:
+        Tuple ``(dataloader, dataset, sampler)``.
+    """
+    fractions = params.train_val_test
+    if split == "pretrain":
+        # Pretraining takes the leading share of the training fraction.
+        head = params.train_val_test[0] * params.pretrain_train[0]
+        fractions = [head, fractions[1], fractions[2]]
+        split = "train"  # sub-datasets only know the regular "train" split
+    elif split == "train":
+        # A negative fraction tells the sub-dataset to index from the end.
+        head = (
+            -params.train_val_test[0]
+            * params.pretrain_train[1]
+            * params.train_subsample
+        )
+        fractions = [head, fractions[1], fractions[2]]
+
+    dataset = MixedDataset(
+        paths,
+        n_steps=params.n_steps,
+        train_val_test=fractions,
+        split=split,
+        tie_fields=params.tie_fields,
+        use_all_fields=params.use_all_fields,
+        enforce_max_steps=params.enforce_max_steps,
+        train_offset=train_offset,
+        masking=getattr(params, "masking", None),
+        blur=getattr(params, "blur", None),
+        rollout=getattr(params, "rollout", 1),
+    )
+
+    base_sampler = DistributedBatchSampler if distributed else RandomSampler
+    sampler = MultisetSampler(
+        dataset,
+        base_sampler,
+        params.batch_size,
+        distributed=distributed,
+        max_samples=params.epoch_size,
+        rank=rank,
+    )
+
+    # NOTE: the sampler is returned to the caller but is not passed to the
+    # DataLoader below; the loader iterates the dataset in order (shuffle=False).
+    dataloader = DataLoader(
+        dataset,
+        batch_size=int(params.batch_size),
+        num_workers=params.num_data_workers,
+        shuffle=False,
+        drop_last=True,
+    )
+    return dataloader, dataset, sampler
+
+
+class MixedDataset(Dataset):
+    """Several sub-datasets exposed behind one flat index.
+
+    Each entry of ``path_list`` is a ``(path, dataset_type, include_string)``
+    triple, where ``dataset_type`` must be a key of ``DSET_NAME_TO_OBJECT``.
+    A global index is translated to ``(sub-dataset, local index)`` through the
+    cumulative ``offsets`` table built in ``__init__``.
+
+    Args:
+        path_list: Sequence of ``(path, dataset_type, include_string)``
+            triples. Must contain at least one entry.
+        n_steps: Forwarded to every sub-dataset constructor.
+        dt: Forwarded to every sub-dataset constructor.
+        train_val_test: Split fractions, forwarded to every sub-dataset.
+        split: Split name, forwarded to every sub-dataset.
+        tie_fields: If true, ``_build_subset_dict`` uses its hard-coded table.
+        use_all_fields: If true, field names/indices are derived from every
+            class registered in ``DSET_NAME_TO_OBJECT`` instead of only the
+            sub-datasets that were actually loaded.
+        extended_names: Passed to ``get_name`` when building the subset dict.
+        enforce_max_steps: Accepted for interface compatibility; not used by
+            this class.
+        train_offset: First field index used when neither ``tie_fields`` nor
+            ``use_all_fields`` is set.
+        masking: ``None`` or ``((#frames, height, width), mask_ratio)``.
+        blur: Stored on the instance; not otherwise used by this class.
+        rollout: Forwarded to every sub-dataset constructor.
+
+    Raises:
+        ValueError: If ``path_list`` is empty, or if ``len()`` of a
+            sub-dataset raises ``ValueError``.
+        KeyError: If a ``dataset_type`` is not in ``DSET_NAME_TO_OBJECT``.
+    """
+
+    def __init__(
+        self,
+        path_list=(),
+        n_steps=1,
+        dt=1,
+        train_val_test=(0.8, 0.1, 0.1),
+        split="train",
+        tie_fields=True,
+        use_all_fields=True,
+        extended_names=False,
+        enforce_max_steps=False,
+        train_offset=0,
+        masking=None,
+        blur=None,
+        rollout=1,
+    ):
+        super().__init__()
+        path_list = list(path_list)
+        if not path_list:
+            # zip(*[]) would otherwise fail with an opaque unpacking error.
+            raise ValueError(
+                "path_list must contain at least one (path, type, include_string) entry."
+            )
+        # Global dicts used by Mixed DSET.
+        self.train_offset = train_offset
+        self.path_list, self.type_list, self.include_string = zip(*path_list)
+        self.tie_fields = tie_fields
+        self.extended_names = extended_names
+        self.split = split
+        self.sub_dsets = []
+        self.offsets = [0]
+        self.train_val_test = train_val_test
+        self.use_all_fields = use_all_fields
+        self.rollout = rollout
+
+        for dset, path, include_string in zip(
+            self.type_list, self.path_list, self.include_string
+        ):
+            subdset = DSET_NAME_TO_OBJECT[dset](
+                path,
+                include_string,
+                n_steps=n_steps,
+                dt=dt,
+                train_val_test=train_val_test,
+                split=split,
+                rollout=self.rollout,
+            )
+            # Check to make sure our dataset actually exists with these settings
+            try:
+                n_samples = len(subdset)
+            except ValueError as err:
+                raise ValueError(
+                    f"Dataset {path} is empty. Check that n_steps < trajectory_length in file."
+                ) from err
+            self.sub_dsets.append(subdset)
+            # offsets[i + 1] is the first global index past sub-dataset i.
+            self.offsets.append(self.offsets[-1] + n_samples)
+        # Sentinel for the first boundary; __getitem__ clamps it back to 0.
+        self.offsets[0] = -1
+
+        self.subset_dict = self._build_subset_dict()
+
+        self.masking = masking  # None or ((#frames, height, width), mask_ratio)
+        if self._masking_enabled():
+            self.mask_generator = TubeMaskingGenerator(self.masking[0], self.masking[1])
+        self.blur = blur
+
+    def _masking_enabled(self):
+        """Return True when ``self.masking`` is a truthy 2-element tuple/list."""
+        return (
+            bool(self.masking)
+            and isinstance(self.masking, (tuple, list))
+            and len(self.masking) == 2
+        )
+
+    def get_state_names(self):
+        """Return the flat list of field names covered by this dataset.
+
+        With ``use_all_fields`` the names come from every registered dataset
+        class; otherwise from each distinct loaded sub-dataset, in load order.
+        """
+        if self.use_all_fields:
+            name_list = []
+            for dset in DSET_NAME_TO_OBJECT.values():
+                name_list += dset._specifics()[2]
+            return name_list
+
+        visited = set()
+        name_list = []
+        for dset in self.sub_dsets:
+            name = dset.get_name()  # Could use extended names here
+            if name not in visited:
+                visited.add(name)
+                name_list.append(dset.field_names)
+        return [f for fl in name_list for f in fl]  # Flatten the names
+
+    def _build_subset_dict(self):
+        """Map each dataset name to the list of field indices it occupies."""
+        if self.tie_fields:  # Hardcoded, but seems less effective anyway
+            return {
+                "swe": [3],
+                "incompNS": [0, 1, 2],
+                "compNS": [0, 1, 2, 3],
+                "diffre2d": [4, 5],
+            }
+
+        subset_dict = {}
+        if self.use_all_fields:
+            # Consecutive index ranges over every registered dataset class.
+            cur_max = 0
+            for name, dset in DSET_NAME_TO_OBJECT.items():
+                n_fields = len(dset._specifics()[2])
+                subset_dict[name] = list(range(cur_max, cur_max + n_fields))
+                cur_max += n_fields
+            return subset_dict
+
+        # Only the loaded sub-datasets, starting at train_offset.
+        cur_max = self.train_offset
+        for dset in self.sub_dsets:
+            name = dset.get_name(self.extended_names)
+            if name not in subset_dict:
+                n_fields = len(dset.field_names)
+                subset_dict[name] = list(range(cur_max, cur_max + n_fields))
+                cur_max += n_fields
+        return subset_dict
+
+    def __getitem__(self, index):
+        """Return ``(x, y)`` for a global index, plus a mask when masking is on.
+
+        The sample is fetched exactly once. On failure the location is printed
+        for debugging and the original exception is re-raised.
+        """
+        # Which sub-dataset the global index falls into.
+        file_idx = np.searchsorted(self.offsets, index, side="right") - 1
+        # max(..., 0) undoes the -1 sentinel stored in offsets[0].
+        local_idx = index - max(self.offsets[file_idx], 0)
+
+        try:
+            x, y = self.sub_dsets[file_idx][local_idx]
+        except Exception:
+            print(
+                "FAILED AT ", file_idx, local_idx, index, int(os.environ.get("RANK", 0))
+            )
+            raise
+
+        if self._masking_enabled():
+            mask = self.mask_generator()
+            return x, y, mask
+        return x, y
+
+    def __len__(self):
+        """Return the total number of samples across all sub-datasets."""
+        return sum(len(dset) for dset in self.sub_dsets)