
Commit e5b39df

feat(ppsci): support data_efficient_nopt for training and test
1 parent ab7d6a7 commit e5b39df

File tree

10 files changed: +2594 -25 lines changed

examples/data_efficient_nopt/config/operators_poisson.yaml

Lines changed: 18 additions & 16 deletions
@@ -12,10 +12,12 @@ default: &DEFAULT
   nx: 256
   ny: 256
   # optimization
+  loss_style: 'mean'
+  loss_func: 'mse'
   optimizer: 'adam'
   scheduler: 'none'
   learning_rate: !!float 1.0
-  max_epochs: 500
+  max_epochs: 2
   scheduler_epochs: 500
   weight_decay: 0
   batch_size: 25
@@ -76,11 +78,11 @@ poisson: &poisson
 
 poisson-64-scale-e5_15: &poisson_64_e5_15
   <<: *poisson
-  train_path: '/path/to/poisson_64_e5_15_train.h5'
-  val_path: '/path/to/poisson_64_e5_15_val.h5'
-  test_path: '/path/to/poisson_64_e5_15_test.h5'
-  scales_path: '/path/to/poisson_64_e5_15_train_scale.npy'
-  train_rand_idx_path: '/path/to/train_rand_idx.npy'
+  train_path: '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e5_15_train.h5'
+  val_path: '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e5_15_val.h5'
+  test_path: '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e5_15_test.h5'
+  scales_path: '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e5_15_train_scale.npy'
+  train_rand_idx_path: '/home/aistudio/data_efficient_nopt/data/possion_64/train_rand_idx.npy'
   batch_size: 128
   log_to_wandb: !!bool True
   learning_rate: 1E-3
@@ -99,11 +101,11 @@ poisson-64-scale-e5_15: &poisson_64_e5_15
 
 pois-64-pretrain-e1_20: &pois_64_e1_20_pt
   <<: *poisson
-  train_path: '/path/to/poisson_64_e1_20_train.h5'
-  val_path: '/path/to/poisson_64_e1_20_val.h5'
-  test_path: '/path/to/poisson_64_e1_20_test.h5'
-  scales_path: '/path/to/poisson_64_e1_20_train_scale.npy'
-  train_rand_idx_path: '/path/to/train_rand_idx.npy'
+  train_path: '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e1_20_train.h5'
+  val_path: '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e1_20_val.h5'
+  test_path: '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e1_20_test.h5'
+  scales_path: '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e1_20_train_scale.npy'
+  train_rand_idx_path: '/home/aistudio/data_efficient_nopt/data/possion_64/train_rand_idx.npy'
   batch_size: 128
   log_to_wandb: !!bool True
   mode_cut: 32
@@ -120,11 +122,11 @@ pois-64-pretrain-e1_20: &pois_64_e1_20_pt
 
 pois-64-finetune-e5_15: &pois_64_e5_15_ft
   <<: *poisson
-  train_path: '/path/to/poisson_64_e5_15_train.h5'
-  val_path: '/path/to/poisson_64_e5_15_val.h5'
-  test_path: '/path/to/poisson_64_e5_15_test.h5'
-  scales_path: '/path/to/poisson_64_e5_15_train_scale.npy'
-  train_rand_idx_path: '/path/to/train_rand_idx.npy'
+  train_path: '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e5_15_train.h5'
+  val_path: '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e5_15_val.h5'
+  test_path: '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e5_15_test.h5'
+  scales_path: '/home/aistudio/data_efficient_nopt/data/possion_64/poisson_64_e5_15_train_scale.npy'
+  train_rand_idx_path: '/home/aistudio/data_efficient_nopt/data/possion_64/train_rand_idx.npy'
   batch_size: 128
   log_to_wandb: !!bool True
   mode_cut: 32
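
The blocks above are plain YAML anchors and merge keys: each variant inherits everything under &poisson and overrides only the paths and hyperparameters shown. A minimal sketch of how the merged result can be inspected, assuming PyYAML is available (the key chosen for printing is just an example):

    import yaml  # PyYAML's safe loader resolves the `<<: *poisson` merge keys on load

    with open("examples/data_efficient_nopt/config/operators_poisson.yaml") as f:
        configs = yaml.safe_load(f)

    # The merged mapping holds the inherited keys plus the overrides from this diff.
    cfg = configs["poisson-64-scale-e5_15"]
    print(cfg["train_path"], cfg["batch_size"])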
Lines changed: 105 additions & 0 deletions
@@ -0,0 +1,105 @@
basic_config: &basic_config
  # Run settings
  log_to_wandb: !!bool True # Use wandb integration
  log_to_screen: !!bool True # Log progress to screen.
  save_checkpoint: !!bool True # Save checkpoints
  checkpoint_save_interval: 100 # Save every # epochs - also saves "best" according to val loss
  debug_grad: !!bool True # Compute gradient/step_sizes/ect for debugging
  true_time: !!bool False # Debugging setting - sets num workers to zero and activates syncs
  num_data_workers: 12 # Generally pulling 8 cpu per process, so using 6 for DL - not sure if best ratio
  enable_amp: !!bool False # Use automatic mixed precision - blows up with low variance fields right now
  compile: !!bool False # Compile model - Does not currently work
  gradient_checkpointing: !!bool False # Whether to use gradient checkpointing - Slow, but lower memory
  exp_dir: './exp' # Output path
  log_interval: 1 # How often to log - Don't think this is actually implemented
  pretrained: False # Whether to load a pretrained model
  vmae_pretrained: False # Whether to load a pretrained model
  # wandb settings
  project: 'proj_name'
  group: 'ns_incomp'
  entity: 'entity_name'
  # Training settings ################################
  mask_ratio: 0. # TODO: % of INvisible tokens: None, 0., >0.
  blur: [0, 0] # TODO: range of blur sigma
  drop_path: 0.1
  batch_size: 4
  accum_grad: 2 # Real batch size is accum * batch_size, real steps/"epoch" is epoch_size / accum
  scheduler_epochs: -1
  pretrain_train: [.9, .1] # TODO:
  train_subsample: 1. # TODO:
  max_epochs: 500
  # epoch_size: 2000 # TODO: Artificial epoch size
  epoch_size: 200 # TODO: Artificial epoch size
  rescale_gradients: !!bool False # Activate hook that scales block gradients to norm 1
  optimizer: 'adan' # adam, adan, whatever else i end up adding - adan did better on HP sweep
  scheduler: 'cosine' # Only cosine implemented
  warmup_steps: 100 # TODO: Warmup when not using DAdapt
  ######################################################
  learning_rate: -1 # -1 means use DAdapt
  weight_decay: 1e-3
  n_states: 12 # TODO: Number of state variables across the datasets - Can be larger than real number and things will just go unused
  state_names: ['Pressure', 'Vx', 'Vy', 'Density', 'Vx', 'Vy', 'Density', 'Pressure'] # TODO: Should be sorted
  dt: 1 # TODO: Striding of data - Not currently implemented > 1
  n_steps: 1 # TODO: Length of history to include in input
  enforce_max_steps: !!bool False # If false and n_steps > dataset steps, use dataset steps. Otherwise, raise Exception.
  # Model settings ####################################
  model_type: 'vmae' # vit_small_patch16_224
  encoder_embed_dim: 384 # Dimension of internal representation - 192/384/768/1024 for Ti/S/B/L
  decoder_embed_dim: 192
  encoder_num_heads: 6 # Number of heads for attention - 3/6/12/16 for Ti/S/B/L
  decoder_num_heads: 3
  decoder_depth: 4
  decoder_num_classes: 768
  tubelet_size: 1
  ######################################################
  input_size: 512
  drop_path_rate: 0.1
  init_scale: 0.001
  # --num_frames 16 \
  # --opt adamw \
  # --lr 5e-4 \
  # --opt_betas 0.9 0.999 \
  # --weight_decay 0.05 \
  # --dist_eval \
  # --test_num_segment 2 \
  # --test_num_crop 3 \
  # block_type: 'axial' # Which type of block to use - if axial, next two fields must be set to define axial ops
  # time_type: 'attention' # Conditional on block type
  # space_type: 'axial_attention' # Conditional on block type
  tie_fields: !!bool False # Whether to use 1 embedding per field per data
  processor_blocks: 12 # Number of transformer blocks in the backbone - 12/12/12/24 for Ti/S/B/L
  patch_size: 16 # Actually currently hardcoded at 16
  bias_type: 'rel' # Options rel, continuous, none
  # Data settings
  train_val_test: [.8, .1, .1]
  augmentation: !!bool False # Augmentation not implemented
  use_all_fields: !!bool True # Prepopulate the field metadata dictionary from dictionary in datasets
  tie_batches: !!bool False # Force everything in batch to come from one dset
  extended_names: !!bool False # Whether to use extended names - not currently implemented
  embedding_offset: 0 # Use when adding extra finetuning fields
  train_data_paths: [
    ['/path/to/data/PDEBench/2D/NS_incom', 'incompNS', ''],
  ]
  valid_data_paths: [
    ['/path/to/data/PDEBench/2D/NS_incom', 'incompNS', ''],
  ]

finetune: &finetune
  <<: *basic_config
  max_epochs: 500
  train_val_test: [.8, .1, .1]
  accum_grad: 1
  pretrained: !!bool True
  group: 'debugging'
  pretrained_ckpt_path: '/path/to/training_checkpoints/ckpt.tar'
  train_data_paths: [
    ['/PDEBench/2D/CFD/2D_Train_Turb', 'compNS', 'M1.0'],
  ]
  valid_data_paths: [ # These are the same for all configs - uses split according to train_val_test
    ['/PDEBench/2D/CFD/2D_Train_Turb', 'compNS', 'M1.0'],
  ]
  embedding_offset: 0 # Number of fields in original model - FT fields start after this
  freeze_middle: !!bool False # Whether to freeze the middle layers of the model
  freeze_processor: !!bool False
  append_datasets: [] # List of datasets to append to the input/output projections for finetuning
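
The finetune block pulls in every basic_config key through the same merge-key mechanism, and the training code in this commit reads those settings as attributes (params.batch_size, params.train_data_paths, and so on). One possible way to build such a params object, shown only as an illustration (the config file name is a placeholder and the real code may use its own params class):

    from types import SimpleNamespace

    import yaml

    with open("config/vmae_pretrain.yaml") as f:  # placeholder name for the config above
        cfg = yaml.safe_load(f)["finetune"]       # the merge key flattens basic_config into it

    params = SimpleNamespace(**cfg)               # attribute-style access
    print(params.max_epochs, params.pretrained_ckpt_path)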
Lines changed: 215 additions & 0 deletions
@@ -0,0 +1,215 @@
"""
Remember to parameterize the file paths eventually
"""
import os

import numpy as np
from paddle.io import DataLoader
from paddle.io import Dataset
from paddle.io import DistributedBatchSampler
from paddle.io import RandomSampler

from .hdf5_datasets import DiffRe2DDataset
from .hdf5_datasets import IncompNSDataset
from .masking_generator import TubeMaskingGenerator
from .mixed_dset_sampler import MultisetSampler

broken_paths = []
# IF YOU ADD A NEW DSET MAKE SURE TO UPDATE THIS MAPPING SO MIXED DSET KNOWS HOW TO USE IT
DSET_NAME_TO_OBJECT = {
    "incompNS": IncompNSDataset,
    "diffre2d": DiffRe2DDataset,
}


def get_data_loader(params, paths, distributed, split="train", rank=0, train_offset=0):
    # paths, types, include_string = zip(*paths)
    train_val_test = params.train_val_test
    if split == "pretrain":
        train_val_test = [
            params.train_val_test[0] * params.pretrain_train[0],
            train_val_test[1],
            train_val_test[2],
        ]
        split = "train"  # then restore to train split
    elif split == "train":
        # negative means reverse indexing
        train_val_test = [
            -params.train_val_test[0]
            * params.pretrain_train[1]
            * params.train_subsample,
            train_val_test[1],
            train_val_test[2],
        ]
    dataset = MixedDataset(
        paths,
        n_steps=params.n_steps,
        train_val_test=train_val_test,
        split=split,
        tie_fields=params.tie_fields,
        use_all_fields=params.use_all_fields,
        enforce_max_steps=params.enforce_max_steps,
        train_offset=train_offset,
        masking=params.masking if hasattr(params, "masking") else None,
        blur=params.blur if hasattr(params, "blur") else None,
        rollout=getattr(params, "rollout", 1),
    )
    # dataset = IncompNSDataset(paths[0], n_steps=params.n_steps, train_val_test=params.train_val_test, split=split)
    if distributed:
        base_sampler = DistributedBatchSampler
    else:
        base_sampler = RandomSampler
    sampler = MultisetSampler(
        dataset,
        base_sampler,
        params.batch_size,
        distributed=distributed,
        max_samples=params.epoch_size,
        rank=rank,
    )  # , seed=seed)
    # sampler = DistributedBatchSampler(dataset) if distributed else None
    dataloader = DataLoader(
        dataset,
        batch_size=int(params.batch_size),
        num_workers=params.num_data_workers,
        shuffle=False,  # (sampler is None),
        drop_last=True,
    )
    return dataloader, dataset, sampler


class MixedDataset(Dataset):
    def __init__(
        self,
        path_list=[],
        n_steps=1,
        dt=1,
        train_val_test=(0.8, 0.1, 0.1),
        split="train",
        tie_fields=True,
        use_all_fields=True,
        extended_names=False,
        enforce_max_steps=False,
        train_offset=0,
        masking=None,
        blur=None,
        rollout=1,
    ):
        super().__init__()
        # Global dicts used by Mixed DSET.
        self.train_offset = train_offset
        self.path_list, self.type_list, self.include_string = zip(*path_list)
        self.tie_fields = tie_fields
        self.extended_names = extended_names
        self.split = split
        self.sub_dsets = []
        self.offsets = [0]
        self.train_val_test = train_val_test
        self.use_all_fields = use_all_fields
        self.rollout = rollout

        for dset, path, include_string in zip(
            self.type_list, self.path_list, self.include_string
        ):
            subdset = DSET_NAME_TO_OBJECT[dset](
                path,
                include_string,
                n_steps=n_steps,
                dt=dt,
                train_val_test=train_val_test,
                split=split,
                rollout=self.rollout,
            )
            # Check to make sure our dataset actually exists with these settings
            try:
                len(subdset)
            except ValueError:
                raise ValueError(
                    f"Dataset {path} is empty. Check that n_steps < trajectory_length in file."
                )
            self.sub_dsets.append(subdset)
            self.offsets.append(self.offsets[-1] + len(self.sub_dsets[-1]))
        self.offsets[0] = -1

        self.subset_dict = self._build_subset_dict()

        self.masking = masking  # None or ((#frames, height, width), mask_ratio)
        if (
            self.masking
            and type(self.masking) in [tuple, list]
            and len(self.masking) == 2
        ):  # and self.masking[1] > 0.:
            self.mask_generator = TubeMaskingGenerator(self.masking[0], self.masking[1])
        self.blur = blur

    def get_state_names(self):
        name_list = []
        if self.use_all_fields:
            for name, dset in DSET_NAME_TO_OBJECT.items():
                field_names = dset._specifics()[2]
                name_list += field_names
            return name_list
        else:
            visited = set()
            for dset in self.sub_dsets:
                name = dset.get_name()  # Could use extended names here
                if name not in visited:
                    visited.add(name)
                    name_list.append(dset.field_names)
            return [f for fl in name_list for f in fl]  # Flatten the names

    def _build_subset_dict(self):
        # Maps fields to subsets of variables
        if self.tie_fields:  # Hardcoded, but seems less effective anyway
            subset_dict = {
                "swe": [3],
                "incompNS": [0, 1, 2],
                "compNS": [0, 1, 2, 3],
                "diffre2d": [4, 5],
            }
        elif self.use_all_fields:
            cur_max = 0
            subset_dict = {}
            for name, dset in DSET_NAME_TO_OBJECT.items():
                field_names = dset._specifics()[2]
                subset_dict[name] = list(range(cur_max, cur_max + len(field_names)))
                cur_max += len(field_names)
        else:
            subset_dict = {}
            cur_max = self.train_offset
            for dset in self.sub_dsets:
                name = dset.get_name(self.extended_names)
                if name not in subset_dict:
                    subset_dict[name] = list(
                        range(cur_max, cur_max + len(dset.field_names))
                    )
                    cur_max += len(dset.field_names)
        return subset_dict

    def __getitem__(self, index):
        file_idx = (
            np.searchsorted(self.offsets, index, side="right") - 1
        )  # which dataset we are on
        local_idx = index - max(self.offsets[file_idx], 0)

        try:
            x, y = self.sub_dsets[file_idx][local_idx]
        except:  # noqa
            print(
                "FAILED AT ", file_idx, local_idx, index, int(os.environ.get("RANK", 0))
            )
            raise  # re-raise so the failing index is reported but not silently skipped

        if (
            self.masking
            and type(self.masking) in [tuple, list]
            and len(self.masking) == 2
        ):  # and self.masking[1] > 0.:
            mask = self.mask_generator()
            # return x, file_idx, paddle.to_tensor(self.subset_dict[self.sub_dsets[file_idx].get_name()]), bcs, y, mask, x_blur
            return x, y, mask
        else:
            return x, y

    def __len__(self):
        return sum([len(dset) for dset in self.sub_dsets])
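
For context, a minimal usage sketch of get_data_loader defined above. The concrete values and the data path are placeholders, and it assumes the referenced HDF5 data and the IncompNSDataset class are actually available:

    from types import SimpleNamespace

    # assumes this module is importable, e.g.
    # from data_utils.mixed_dset import get_data_loader

    params = SimpleNamespace(
        train_val_test=[0.8, 0.1, 0.1],
        pretrain_train=[0.9, 0.1],
        train_subsample=1.0,
        n_steps=1,
        tie_fields=False,
        use_all_fields=True,
        enforce_max_steps=False,
        batch_size=4,
        epoch_size=200,
        num_data_workers=0,
    )
    # Each entry is (path, dataset type, include string), matching train_data_paths in the config.
    paths = [("/path/to/data/PDEBench/2D/NS_incom", "incompNS", "")]

    dataloader, dataset, sampler = get_data_loader(params, paths, distributed=False, split="train")
    for x, y in dataloader:  # masking is unset here, so each item is an (input, target) pair
        break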
