Commit a57c461

feat(ppsci): remove redundant codes
1 parent 220e895

File tree: 4 files changed, +94 -112 lines

docs/zh/examples/data_efficient_nopt.md
4 additions, 14 deletions

````diff
@@ -8,6 +8,10 @@
 
 ## Code Information
 
+| Model | Checkpoint | **R2** | **Slope** |
+| :---------: | :--------------------: | :----: | :-------: |
+| FNO_Possion | [finetune_b01_m0_n8192](https://dataset.bj.bcebos.com/PaddleScience/data_efficient_nopt/possion_data/finetune_b01_m0_n8192.pdparams) | 0.9765 | 0.9752 |
+
 === "Model training command"
 
     ``` sh
@@ -39,14 +43,6 @@
     train_config.pois_64_finetune_e5_15.pretrained_ckpt_path="./data/pretrain_b01_m0.pdparams"
     ```
 
-=== "Model evaluation command"
-
-    Not available
-
-=== "Model export command"
-
-    Not available
-
 === "Model inference command"
 
     ``` sh
@@ -207,12 +203,6 @@ examples/data_efficient_nopt/data_efficient_nopt.py
 
 In summary, the paper proposes an innovative and efficient neural-operator learning framework: unsupervised pretraining learns general-purpose representations from large amounts of cheap, unlabeled physics data, and in-context learning draws on a few similar cases at inference time to improve OOD generalization. The framework substantially reduces the need for expensive simulation data, improves the model's adaptability and generalization on complex physics problems, and opens a new path toward data-efficient scientific machine learning.
 
-Partial experimental results are shown below:
-
-| Model | Checkpoint | **RMSE** | **RMSE (normalized)** | **R2** | **Slope** |
-| :---------: | :--------------------: | :--------: | :--------------------: | :----: | :-------: |
-| FNO_Possion | finetune_b01_m0_n8192 | 0.2586 | 0.1414 | 0.9765 | 0.9752 |
-
 ## 6. References
 
 - [Data-Efficient Operator Learning via Unsupervised Pretraining and In-Context Learning](https://arxiv.org/abs/2402.15734)
````

examples/data_efficient_nopt/data_efficient_nopt.py
7 additions, 67 deletions

````diff
@@ -35,78 +35,21 @@
 from tqdm import tqdm
 
 from ppsci.arch.data_efficient_nopt_model import YParams
+from ppsci.arch.data_efficient_nopt_model import add_weight_decay
 from ppsci.arch.data_efficient_nopt_model import build_fno
 from ppsci.arch.data_efficient_nopt_model import fno_pretrain as fno
 from ppsci.arch.data_efficient_nopt_model import gaussian_blur
+from ppsci.arch.data_efficient_nopt_model import get_cutoff
+from ppsci.arch.data_efficient_nopt_model import grad_norm
+from ppsci.arch.data_efficient_nopt_model import l2_err
+from ppsci.arch.data_efficient_nopt_model import param_diff
+from ppsci.arch.data_efficient_nopt_model import param_norm
 from ppsci.data.dataset.data_efficient_nopt_dataset import MixedDatasetLoader
 from ppsci.data.dataset.data_efficient_nopt_dataset import PoisHelmDatasetLoader
 
 logger = logging.getLogger(__name__)
 
 
-def l2_err(pred, target, spatial_dim=(-1, -2, -3)):
-    x = paddle.sum((pred - target) ** 2, axis=spatial_dim) / paddle.sum(
-        target**2, axis=spatial_dim
-    )
-    x = paddle.sqrt(x)
-    return paddle.mean(x)
-
-
-def grad_norm(parameters):
-    with paddle.no_grad():
-        total_norm = 0
-        for p in parameters:
-            if p.grad is not None:
-                total_norm += p.grad.data.pow(2).sum().item()
-        return total_norm**0.5
-
-
-def grad_clone(parameters):
-    with paddle.no_grad():
-        clones = []
-        for p in parameters:
-            if p.grad is not None:
-                clones.append(p.grad.clone())
-            else:
-                clones.append(paddle.zeros_like(p))
-        return clones
-
-
-def param_norm(parameters):
-    with paddle.no_grad():
-        total_norm = 0
-        for p in parameters:
-            total_norm += p.pow(2).sum().item()
-        return total_norm**0.5
-
-
-def param_diff(params1, params2):
-    with paddle.no_grad():
-        total_norm = 0
-        for p1, p2 in zip(params1, params2):
-            total_norm += (p2 - p1).pow(2).sum().item()
-        return total_norm**0.5
-
-
-def add_weight_decay(model, weight_decay=1e-5, inner_lr=1e-3, skip_list=()):
-    decay = []
-    no_decay = []
-    for name, param in model.named_parameters():
-        if param.stop_gradient:
-            continue
-        if len(param.squeeze().shape) <= 1 or name in skip_list:
-            no_decay.append(param)
-        else:
-            decay.append(param)
-    return [
-        {
-            "params": no_decay,
-            "weight_decay": 0.0,
-        },
-        {"params": decay, "weight_decay": weight_decay},
-    ]
-
-
 class Trainer:
     def __init__(self, params, global_rank, local_rank, device, sweep_id=None):
         self.device = device
@@ -531,10 +474,7 @@ def validate_one_epoch(self, full=False):
         Note: need to split datasets for meaningful metrics, but TBD.
         """
         self.model.eval()
-        if full:
-            cutoff = 999999999999
-        else:
-            cutoff = 40
+        cutoff = get_cutoff(full=full)
         with paddle.no_grad():
             with amp.auto_cast(enable=False, dtype=self.mp_type):
                 logs = {
````
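
The deleted helpers are not gone: they move into `ppsci.arch.data_efficient_nopt_model` (see the next file) and are re-imported above, so training behavior is unchanged. A minimal usage sketch, assuming the import paths introduced by this commit; the toy `Sequential` model and the tensor shapes are illustrative only:

```python
import paddle

# Import paths introduced by this commit; the toy model below is illustrative.
from ppsci.arch.data_efficient_nopt_model import add_weight_decay
from ppsci.arch.data_efficient_nopt_model import get_cutoff
from ppsci.arch.data_efficient_nopt_model import l2_err

model = paddle.nn.Sequential(
    paddle.nn.Linear(16, 32), paddle.nn.ReLU(), paddle.nn.Linear(32, 1)
)

# add_weight_decay() returns two parameter groups: 1-D parameters (biases,
# norm scales) with decay disabled, and everything else with the requested
# decay. Paddle optimizers accept such group lists as `parameters`.
optimizer = paddle.optimizer.AdamW(
    learning_rate=1e-3, parameters=add_weight_decay(model, weight_decay=1e-5)
)

# get_cutoff() replaces the inline if/else in validate_one_epoch: an
# effectively unbounded batch budget when full=True, 40 batches otherwise.
assert get_cutoff(full=False) == 40

# l2_err() is the batch-mean relative L2 error over the trailing spatial
# axes; a uniform 10% overshoot evaluates to ~0.1.
target = paddle.ones([2, 3, 8, 8, 8])
print(float(l2_err(target * 1.1, target)))
```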

ppsci/arch/data_efficient_nopt_model.py
75 additions, 21 deletions

````diff
@@ -18,24 +18,79 @@
 
 import paddle
 import paddle.nn as nn
-import paddle.nn.functional as F
 import paddle.tensor as Tensor
 
+from ppsci.arch.activation import act_func_dict
 
-def _get_act(activation):
-    if activation == "tanh":
-        func = F.tanh
-    elif activation == "gelu":
-        func = F.gelu
-    elif activation == "relu":
-        func = F.relu_
-    elif activation == "elu":
-        func = F.elu_
-    elif activation == "leaky_relu":
-        func = F.leaky_relu_
-    else:
-        raise ValueError(f"{activation} is not supported")
-    return func
+FULL_MODE_CUTOFF = 999999999999
+NORMAL_MODE_CUTOFF = 40
+
+
+def get_cutoff(full):
+    return FULL_MODE_CUTOFF if full else NORMAL_MODE_CUTOFF
+
+
+def l2_err(pred, target, spatial_dim=(-1, -2, -3)):
+    x = paddle.sum((pred - target) ** 2, axis=spatial_dim) / paddle.sum(
+        target**2, axis=spatial_dim
+    )
+    x = paddle.sqrt(x)
+    return paddle.mean(x)
+
+
+def grad_norm(parameters):
+    with paddle.no_grad():
+        total_norm = 0
+        for p in parameters:
+            if p.grad is not None:
+                total_norm += p.grad.data.pow(2).sum().item()
+        return total_norm**0.5
+
+
+def grad_clone(parameters):
+    with paddle.no_grad():
+        clones = []
+        for p in parameters:
+            if p.grad is not None:
+                clones.append(p.grad.clone())
+            else:
+                clones.append(paddle.zeros_like(p))
+        return clones
+
+
+def param_norm(parameters):
+    with paddle.no_grad():
+        total_norm = 0
+        for p in parameters:
+            total_norm += p.pow(2).sum().item()
+        return total_norm**0.5
+
+
+def param_diff(params1, params2):
+    with paddle.no_grad():
+        total_norm = 0
+        for p1, p2 in zip(params1, params2):
+            total_norm += (p2 - p1).pow(2).sum().item()
+        return total_norm**0.5
+
+
+def add_weight_decay(model, weight_decay=1e-5, inner_lr=1e-3, skip_list=()):
+    decay = []
+    no_decay = []
+    for name, param in model.named_parameters():
+        if param.stop_gradient:
+            continue
+        if len(param.squeeze().shape) <= 1 or name in skip_list:
+            no_decay.append(param)
+        else:
+            decay.append(param)
+    return [
+        {
+            "params": no_decay,
+            "weight_decay": 0.0,
+        },
+        {"params": decay, "weight_decay": weight_decay},
+    ]
 
 
 def compl_mul2d_v2(a: paddle.Tensor, b: paddle.Tensor) -> paddle.Tensor:
@@ -141,7 +196,7 @@ def __init__(
             ]
         )
 
-        self.activation = _get_act(activation)
+        self.activation = act_func_dict[activation]
 
     def forward(self, x):
         """
@@ -205,7 +260,7 @@ def __init__(
         self.dropout = nn.Dropout(p=dropout)
         self.fc1 = nn.Linear(layers[-1], fc_dim)
         self.fc2 = nn.Linear(fc_dim, out_dim)
-        self.activation = _get_act(activation)
+        self.activation = act_func_dict[activation]
         self.mean_constraint = mean_constraint
 
     def forward(self, x):
@@ -387,7 +442,7 @@ def __init__(
            layers[-1] * (n_demos + 1) + out_dim * n_demos * self.num_heads, fc_dim
        )
        self.fc2 = nn.Linear(fc_dim, out_dim)
-       self.activation = _get_act(activation)
+       self.activation = act_func_dict[activation]
        self.mean_constraint = mean_constraint
        self.n_demos = n_demos
 
@@ -504,8 +559,7 @@ def __init__(
         self.l_attn = l_attn
         self.fc1 = nn.Linear(self.C_fno, fc_dim)
         self.fc2 = nn.Linear(fc_dim, out_dim)
-        #########################
-        self.activation = _get_act(activation)
+        self.activation = act_func_dict[activation]
         self.mean_constraint = mean_constraint
         self.n_demos = n_demos
 
@@ -682,7 +736,7 @@ def __init__(
         )
         self.dropout = nn.Dropout(p=dropout)
         self.encoder_to_decoder = nn.Linear(self.C_fno, self.C_fno)
-        self.activation = _get_act(activation)
+        self.activation = act_func_dict[activation]
         self.mean_constraint = mean_constraint
 
     def forward(self, x, mask=None):
````
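
The `_get_act` if/elif ladder is replaced by a dictionary lookup imported from `ppsci.arch.activation`. A hypothetical sketch of that lookup-table pattern follows; the entries below are illustrative stand-ins, not the actual contents of `act_func_dict`:

```python
import paddle.nn.functional as F

# Illustrative stand-in for ppsci.arch.activation.act_func_dict: a
# name -> callable table replaces one branch per supported activation.
act_func_dict = {
    "tanh": F.tanh,
    "gelu": F.gelu,
    "relu": F.relu,
    "elu": F.elu,
    "leaky_relu": F.leaky_relu,
}


def get_activation(name: str):
    # Plain indexing, as the commit uses, raises KeyError for unknown
    # names; wrapping it preserves the old _get_act ValueError message.
    try:
        return act_func_dict[name]
    except KeyError:
        raise ValueError(f"{name} is not supported")
```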

ppsci/data/dataset/data_efficient_nopt_dataset.py
8 additions, 10 deletions

````diff
@@ -14,9 +14,6 @@
 #
 # refs: https://github.com/delta-lab-ai/data_efficient_nopt
 
-"""
-Remember to parameterize the file paths eventually
-"""
 import glob
 import logging
 import os
@@ -32,6 +29,8 @@
 from paddle.io import RandomSampler
 from paddle.io import Sampler
 
+logger = logging.getLogger(__name__)
+
 __all__ = [
     "MultisetSampler",
 ]
@@ -167,7 +166,7 @@ def _get_directory_stats(self, path):
             with h5py.File(file, "r") as _f:
                 samples, steps = self._get_specific_stats(_f)
                 if steps - self.n_steps - (self.dt - 1) < 1:
-                    print(
+                    logger.warning(
                         "WARNING: File {} has {} steps, but n_steps is {}. Setting file steps = max allowable.".format(
                             file, steps, self.n_steps
                         )
@@ -209,7 +208,7 @@ def _get_directory_stats(self, path):
                     + (steps - file_nsteps - (self.dt - 1)) * split_samples
                 )
             except:  # noqa
-                print(
+                logger.warning(
                     "WARNING: Failed to open file {}. Continuing without it.".format(
                         file
                     )
@@ -221,13 +220,12 @@ def _get_directory_stats(self, path):
         self.len = self.offsets[-1]
         if self.split_level == "file":
             if self.train_val_test is None:
-                print(
+                logger.warning(
                     "WARNING: No train/val/test split specified. Using all data for training."
                 )
                 self.split_offset = 0
                 self.len = self.offsets[-1]
             else:
-                print("Using train/val/test split: {}".format(self.train_val_test))
                 total_samples = sum(self.file_samples)
                 if (
                     self.train_val_test[1] * total_samples < 1
@@ -495,9 +493,9 @@ def __iter__(self) -> Iterator[T_co]:
                 for d in queue:
                     yield d
             except Exception as err:
-                print("ERRRR", err)
+                logger.error("ERRRR", err)
                 sampler_choices.pop(index_sampled)
-                print(
+                logger.warning(
                     f"Note: dset {dset_sampled} fully used. Dsets remaining: {len(sampler_choices)}"
                 )
                 continue
@@ -938,7 +936,7 @@ def __getitem__(self, index):
         try:
             x, y = self.sub_dsets[file_idx][local_idx]
         except:  # noqa
-            print(
+            logger.error(
                 "FAILED AT ", file_idx, local_idx, index, int(os.environ.get("RANK", 0))
            )
````
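
The dataset loader's `print` calls become module-level `logging` calls, which adds severity levels and lets distributed launchers filter or redirect output per rank. A minimal sketch of the pattern; `open_or_skip` is a hypothetical helper for illustration, not part of the dataset module:

```python
import logging

# Module-level logger, as added near the top of the dataset module.
logger = logging.getLogger(__name__)


def open_or_skip(path):  # hypothetical helper, for illustration only
    try:
        return open(path, "rb")
    except OSError as err:
        # logging formats messages lazily with %-style placeholders, so each
        # extra positional argument needs a matching placeholder in the
        # message string (unlike print, which joins arguments with spaces).
        logger.warning("Failed to open file %s (%s). Continuing without it.", path, err)
        return None
```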
