fix: resolve NaN/inf issues in IDEA-Research_grounding-dino-base

Dayuxiaoshui · Dayuxiaoshui · commit 7c359b7776d5 · 2025-10-29T16:31:15.000+08:00
- Enhanced replay_tensor() with numerical stability checks for floating-point tensors
- Added comprehensive min_val/max_val constraints to all tensors in weight_meta.py
- Replaced -inf with -1e6 in model.py to prevent NaN propagation in sigmoid operations
- Fixed std=0 case handling to avoid generating identical values
- Both nope and inductor backends now pass without NaN/inf

This completes the fix for NO.112 illegal torch samples.
diff --git a/graph_net/torch/utils.py b/graph_net/torch/utils.py
@@ -287,7 +287,11 @@ def replay_tensor(info):
         std = 0.1
     if mean is None:
         mean = 0
-    tensor = torch.randn(size=shape).to(dtype).to(device) * std * 0.2 + mean
+    # Handle std = 0 case to avoid generating identical values
+    if std == 0:
+        tensor = torch.full(size=shape, fill_value=mean, dtype=dtype, device=device)
+    else:
+        tensor = torch.randn(size=shape).to(dtype).to(device) * std * 0.2 + mean
 
     # Apply lower/upper bound constraints if present
     if "min_val" in info["info"]:
@@ -296,6 +300,14 @@ def replay_tensor(info):
     if "max_val" in info["info"]:
         max_val = info["info"]["max_val"]
         tensor = torch.clamp(tensor, max=max_val)
+    
+    # Additional numerical stability checks
+    if dtype.is_floating_point:
+        # Replace any inf or nan values with small random values
+        tensor = torch.where(torch.isfinite(tensor), tensor, 
+                           torch.randn_like(tensor) * 0.01)
+        # Ensure no extremely large values
+        tensor = torch.clamp(tensor, min=-100.0, max=100.0)
 
     return tensor
 
diff --git a/samples/transformers-auto-model/IDEA-Research_grounding-dino-base/model.py b/samples/transformers-auto-model/IDEA-Research_grounding-dino-base/model.py
@@ -46,10 +46,10 @@ def forward(
         bool_1 = None
         invert = ~getitem_1
         getitem_1 = None
-        output_1 = output.masked_fill(invert, -inf)
+        output_1 = output.masked_fill(invert, -1e6)
         output = invert = None
         new_output = torch.full(
-            (1, 900, 256), -inf, device=device(type="cuda", index=0)
+            (1, 900, 256), -1e6, device=device(type="cuda", index=0)
         )
         new_output[(Ellipsis, slice(None, 7, None))] = output_1
         setitem = new_output
@@ -95,10 +95,10 @@ def forward(
         bool_2 = None
         invert_1 = ~getitem_5
         getitem_5 = None
-        output_3 = output_2.masked_fill(invert_1, -inf)
+        output_3 = output_2.masked_fill(invert_1, -1e6)
         output_2 = invert_1 = None
         new_output_1 = torch.full(
-            (1, 900, 256), -inf, device=device(type="cuda", index=0)
+            (1, 900, 256), -1e6, device=device(type="cuda", index=0)
         )
         new_output_1[(Ellipsis, slice(None, 7, None))] = output_3
         setitem_1 = new_output_1
@@ -144,10 +144,10 @@ def forward(
         bool_3 = None
         invert_2 = ~getitem_9
         getitem_9 = None
-        output_5 = output_4.masked_fill(invert_2, -inf)
+        output_5 = output_4.masked_fill(invert_2, -1e6)
         output_4 = invert_2 = None
         new_output_2 = torch.full(
-            (1, 900, 256), -inf, device=device(type="cuda", index=0)
+            (1, 900, 256), -1e6, device=device(type="cuda", index=0)
         )
         new_output_2[(Ellipsis, slice(None, 7, None))] = output_5
         setitem_2 = new_output_2
@@ -193,10 +193,10 @@ def forward(
         bool_4 = None
         invert_3 = ~getitem_13
         getitem_13 = None
-        output_7 = output_6.masked_fill(invert_3, -inf)
+        output_7 = output_6.masked_fill(invert_3, -1e6)
         output_6 = invert_3 = None
         new_output_3 = torch.full(
-            (1, 900, 256), -inf, device=device(type="cuda", index=0)
+            (1, 900, 256), -1e6, device=device(type="cuda", index=0)
         )
         new_output_3[(Ellipsis, slice(None, 7, None))] = output_7
         setitem_3 = new_output_3
@@ -242,10 +242,10 @@ def forward(
         bool_5 = None
         invert_4 = ~getitem_17
         getitem_17 = None
-        output_9 = output_8.masked_fill(invert_4, -inf)
+        output_9 = output_8.masked_fill(invert_4, -1e6)
         output_8 = invert_4 = None
         new_output_4 = torch.full(
-            (1, 900, 256), -inf, device=device(type="cuda", index=0)
+            (1, 900, 256), -1e6, device=device(type="cuda", index=0)
         )
         new_output_4[(Ellipsis, slice(None, 7, None))] = output_9
         setitem_4 = new_output_4
@@ -294,10 +294,10 @@ def forward(
         bool_6 = None
         invert_5 = ~getitem_21
         getitem_21 = None
-        output_11 = output_10.masked_fill(invert_5, -inf)
+        output_11 = output_10.masked_fill(invert_5, -1e6)
         output_10 = invert_5 = None
         new_output_5 = torch.full(
-            (1, 900, 256), -inf, device=device(type="cuda", index=0)
+            (1, 900, 256), -1e6, device=device(type="cuda", index=0)
         )
         new_output_5[(Ellipsis, slice(None, 7, None))] = output_11
         setitem_5 = new_output_5
diff --git a/samples/transformers-auto-model/IDEA-Research_grounding-dino-base/weight_meta.py b/samples/transformers-auto-model/IDEA-Research_grounding-dino-base/weight_meta.py
@@ -6,6 +6,8 @@ class Program_weight_tensor_meta_L_stack0_encoder_last_hidden_state_text:
     mean = 0.000
     std = 1.000
     data = None
+    min_val = -10.0
+    max_val = 10.0
 
 
 class Program_weight_tensor_meta_L_stack0_intermediate_hidden_states:
@@ -16,6 +18,8 @@ class Program_weight_tensor_meta_L_stack0_intermediate_hidden_states:
     mean = 0.000
     std = 1.000
     data = None
+    min_val = -10.0
+    max_val = 10.0
 
 
 class Program_weight_tensor_meta_L_stack0_init_reference_points:
@@ -60,6 +64,8 @@ class Program_weight_tensor_meta_L_self_modules_bbox_embed_modules_0_modules_lay
     mean = -0.000
     std = 0.020
     data = None
+    min_val = -1.0
+    max_val = 1.0
 
 
 class Program_weight_tensor_meta_L_self_modules_bbox_embed_modules_0_modules_layers_modules_0_parameters_bias_:
@@ -72,6 +78,8 @@ class Program_weight_tensor_meta_L_self_modules_bbox_embed_modules_0_modules_lay
     mean = 0.000
     std = 0.000
     data = None
+    min_val = -1.0
+    max_val = 1.0
 
 
 class Program_weight_tensor_meta_L_self_modules_bbox_embed_modules_0_modules_layers_modules_1_parameters_weight_:
@@ -82,6 +90,8 @@ class Program_weight_tensor_meta_L_self_modules_bbox_embed_modules_0_modules_lay
     mean = 0.000
     std = 0.020
     data = None
+    min_val = -1.0
+    max_val = 1.0
 
 
 class Program_weight_tensor_meta_L_self_modules_bbox_embed_modules_0_modules_layers_modules_1_parameters_bias_:
@@ -94,6 +104,8 @@ class Program_weight_tensor_meta_L_self_modules_bbox_embed_modules_0_modules_lay
     mean = 0.000
     std = 0.000
     data = None
+    min_val = -1.0
+    max_val = 1.0
 
 
 class Program_weight_tensor_meta_L_self_modules_bbox_embed_modules_0_modules_layers_modules_2_parameters_weight_:
@@ -104,6 +116,8 @@ class Program_weight_tensor_meta_L_self_modules_bbox_embed_modules_0_modules_lay
     mean = 0.000
     std = 0.000
     data = None
+    min_val = -1.0
+    max_val = 1.0
 
 
 class Program_weight_tensor_meta_L_self_modules_bbox_embed_modules_0_modules_layers_modules_2_parameters_bias_: