@@ -168,22 +168,16 @@ def compute_loss(params, buffers, sample, target):
# we can double check that the results using ``grad`` and ``vmap`` match the
# results of hand processing each one individually:
-for name, ft_per_sample_grad in ft_per_sample_grads.items():
-    # Find the corresponding manually computed gradient.
-    idx = list(model.named_parameters()).index((name, model.get_parameter(name)))
-    per_sample_grad = per_sample_grads[idx]
-
-    # Check if shapes match
-    if per_sample_grad.shape != ft_per_sample_grad.shape:
-        print(f"Shape mismatch for {name}: {per_sample_grad.shape} vs {ft_per_sample_grad.shape}")
-        # Reshape if needed (sometimes the functional API returns a different shape)
-        if per_sample_grad.numel() == ft_per_sample_grad.numel():
-            ft_per_sample_grad = ft_per_sample_grad.view(per_sample_grad.shape)
-
-    # Use a higher tolerance for comparison
-    assert torch.allclose(per_sample_grad, ft_per_sample_grad, atol=2e-2, rtol=2e-2), \
-        f"Mismatch in {name}: max diff {(per_sample_grad - ft_per_sample_grad).abs().max().item()}"
-
+# Get the parameter names in the same order as per_sample_grads
+param_names = list(params.keys())
+
+# Compare gradients for each parameter
+for i, name in enumerate(param_names):
+    per_sample_grad = per_sample_grads[i]
+    ft_per_sample_grad = ft_per_sample_grads[name]
+
+    assert torch.allclose(per_sample_grad, ft_per_sample_grad, atol=3e-3, rtol=1e-5), \
+        f"Gradients don't match for {name}: max diff = {(per_sample_grad - ft_per_sample_grad).abs().max()}"
######################################################################
# A quick note: there are limitations around what types of functions can be
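The hunk above shows only the comparison loop. For context, here is a minimal sketch (not part of the commit) of how the two gradient collections being compared are typically produced in this tutorial's setup; `model`, `loss_fn`, `data`, and `targets` are assumed to be defined earlier in the file, and `compute_loss` is the function named in the hunk header.

import torch
from torch.func import grad, vmap

# Hand-processed reference: one autograd call per sample, then stack per parameter.
# `per_sample_grads` ends up as a list ordered like model.parameters().
def compute_grad(sample, target):
    prediction = model(sample.unsqueeze(0))
    loss = loss_fn(prediction, target.unsqueeze(0))
    return torch.autograd.grad(loss, list(model.parameters()))

sample_grads = [compute_grad(data[i], targets[i]) for i in range(data.shape[0])]
per_sample_grads = [torch.stack(shards) for shards in zip(*sample_grads)]

# torch.func version: vmap a per-sample grad of `compute_loss` over the batch.
# `ft_per_sample_grads` is a dict keyed by parameter name.
params = {k: v.detach() for k, v in model.named_parameters()}
buffers = {k: v.detach() for k, v in model.named_buffers()}
ft_compute_sample_grad = vmap(grad(compute_loss), in_dims=(None, None, 0, 0))
ft_per_sample_grads = ft_compute_sample_grad(params, buffers, data, targets)

Because `params` is built from `model.named_parameters()`, iterating over `params.keys()` in the new code visits parameters in the same order as the manually stacked list, which is what makes the index-by-position comparison in the added loop valid.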