Commit d40cfc5

Revert "Disable fp64 patch (#3663)" (#4494)
Signed-off-by: Anatoly Myachev <[email protected]>
1 parent 8a3efe7 commit d40cfc5

2 files changed: +70 -2 lines changed
scripts/patch-pytorch.sh

Lines changed: 3 additions & 2 deletions

@@ -25,14 +25,15 @@ apply_patch() {
     echo "Applying patch $1"
     cd "$REPO_ROOT"
     if [[ -f $SCRIPTS_DIR/$1 ]]; then
-        git apply "$SCRIPTS_DIR/$1"
+        git apply --3way "$SCRIPTS_DIR/$1"
     else
         fetch_patch "$1"
-        git apply "$SCRIPTS_DIR/$(basename "$1")"
+        git apply --3way "$SCRIPTS_DIR/$(basename "$1")"
     fi
 }
 
 echo "Applying PyTorch patches in $REPO_ROOT"
 
 # put your patch applies here
 apply_patch https://github.com/pytorch/pytorch/pull/143553.diff
+apply_patch pytorch_fp64.patch
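
Note on the change above: "git apply --3way" falls back to a three-way merge when a patch no longer applies cleanly, leaving conflict markers in the affected files instead of rejecting the patch outright as plain "git apply" does. This makes applying the pinned patches, including the re-added pytorch_fp64.patch below, more tolerant of small upstream drift in the PyTorch tree.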

scripts/pytorch_fp64.patch

Lines changed: 67 additions & 0 deletions

diff --git a/benchmarks/dynamo/common.py b/benchmarks/dynamo/common.py
index b3a17f4f669..b06c0e3bdcc 100644
--- a/benchmarks/dynamo/common.py
+++ b/benchmarks/dynamo/common.py
@@ -2457,6 +2457,12 @@ def cast_to_fp32(model, inputs):
     return cast_to(torch.float32, model, inputs)
 
 
+def cast_to_device(device, model, inputs):
+    model = model.to(device=device)
+    inputs = tree_map_only(torch.Tensor, lambda x: x.to(device=device), inputs)
+    return model, inputs
+
+
 class DummyGradScaler:
     def scale(self, loss):
         return loss
@@ -2948,10 +2954,24 @@ class BenchmarkRunner:
         model_fp64 = None
         inputs_fp64 = None
         try:
-            model_fp64, inputs_fp64 = cast_to_fp64(
-                self.deepcopy_and_maybe_parallelize(model),
-                clone_inputs(example_inputs),
-            )
+            # Currently, XPU GEMM FP64 support is WIP. Therefore, we explicitly fallback to
+            # CPU to execute FP64 and take the result as the gold reference.
+            if current_device == "xpu":
+                model_fp64, inputs_fp64 = cast_to_fp64(
+                    *cast_to_device(
+                        "cpu",
+                        self.deepcopy_and_maybe_parallelize(model),
+                        clone_inputs(example_inputs),
+                    )
+                )
+            else:
+                model_fp64, inputs_fp64 = cast_to_fp64(
+                    self.deepcopy_and_maybe_parallelize(model),
+                    clone_inputs(example_inputs),
+                )
+
+            # current_device of init_optimizer only impacts which optimizer will be applied. It does
+            # not change any tensor internally. Hence, we leave as it is rather than passing cpu.
             self.init_optimizer(name, current_device, model_fp64.parameters())
             fp64_outputs = self.run_n_iterations(
                 model_fp64, inputs_fp64, self.model_iter_fn
@@ -2962,11 +2982,19 @@ class BenchmarkRunner:
                 else x,
                 fp64_outputs,
             )
-        except Exception:
+            if current_device == "xpu":
+                fp64_outputs = tree_map_only(
+                    torch.Tensor,
+                    lambda x: x.to(device=current_device),
+                    fp64_outputs,
+                )
+        except Exception as e:
             log.warning(
                 "fp64 golden ref were not generated for %s. Setting accuracy check to cosine",
                 name,
             )
+            error_msg = f"current_device={current_device}; error:{str(e)}"
+            log.warning(error_msg)
             self.args.cosine = True
             fp64_outputs = None
         finally:
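
For context, a minimal standalone sketch of the pattern the patch introduces: compute the FP64 golden reference on CPU via the new cast_to_device helper, then map the reference outputs back to the benchmark device with tree_map_only. The toy Linear model, the input dict, and the import path torch.utils._pytree are illustrative assumptions, not part of the commit.

import torch
from torch.utils._pytree import tree_map_only

def cast_to_device(device, model, inputs):
    # Move the model and every tensor in the (possibly nested) inputs to `device`.
    model = model.to(device=device)
    inputs = tree_map_only(torch.Tensor, lambda x: x.to(device=device), inputs)
    return model, inputs

# Illustrative usage only: a toy model standing in for the benchmark model.
device = "xpu" if hasattr(torch, "xpu") and torch.xpu.is_available() else "cpu"
model = torch.nn.Linear(4, 4).to(device)
inputs = {"x": torch.randn(2, 4, device=device)}

# The FP64 golden reference is computed on CPU (avoiding XPU FP64 GEMM) ...
model_cpu, inputs_cpu = cast_to_device("cpu", model, inputs)
model_fp64 = model_cpu.double()
inputs_fp64 = tree_map_only(torch.Tensor, lambda x: x.double(), inputs_cpu)
fp64_outputs = model_fp64(inputs_fp64["x"])

# ... and the reference outputs are moved back to the original device for comparison.
fp64_outputs = tree_map_only(torch.Tensor, lambda x: x.to(device=device), fp64_outputs)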
