diff --git a/backends/apple/coreml/README.md b/backends/apple/coreml/README.md
index 05b56e9c788..b3b22ed9997 100644
--- a/backends/apple/coreml/README.md
+++ b/backends/apple/coreml/README.md
@@ -65,7 +65,7 @@ To quantize a Program in a Core ML favored way, the client may utilize **CoreMLQ
 import torch
 import executorch.exir

-from torch._export import capture_pre_autograd_graph
+from torch.export import export_for_training
 from torch.ao.quantization.quantize_pt2e import (
     convert_pt2e,
     prepare_pt2e,
@@ -93,7 +93,7 @@ class Model(torch.nn.Module):
 source_model = Model()
 example_inputs = (torch.randn((1, 3, 256, 256)), )

-pre_autograd_aten_dialect = capture_pre_autograd_graph(model, example_inputs)
+pre_autograd_aten_dialect = export_for_training(model, example_inputs).module()

 quantization_config = LinearQuantizerConfig.from_dict(
     {
diff --git a/docs/source/llm/getting-started.md b/docs/source/llm/getting-started.md
index 44e7a7a33b6..9419a92fd6e 100644
--- a/docs/source/llm/getting-started.md
+++ b/docs/source/llm/getting-started.md
@@ -144,8 +144,7 @@ import torch

 from executorch.exir import EdgeCompileConfig, to_edge
 from torch.nn.attention import sdpa_kernel, SDPBackend
-from torch._export import capture_pre_autograd_graph
-from torch.export import export
+from torch.export import export, export_for_training

 from model import GPT

@@ -170,7 +169,7 @@ dynamic_shape = (
 # Trace the model, converting it to a portable intermediate representation.
 # The torch.no_grad() call tells PyTorch to exclude training-specific logic.
 with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
-    m = capture_pre_autograd_graph(model, example_inputs, dynamic_shapes=dynamic_shape)
+    m = export_for_training(model, example_inputs, dynamic_shapes=dynamic_shape).module()
     traced_model = export(m, example_inputs, dynamic_shapes=dynamic_shape)

 # Convert the model into a runnable ExecuTorch program.
@@ -462,7 +461,7 @@ from executorch.exir import EdgeCompileConfig, to_edge
 import torch
 from torch.export import export
 from torch.nn.attention import sdpa_kernel, SDPBackend
-from torch._export import capture_pre_autograd_graph
+from torch.export import export_for_training

 from model import GPT

@@ -489,7 +488,7 @@ dynamic_shape = (
 # Trace the model, converting it to a portable intermediate representation.
 # The torch.no_grad() call tells PyTorch to exclude training-specific logic.
 with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
-    m = capture_pre_autograd_graph(model, example_inputs, dynamic_shapes=dynamic_shape)
+    m = export_for_training(model, example_inputs, dynamic_shapes=dynamic_shape).module()
     traced_model = export(m, example_inputs, dynamic_shapes=dynamic_shape)

 # Convert the model into a runnable ExecuTorch program.
@@ -635,7 +634,7 @@ xnnpack_quant_config = get_symmetric_quantization_config(
 xnnpack_quantizer = XNNPACKQuantizer()
 xnnpack_quantizer.set_global(xnnpack_quant_config)

-m = capture_pre_autograd_graph(model, example_inputs)
+m = export_for_training(model, example_inputs).module()

 # Annotate the model for quantization. This prepares the model for calibration.
 m = prepare_pt2e(m, xnnpack_quantizer)
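
Below is a minimal, self-contained sketch (not taken from either file in the diff) of the migrated flow these hunks converge on: `torch.export.export_for_training(...)` returns an `ExportedProgram`, so `.module()` is called to recover a module that the PT2E quantization APIs accept, replacing the private `torch._export.capture_pre_autograd_graph`. The `TinyModel` class, tensor shapes, and the `torch.ao.quantization.quantizer.xnnpack_quantizer` import path are illustrative assumptions, not taken from the docs being edited.

```python
import torch
from torch.export import export, export_for_training
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)


class TinyModel(torch.nn.Module):
    """Hypothetical stand-in for the models used in the docs."""

    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(16, 16)

    def forward(self, x):
        return self.linear(x)


model = TinyModel().eval()
example_inputs = (torch.randn(1, 16),)

# Old: m = capture_pre_autograd_graph(model, example_inputs)
# New: export_for_training(...) returns an ExportedProgram; .module() yields
# the GraphModule expected by prepare_pt2e/convert_pt2e.
m = export_for_training(model, example_inputs).module()

quantizer = XNNPACKQuantizer()
quantizer.set_global(get_symmetric_quantization_config())

m = prepare_pt2e(m, quantizer)  # annotate for quantization
m(*example_inputs)              # calibration pass over sample inputs
m = convert_pt2e(m)             # fold observers into quantized ops

# The quantized module is then exported as before.
exported_program = export(m, example_inputs)
```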