pytorch
diff --git a/‎_downloads/1ac8a049de0513cb49a0e834e4c27a20/regional_compilation.py‎
Lines changed: 27 additions & 19 deletions b/‎_downloads/1ac8a049de0513cb49a0e834e4c27a20/regional_compilation.py‎
Lines changed: 27 additions & 19 deletions
diff --git a/‎_downloads/3195443a0ced3cabc0ad643537bdb5cd/introyt1_tutorial.ipynb‎
Lines changed: 2 additions & 2 deletions b/‎_downloads/3195443a0ced3cabc0ad643537bdb5cd/introyt1_tutorial.ipynb‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎_downloads/4355e2cef7d17548f1e25f97a62828c4/template_tutorial.ipynb‎
Lines changed: 2 additions & 2 deletions b/‎_downloads/4355e2cef7d17548f1e25f97a62828c4/template_tutorial.ipynb‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎_downloads/63a0f0fc7b3ffb15d3a5ac8db3d521ee/tensors_deeper_tutorial.ipynb‎
Lines changed: 2 additions & 2 deletions b/‎_downloads/63a0f0fc7b3ffb15d3a5ac8db3d521ee/tensors_deeper_tutorial.ipynb‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎_downloads/770632dd3941d2a51b831c52ded57aa2/trainingyt.ipynb‎
Lines changed: 2 additions & 2 deletions b/‎_downloads/770632dd3941d2a51b831c52ded57aa2/trainingyt.ipynb‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎_downloads/c28f42852d456daf9af72da6c6909556/captumyt.ipynb‎
Lines changed: 2 additions & 2 deletions b/‎_downloads/c28f42852d456daf9af72da6c6909556/captumyt.ipynb‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎_downloads/cbd5804c4553cb4a23dc24137bde6077/regional_compilation.ipynb‎
Lines changed: 20 additions & 4 deletions b/‎_downloads/cbd5804c4553cb4a23dc24137bde6077/regional_compilation.ipynb‎
Lines changed: 20 additions & 4 deletions
diff --git a/‎_downloads/e2e556f6b4693c2cef716dd7f40caaf6/tensorboardyt_tutorial.ipynb‎
Lines changed: 2 additions & 2 deletions b/‎_downloads/e2e556f6b4693c2cef716dd7f40caaf6/tensorboardyt_tutorial.ipynb‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎_downloads/ed9d4f94afb79f7dada6742a06c486a5/autogradyt_tutorial.ipynb‎
Lines changed: 2 additions & 2 deletions b/‎_downloads/ed9d4f94afb79f7dada6742a06c486a5/autogradyt_tutorial.ipynb‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎_downloads/fe726e041160526cf828806536922cf6/modelsyt_tutorial.ipynb‎
Lines changed: 2 additions & 2 deletions b/‎_downloads/fe726e041160526cf828806536922cf6/modelsyt_tutorial.ipynb‎
Lines changed: 2 additions & 2 deletions
@@ -30,37 +30,37 @@
    to prevent recompilations during regional compilation. In version 2.5, this flag is enabled by default.
 """
 
-
+from time import perf_counter
 
 ######################################################################
 # Steps
 # -----
-# 
+#
 # In this recipe, we will follow these steps:
 #
 # 1. Import all necessary libraries.
 # 2. Define and initialize a neural network with repeated regions.
 # 3. Understand the difference between the full model and the regional compilation.
 # 4. Measure the compilation time of the full model and the regional compilation.
-# 
-# First, let's import the necessary libraries for loading our data: 
-# 
-# 
-# 
+#
+# First, let's import the necessary libraries:
+#
+#
+#
 
 import torch
 import torch.nn as nn
-from time import perf_counter
+
 
 ##########################################################
 # Next, let's define and initialize a neural network with repeated regions.
-# 
+#
 # Typically, neural networks are composed of repeated layers. For example, a
 # large language model is composed of many Transformer blocks. In this recipe,
 # we will create a ``Layer`` using the ``nn.Module`` class as a proxy for a repeated region.
 # We will then create a ``Model`` which is composed of 64 instances of this
 # ``Layer`` class.
-# 
+#
 class Layer(torch.nn.Module):
     def __init__(self):
         super().__init__()
@@ -77,13 +77,16 @@ def forward(self, x):
         b = self.relu2(b)
         return b
 
+
 class Model(torch.nn.Module):
     def __init__(self, apply_regional_compilation):
         super().__init__()
         self.linear = torch.nn.Linear(10, 10)
         # Apply compile only to the repeated layers.
         if apply_regional_compilation:
-            self.layers = torch.nn.ModuleList([torch.compile(Layer()) for _ in range(64)])
+            self.layers = torch.nn.ModuleList(
+                [torch.compile(Layer()) for _ in range(64)]
+            )
         else:
             self.layers = torch.nn.ModuleList([Layer() for _ in range(64)])
 
@@ -94,15 +97,16 @@ def forward(self, x):
             x = layer(x)
         return x
 
+
 ####################################################
 # Next, let's review the difference between full model compilation and regional compilation.
-# 
-# In full model compilation, the entire model is compiled as a whole. This is the common approach 
+#
+# In full model compilation, the entire model is compiled as a whole. This is the common approach
 # most users take with ``torch.compile``. In this example, we apply ``torch.compile`` to
 # the ``Model`` object. This will effectively inline the 64 layers, producing a
 # large graph to compile. You can look at the full graph by running this recipe
 # with ``TORCH_LOGS=graph_code``.
-# 
+#
 #
 
 model = Model(apply_regional_compilation=False).cuda()
@@ -114,19 +118,19 @@ def forward(self, x):
 # By strategically choosing to compile a repeated region of the model, we can compile a
 # much smaller graph and then reuse the compiled graph for all the regions.
 # In the example, ``torch.compile`` is applied only to the ``layers`` and not the full model.
-# 
+#
 
 regional_compiled_model = Model(apply_regional_compilation=True).cuda()
 
 #####################################################
 # Applying compilation to a repeated region, instead of the full model, leads to
 # large savings in compile time. Here, we will just compile a layer instance and
 # then reuse it 64 times in the ``Model`` object.
-# 
+#
 # Note that with repeated regions, some parts of the model might not be compiled.
 # For example, the ``self.linear`` in the ``Model`` is outside of the scope of
 # regional compilation.
-# 
+#
 # Also, note that there is a tradeoff between performance speedup and compile
 # time. Full model compilation involves a larger graph and,
 # theoretically, offers more scope for optimizations. However, for practical
@@ -138,10 +142,11 @@ def forward(self, x):
 # Next, let's measure the compilation time of the full model and the regional compilation.
 #
 # ``torch.compile`` is a JIT compiler, which means that it compiles on the first invocation.
-# In the code below, we measure the total time spent in the first invocation. While this method is not 
+# In the code below, we measure the total time spent in the first invocation. While this method is not
 # precise, it provides a good estimate since the majority of the time is spent in
 # compilation.
 
+
 def measure_latency(fn, input):
     # Reset the compiler caches to ensure no reuse between different runs
     torch.compiler.reset()
@@ -152,13 +157,16 @@ def measure_latency(fn, input):
         end = perf_counter()
         return end - start
 
+
 input = torch.randn(10, 10, device="cuda")
 full_model_compilation_latency = measure_latency(full_compiled_model, input)
 print(f"Full model compilation time = {full_model_compilation_latency:.2f} seconds")
 
 regional_compilation_latency = measure_latency(regional_compiled_model, input)
 print(f"Regional compilation time = {regional_compilation_latency:.2f} seconds")
 
+assert regional_compilation_latency < full_model_compilation_latency
+
 ############################################################################
 # Conclusion
 # -----------
@@ -167,4 +175,4 @@ def measure_latency(fn, input):
 # has repeated regions. This approach requires user modifications to apply ``torch.compile`` to
 # the repeated regions instead of more commonly used full model compilation. We
 # are continually working on reducing cold start compilation time.
-# 
+#
@@ -34,7 +34,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "4dac0422",
+   "id": "6d7882b6",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -50,7 +50,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "d9d3c3d4",
+   "id": "12d9f983",
    "metadata": {},
    "source": [
     "\n",
 
@@ -31,7 +31,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "335f8dc2",
+   "id": "cf54a5d8",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -47,7 +47,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "88467d9a",
+   "id": "aea90132",
    "metadata": {},
    "source": [
     "\n",
 
@@ -34,7 +34,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "dafe94d5",
+   "id": "3070dd14",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -50,7 +50,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "88fe8d2c",
+   "id": "cd002842",
    "metadata": {},
    "source": [
     "\n",
 
@@ -35,7 +35,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "1df017eb",
+   "id": "908900d2",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -51,7 +51,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "578bf3ae",
+   "id": "95b9be6b",
    "metadata": {},
    "source": [
     "\n",
 
@@ -37,7 +37,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e7fbf414",
+   "id": "a040408b",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -53,7 +53,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "3d96f2c9",
+   "id": "8718e844",
    "metadata": {},
    "source": [
     "\n",
 
@@ -50,6 +50,17 @@
     "</div>\n"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "from time import perf_counter"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -78,8 +89,7 @@
    "outputs": [],
    "source": [
     "import torch\n",
-    "import torch.nn as nn\n",
-    "from time import perf_counter"
+    "import torch.nn as nn"
    ]
   },
   {
@@ -120,13 +130,16 @@
     "        b = self.relu2(b)\n",
     "        return b\n",
     "\n",
+    "\n",
     "class Model(torch.nn.Module):\n",
     "    def __init__(self, apply_regional_compilation):\n",
     "        super().__init__()\n",
     "        self.linear = torch.nn.Linear(10, 10)\n",
     "        # Apply compile only to the repeated layers.\n",
     "        if apply_regional_compilation:\n",
-    "            self.layers = torch.nn.ModuleList([torch.compile(Layer()) for _ in range(64)])\n",
+    "            self.layers = torch.nn.ModuleList(\n",
+    "                [torch.compile(Layer()) for _ in range(64)]\n",
+    "            )\n",
     "        else:\n",
     "            self.layers = torch.nn.ModuleList([Layer() for _ in range(64)])\n",
     "\n",
@@ -238,12 +251,15 @@
     "        end = perf_counter()\n",
     "        return end - start\n",
     "\n",
+    "\n",
     "input = torch.randn(10, 10, device=\"cuda\")\n",
     "full_model_compilation_latency = measure_latency(full_compiled_model, input)\n",
     "print(f\"Full model compilation time = {full_model_compilation_latency:.2f} seconds\")\n",
     "\n",
     "regional_compilation_latency = measure_latency(regional_compiled_model, input)\n",
-    "print(f\"Regional compilation time = {regional_compilation_latency:.2f} seconds\")"
+    "print(f\"Regional compilation time = {regional_compilation_latency:.2f} seconds\")\n",
+    "\n",
+    "assert regional_compilation_latency < full_model_compilation_latency"
    ]
   },
   {
 
@@ -35,7 +35,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "cf3c8e39",
+   "id": "91ec9983",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -51,7 +51,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "a26c0941",
+   "id": "d4edb552",
    "metadata": {},
    "source": [
     "\n",
 
@@ -34,7 +34,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "6a4d7c89",
+   "id": "73b180de",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -50,7 +50,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "99be565a",
+   "id": "f9ecb929",
    "metadata": {},
    "source": [
     "\n",
 
@@ -34,7 +34,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "3d8b7f84",
+   "id": "d7244383",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -50,7 +50,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "3c95a1fd",
+   "id": "2201f142",
    "metadata": {},
    "source": [
     "\n",