Fix broken links across examples #1502
Changes from 1 commit
```diff
@@ -4,8 +4,8 @@

 # # Training a mathematical reasoning model using the verifiers library with sandboxed code execution

-# This example demonstrates how to train mathematical reasoning models on Modal using the [verifiers library](https://github.com/willccbb/verifiers) with [Modal Sandboxes](https://modal.com/docs/guide/sandbox) for executing generated code.
-# The [verifiers library](https://github.com/willccbb/verifiers) is a set of tools and abstractions for training LLMs with reinforcement learning in verifiable multi-turn environments via [GRPO](https://arxiv.org/abs/2402.03300).
+# This example demonstrates how to train mathematical reasoning models on Modal using the [verifiers library](https://github.com/PrimeIntellect-ai/verifiers) with [Modal Sandboxes](https://modal.com/docs/guide/sandbox) for executing generated code.
+# The [verifiers library](https://github.com/PrimeIntellect-ai/verifiers) is a set of tools and abstractions for training LLMs with reinforcement learning in verifiable multi-turn environments via [GRPO](https://arxiv.org/abs/2402.03300).

 # This example demonstrates how to:
 # - Launch a distributed GRPO training job on Modal with 4× H100 GPUs.
```
```diff
@@ -15,7 +15,7 @@

 # ## Setup
 # We start by importing modal and the dependencies from the verifiers library. Then, we create a Modal App and an image with a NVIDIA CUDA base image.
-# We install the dependencies for the `verifiers` and `flash-attn` libraries, following the verifiers [README](https://github.com/willccbb/verifiers?tab=readme-ov-file#getting-started).
+# We install the dependencies for the `verifiers` and `flash-attn` libraries, following the verifiers [README](https://github.com/PrimeIntellect-ai/verifiers?tab=readme-ov-file#getting-started).

 import modal
```
```diff
@@ -80,8 +80,8 @@
 """

 # ## Training
-# Following the [verifiers example](https://github.com/willccbb/verifiers/blob/main/verifiers/examples/math_python.py), we will need a training script and a config file.
-# For sandboxed code execution, we will use [this training script](/docs/examples/trainer_script_grpo) and the config file defined [here](https://github.com/willccbb/verifiers/blob/main/configs/zero3.yaml).
+# Following the [verifiers example](https://github.com/PrimeIntellect-ai/verifiers), we will need a training script and a config file.
```

**Contributor comment:** 🚩 `learn_math.py` line 83: the verifiers link now points to the repo root instead of the specific example file. Suggested change:

```suggestion
-# Following the [verifiers example](https://github.com/PrimeIntellect-ai/verifiers), we will need a training script and a config file.
+# Following the [verifiers example](https://github.com/PrimeIntellect-ai/verifiers/blob/main/verifiers/examples/math_python.py), we will need a training script and a config file.
```
```diff
@@ -46,7 +46,7 @@
     "synonyms": [["cls", "class"]],
 },
 "stop_urls": [
-    "https://modal.com/docs/reference/modal.Stub",
+    "https://modal.com/docs/reference/modal.App",
     "https://modal.com/gpu-glossary",
     "https://modal.com/docs/reference/changelog",
 ],
```
```diff
@@ -156,7 +156,7 @@ def crawl_webhook():

 # The indexed contents can be found at https://www.algolia.com/apps/APP_ID/explorer/browse/, for your
 # APP_ID. Once you're happy with the results, you can [set up the `docsearch` package with your
-# website](https://docsearch.algolia.com/docs/docsearch-v3/), and create a search component that uses this index.
+# website](https://docsearch.algolia.com/docs/legacy/run-your-own/), and create a search component that uses this index.

 # ## Entrypoint for development
```

*devin-ai-integration[bot] marked this conversation as resolved.*
```diff
@@ -11,7 +11,7 @@
 # to the sheer size of the model, the cold start time on Modal is around 2 minutes.
 #
 # For faster cold start at the expense of inference speed, check out
-# [Running Falcon-40B with AutoGPTQ](https://modal.com/docs/examples/falcon_gptq).
+# [Running Falcon-40B with AutoGPTQ](https://github.com/modal-labs/modal-examples/blob/main/misc/falcon_gptq.py).
```

Suggested change:

```suggestion
-# [Running Falcon-40B with AutoGPTQ](https://github.com/modal-labs/modal-examples/blob/main/misc/falcon_gptq.py).
+# [Running Falcon-40B with AutoGPTQ](./falcon_gptq.py).
```
```diff
@@ -8,7 +8,7 @@
 # cold start time on Modal is around 25s.
 #
 # For faster inference at the expense of a slower cold start, check out
-# [Running Falcon-40B with `bitsandbytes` quantization](https://modal.com/docs/examples/falcon_bitsandbytes). You can also
+# [Running Falcon-40B with `bitsandbytes` quantization](https://github.com/modal-labs/modal-examples/blob/main/misc/falcon_bitsandbytes.py). You can also
```

Suggested change:

```suggestion
-# [Running Falcon-40B with `bitsandbytes` quantization](https://github.com/modal-labs/modal-examples/blob/main/misc/falcon_bitsandbytes.py). You can also
+# [Running Falcon-40B with `bitsandbytes` quantization](https://modal.com/docs/examples/falcon_bitsandbytes). You can also
```
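Every hunk in this PR swaps a stale URL for a live one. Link rot of this kind can be caught mechanically before review; below is a minimal sketch of such a checker. It is a hypothetical helper, not part of the modal-examples repository, and uses only the standard library; the `MD_LINK` pattern and function names are illustrative assumptions.

```python
import re
import urllib.request

# Matches absolute markdown links, e.g. [verifiers](https://github.com/...)
MD_LINK = re.compile(r"\[[^\]]*\]\((https?://[^)\s]+)\)")


def extract_links(source: str) -> list[str]:
    """Return every absolute markdown link target found in the text."""
    return [m.group(1) for m in MD_LINK.finditer(source)]


def is_alive(url: str, timeout: float = 10.0) -> bool:
    """HEAD-request the URL; treat any status below 400 as alive."""
    request = urllib.request.Request(
        url, method="HEAD", headers={"User-Agent": "link-check"}
    )
    try:
        with urllib.request.urlopen(request, timeout=timeout) as response:
            return response.status < 400
    except Exception:
        return False


def dead_links(source: str) -> list[str]:
    """List the links in `source` that do not currently resolve."""
    return [url for url in extract_links(source) if not is_alive(url)]
```

Run over each example file's comment prose and fail CI when `dead_links` returns anything, and regressions like the ones fixed here surface automatically.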