Merged
2 changes: 1 addition & 1 deletion 06_gpu_and_ml/llm-serving/ministral3_inference.py
@@ -70,7 +70,7 @@

# Native hardware support for FP8 formats in [Tensor Cores](https://modal.com/gpu-glossary/device-hardware/tensor-core)
# is limited to the latest [Streaming Multiprocessor architectures](https://modal.com/gpu-glossary/device-hardware/streaming-multiprocessor-architecture),
- # like those of Modal's [Hopper H100/H200 and Blackwell B200 GPUs](https://modal.com/blog/announcing-h200-b200).
+ # like those of Modal's [Hopper H100/H200 and Blackwell B200 GPUs](https://modal.com/blog/introducing-b200-h200).

# At 80 GB VRAM, a single H100 GPU has enough space to store the 8B FP8 model weights (~8 GB)
# and a very large KV cache. A single H100 is also enough to serve the 14B model in full precision,
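The arithmetic behind these claims can be sanity-checked with a tiny sketch (assuming 1 byte per parameter for FP8 and 2 bytes per parameter for BF16 "full precision"; activations and the KV cache need additional headroom on top of the weights):

```python
# Back-of-envelope weight-memory estimate; `weight_gb` is a hypothetical
# helper for illustration, not part of the Modal API.
def weight_gb(n_params: float, bytes_per_param: float) -> float:
    """Approximate weight footprint in GB (1 GB = 1e9 bytes here)."""
    return n_params * bytes_per_param / 1e9

H100_VRAM_GB = 80

print(weight_gb(8e9, 1))   # 8B model in FP8  -> 8.0 GB
print(weight_gb(14e9, 2))  # 14B model in BF16 -> 28.0 GB, well under 80 GB
```

Both figures leave tens of gigabytes free on a single H100, which is what makes room for a large KV cache.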
2 changes: 1 addition & 1 deletion 06_gpu_and_ml/llm-serving/very_large_models.py
@@ -277,7 +277,7 @@ def _start_server() -> subprocess.Popen:
app = modal.App("example-serve-very-large-models", image=image)

# Most importantly, we need to decide what hardware to run on.
- # [H200 and B200 GPUs](https://modal.com/blog/introducting-b200-h200)
+ # [H200 and B200 GPUs](https://modal.com/blog/introducing-b200-h200)
# have over 100 GB of [GPU RAM](https://modal.com/gpu-glossary/device-hardware/gpu-ram) --
# 141 GB and 180 GB, respectively.
# The model's weights will be stored in this memory,
2 changes: 1 addition & 1 deletion 06_gpu_and_ml/llm-serving/vllm_inference.py
@@ -52,7 +52,7 @@
# We'll use an FP8 (eight-bit floating-point) post-training-quantized variant: `Qwen/Qwen3-4B-Thinking-2507-FP8`.
# Native hardware support for FP8 formats in [Tensor Cores](https://modal.com/gpu-glossary/device-hardware/tensor-core)
# is limited to the latest [Streaming Multiprocessor architectures](https://modal.com/gpu-glossary/device-hardware/streaming-multiprocessor-architecture),
- # like those of Modal's [Hopper H100/H200 and Blackwell B200 GPUs](https://modal.com/blog/announcing-h200-b200).
+ # like those of Modal's [Hopper H100/H200 and Blackwell B200 GPUs](https://modal.com/blog/introducing-b200-h200).

# You can swap this model out for another by changing the strings below.
# A single H100 GPU has enough VRAM to store a 4,000,000,000 parameter model,
@@ -74,7 +74,7 @@

# ## Using WebSockets to stream audio and diarization results

- # We use a Modal [ASGI](https://modal.com/docs/guide/asgi) app to serve the diarization results
+ # We use a Modal [ASGI](https://modal.com/docs/guide/webhooks) app to serve the diarization results
Contributor

🚩 ASGI link text doesn't match new webhooks destination

The link text still says [ASGI] but the URL was changed from /docs/guide/asgi to /docs/guide/webhooks. If the webhooks page doesn't cover ASGI specifics, this could be misleading to readers. The PR checklist notes this was reviewed and the old /docs/guide/asgi page no longer exists, so /docs/guide/webhooks is the closest valid destination. However, it may be worth updating the link text from ASGI to something like ASGI/webhooks to better match the destination page content.


# over WebSockets. This allows us to stream the diarization results to the client in real time.

# We use a simple queue-based architecture to handle the audio and diarization results.
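The queue-based pattern described above can be sketched in plain asyncio (a minimal sketch: `process_chunk` and `pipeline` are hypothetical stand-ins for the real diarization step and WebSocket handler, which the example wires up differently):

```python
import asyncio


async def process_chunk(chunk: bytes) -> str:
    # Stand-in for the real diarization work on one audio chunk.
    await asyncio.sleep(0)
    return f"speaker segment for {len(chunk)} bytes"


async def pipeline(chunks: list[bytes]) -> list[str]:
    audio_q: asyncio.Queue = asyncio.Queue()
    results: list[str] = []

    async def producer() -> None:
        # In the real app, chunks arrive over the WebSocket.
        for chunk in chunks:
            await audio_q.put(chunk)
        await audio_q.put(None)  # sentinel: no more audio

    async def consumer() -> None:
        # Drain the queue and collect (in the real app, stream) results.
        while (chunk := await audio_q.get()) is not None:
            results.append(await process_chunk(chunk))

    await asyncio.gather(producer(), consumer())
    return results
```

The queue decouples the receive rate of audio from the processing rate of diarization, so a slow model step never blocks the socket reader.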
2 changes: 1 addition & 1 deletion 13_sandboxes/sandbox_agent.py
@@ -15,7 +15,7 @@

app = modal.App.lookup("example-sandbox-agent", create_if_missing=True)

- # First, we create a custom [Image](https://modal.com/docs/images) that has Claude Code
+ # First, we create a custom [Image](https://modal.com/docs/guide/images) that has Claude Code
# and git installed.

image = (