modal-labs · ekzhang · Feb 10, 2025 · Feb 10, 2025 · Feb 10, 2025 · ekzhang
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,7 +1,7 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # keep version here in sync with CI/CD and other modal repos
-    rev: "v0.2.1"
+    rev: "v0.9.6"
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix]

diff --git a/05_scheduling/schedule_simple.py b/05_scheduling/schedule_simple.py
@@ -27,14 +27,14 @@
 @app.function(schedule=modal.Period(seconds=5))
 def print_time_1():
     print(
-        f'Printing with period 5 seconds: {datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}'
+        f"Printing with period 5 seconds: {datetime.now().strftime('%m/%d/%Y, %H:%M:%S')}"
     )
 
 
 @app.function(schedule=modal.Cron("* * * * *"))
 def print_time_2():
     print(
-        f'Printing with cron every minute: {datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}'
+        f"Printing with cron every minute: {datetime.now().strftime('%m/%d/%Y, %H:%M:%S')}"
     )
 
 

diff --git a/06_gpu_and_ml/embeddings/wikipedia/download.py b/06_gpu_and_ml/embeddings/wikipedia/download.py
@@ -27,7 +27,7 @@ def download_dataset():
     start = time.time()
     dataset = load_dataset(DATASET_NAME, DATASET_CONFIG, num_proc=6)
     end = time.time()
-    print(f"Download complete - downloaded files in {end-start}s")
+    print(f"Download complete - downloaded files in {end - start}s")
 
     dataset.save_to_disk(f"{DATASET_DIR}/{DATASET_NAME}")
     volume.commit()

diff --git a/06_gpu_and_ml/embeddings/wikipedia/main.py b/06_gpu_and_ml/embeddings/wikipedia/main.py
@@ -181,7 +181,7 @@ def load_dataset_from_disk(down_scale: float = 0.01):
     # Load the dataset as a Hugging Face dataset
     print(f"Loading dataset from {DATASET_DIR}/wikipedia")
     dataset = load_from_disk(f"{DATASET_DIR}/wikipedia")
-    print(f"Dataset loaded in {time.perf_counter()-start:.2f} seconds")
+    print(f"Dataset loaded in {time.perf_counter() - start:.2f} seconds")
 
     # Extract the total size of the dataset
     ttl_size = len(dataset["train"])
@@ -254,7 +254,7 @@ def upload_result_to_hf(batch_size: int) -> None:
     )
 
     end = time.perf_counter()
-    print(f"Uploaded in {end-start}s")
+    print(f"Uploaded in {end - start}s")
 
 
 @app.function(

diff --git a/06_gpu_and_ml/hyperparameter-sweep/hp_sweep_gpt.py b/06_gpu_and_ml/hyperparameter-sweep/hp_sweep_gpt.py
@@ -169,7 +169,7 @@ def train_model(
 
     L.basicConfig(
         level=L.INFO,
-        format=f"\033[0;32m%(asctime)s %(levelname)s [%(filename)s.%(funcName)s:%(lineno)d] [Node {node_rank+1}] %(message)s\033[0m",
+        format=f"\033[0;32m%(asctime)s %(levelname)s [%(filename)s.%(funcName)s:%(lineno)d] [Node {node_rank + 1}] %(message)s\033[0m",
         datefmt="%b %d %H:%M:%S",
     )
 
@@ -336,7 +336,7 @@ def main(
         node_rank = result[0]
         results.append(result)
         print(
-            f"[Node {node_rank+1}/{n_nodes}] Finished."
+            f"[Node {node_rank + 1}/{n_nodes}] Finished."
             f" Early stop val loss result: {result[1:]}"
         )
 

diff --git a/06_gpu_and_ml/hyperparameter-sweep/src/dataset.py b/06_gpu_and_ml/hyperparameter-sweep/src/dataset.py
@@ -19,9 +19,9 @@ def __init__(
         self.device = device
         self.batch_size = batch_size
         self.context_size = context_size
-        assert (train_percent > 0.0) and (
-            train_percent < 1.0
-        ), "train_percent must be in (0,1)"
+        assert (train_percent > 0.0) and (train_percent < 1.0), (
+            "train_percent must be in (0,1)"
+        )
 
         # Train/Validation split.
         data = torch.tensor(encoded_text, dtype=torch.long)

diff --git a/06_gpu_and_ml/hyperparameter-sweep/src/model.py b/06_gpu_and_ml/hyperparameter-sweep/src/model.py
@@ -111,9 +111,9 @@ def __init__(self, vocab_size, hparams, device):
         self.context_size = hparams.context_size
         self.device = device
         # Sanity check parameters
-        assert (
-            hparams.n_embed % hparams.n_heads == 0
-        ), "n_embed must be divisible by n_heads"
+        assert hparams.n_embed % hparams.n_heads == 0, (
+            "n_embed must be divisible by n_heads"
+        )
 
         self.token_embedding_table = nn.Embedding(
             vocab_size, hparams.n_embed, device=device

diff --git a/06_gpu_and_ml/llm-serving/openai_compatible/client.py b/06_gpu_and_ml/llm-serving/openai_compatible/client.py
@@ -222,7 +222,7 @@ def main():
                 for i, response in enumerate(response.choices):
                     print(
                         Colors.BLUE
-                        + f"\n🤖 Choice {i+1}:{response.message.content}"
+                        + f"\n🤖 Choice {i + 1}:{response.message.content}"
                         + Colors.END,
                         sep="",
                     )

diff --git a/06_gpu_and_ml/llm-serving/trtllm_llama.py b/06_gpu_and_ml/llm-serving/trtllm_llama.py
@@ -323,9 +323,9 @@ def generate(self, prompts: list[str], settings=None):
                 repetition_penalty=1.1,
             )
 
-        settings[
-            "max_new_tokens"
-        ] = MAX_OUTPUT_LEN  # exceeding this will raise an error
+        settings["max_new_tokens"] = (
+            MAX_OUTPUT_LEN  # exceeding this will raise an error
+        )
         settings["end_id"] = self.end_id
         settings["pad_id"] = self.pad_id
 

diff --git a/06_gpu_and_ml/openai_whisper/finetuning/train/end_to_end_check.py b/06_gpu_and_ml/openai_whisper/finetuning/train/end_to_end_check.py
@@ -65,9 +65,9 @@ def test_download_and_tryout_model(run_id: str):
         f"{expected_transcription=}\n{predicted_transcription=}\n"
         f"Word Error Rate (WER): {wer_score}"
     )
-    assert (
-        wer_score < 1.0
-    ), f"Even without finetuning, a WER score of {wer_score} is far too high."
+    assert wer_score < 1.0, (
+        f"Even without finetuning, a WER score of {wer_score} is far too high."
+    )
 
 
 # This simple entrypoint function just starts an ephemeral app run and calls

diff --git a/06_gpu_and_ml/openai_whisper/finetuning/train/train.py b/06_gpu_and_ml/openai_whisper/finetuning/train/train.py
@@ -517,9 +517,9 @@ def compute_metrics(pred):
         kwargs["dataset_tags"] = data_args.dataset_name
         if data_args.dataset_config_name is not None:
             kwargs["dataset_args"] = data_args.dataset_config_name
-            kwargs[
-                "dataset"
-            ] = f"{data_args.dataset_name} {data_args.dataset_config_name}"
+            kwargs["dataset"] = (
+                f"{data_args.dataset_name} {data_args.dataset_config_name}"
+            )
         else:
             kwargs["dataset"] = data_args.dataset_name
 

diff --git a/06_gpu_and_ml/sam/segment_anything.py b/06_gpu_and_ml/sam/segment_anything.py
@@ -106,8 +106,9 @@ def generate_video_masks(
         labels = np.array([1] * len(points), np.int32)
 
         # run the model on GPU
-        with torch.inference_mode(), torch.autocast(
-            "cuda", dtype=torch.bfloat16
+        with (
+            torch.inference_mode(),
+            torch.autocast("cuda", dtype=torch.bfloat16),
         ):
             self.inference_state = self.video_predictor.init_state(
                 video_path=str(frames_dir)

diff --git a/06_gpu_and_ml/stable_diffusion/text_to_image.py b/06_gpu_and_ml/stable_diffusion/text_to_image.py
@@ -177,10 +177,10 @@ def entrypoint(
         start = time.time()
         images = inference_service.run.remote(prompt, batch_size, seed)
         duration = time.time() - start
-        print(f"Run {sample_idx+1} took {duration:.3f}s")
+        print(f"Run {sample_idx + 1} took {duration:.3f}s")
         if sample_idx:
             print(
-                f"\tGenerated {len(images)} image(s) at {(duration)/len(images):.3f}s / image."
+                f"\tGenerated {len(images)} image(s) at {(duration) / len(images):.3f}s / image."
             )
         for batch_idx, image_bytes in enumerate(images):
             output_path = (

diff --git a/06_gpu_and_ml/text-to-pokemon/text_to_pokemon/inpaint.py b/06_gpu_and_ml/text-to-pokemon/text_to_pokemon/inpaint.py
@@ -100,9 +100,9 @@ def new_pokemon_name(
         mask_img_bytes = buf.getvalue()
         mask, _ = load_img(mask_img_bytes)
 
-    assert (
-        img.shape[:2] == mask.shape[:2]
-    ), "shapes of base image and mask must match"
+    assert img.shape[:2] == mask.shape[:2], (
+        "shapes of base image and mask must match"
+    )
 
     # "No GPU is required, and for simple backgrounds, the results may even be better than AI models."
     cur_res = cv2.inpaint(

diff --git a/06_gpu_and_ml/text-to-pokemon/text_to_pokemon/ops.py b/06_gpu_and_ml/text-to-pokemon/text_to_pokemon/ops.py
@@ -37,10 +37,10 @@ def reset_diskcache(dry_run=True) -> None:
                 filepath.unlink()
         if files and dry_run:
             print(
-                f"🏜 dry-run: would have deleted {i+1} Pokémon character samples"
+                f"🏜 dry-run: would have deleted {i + 1} Pokémon character samples"
             )
         elif files:
-            print(f"deleted {i+1} Pokémon character samples")
+            print(f"deleted {i + 1} Pokémon character samples")
         else:
             print("No Pokémon character samples to delete")
 
@@ -57,9 +57,9 @@ def reset_diskcache(dry_run=True) -> None:
                 filepath.unlink()
 
         if files and dry_run:
-            print(f"🏜 dry-run: would have deleted {i+1} Pokémon card images")
+            print(f"🏜 dry-run: would have deleted {i + 1} Pokémon card images")
         elif files:
-            print(f"deleted {i+1} Pokémon card images")
+            print(f"deleted {i + 1} Pokémon card images")
         else:
             print("No Pokémon character card images to delete")
 

diff --git a/07_web_endpoints/count_faces.py b/07_web_endpoints/count_faces.py
@@ -97,7 +97,7 @@ async def process(file: UploadFile = File(...)):
                     <title>Face Counter Result</title>
                 </head>
                 <body>
-                    <h1>{inflect.engine().number_to_words(num_faces).title()} {'Face' if num_faces==1 else 'Faces'} Detected</h1>
+                    <h1>{inflect.engine().number_to_words(num_faces).title()} {"Face" if num_faces == 1 else "Faces"} Detected</h1>
                     <h2>{"😀" * num_faces}</h2>
                     <a href="/">Go back</a>
                 </body>

diff --git a/09_job_queues/doc_ocr_jobs.py b/09_job_queues/doc_ocr_jobs.py
@@ -60,7 +60,9 @@ def setup():
 
     from transformers import AutoModel, AutoTokenizer
 
-    with warnings.catch_warnings():  # filter noisy warnings from GOT modeling code
+    with (
+        warnings.catch_warnings()
+    ):  # filter noisy warnings from GOT modeling code
         warnings.simplefilter("ignore")
         tokenizer = AutoTokenizer.from_pretrained(
             MODEL_NAME, revision=MODEL_REVISION, trust_remote_code=True

diff --git a/11_notebooks/basic.ipynb b/11_notebooks/basic.ipynb
@@ -28,9 +28,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from modal import app\n",
-    "\n",
-    "app = modal.App(name=\"example-basic-notebook-app\")\n"
+    "app = modal.App(name=\"example-basic-notebook-app\")"
    ]
   },
   {
@@ -89,7 +87,7 @@
     "\n",
     "\n",
     "with app.run():\n",
-    "    print(quadruple.local(100))   # running locally\n",
+    "    print(quadruple.local(100))  # running locally\n",
     "    print(quadruple.remote(100))  # run remotely\n",
     "    print(\"Doing a very inefficient remote multiplication just for fun!\")\n",
     "    result = quadruple.remote(10_000_000)"
@@ -124,6 +122,7 @@
     "@app.function(gpu=\"any\")\n",
     "def hello_gpu():\n",
     "    import subprocess\n",
+    "\n",
     "    subprocess.run(\"nvidia-smi\", shell=True, check=True)\n",
     "    return \"hello from a remote GPU!\"\n",
     "\n",

diff --git a/12_datasets/coco.py b/12_datasets/coco.py
@@ -45,7 +45,7 @@ def log_disk_space(interval: int) -> None:
             statvfs = os.statvfs("/")
             free_space = statvfs.f_frsize * statvfs.f_bavail
             print(
-                f"{task_id} free disk space: {free_space / (1024 ** 3):.2f} GB",
+                f"{task_id} free disk space: {free_space / (1024**3):.2f} GB",
                 file=sys.stderr,
             )
             time.sleep(interval)
@@ -62,13 +62,16 @@ def extractall(fzip, dest, desc="Extracting"):
     from tqdm.utils import CallbackIOWrapper
 
     dest = pathlib.Path(dest).expanduser()
-    with zipfile.ZipFile(fzip) as zipf, tqdm(
-        desc=desc,
-        unit="B",
-        unit_scale=True,
-        unit_divisor=1024,
-        total=sum(getattr(i, "file_size", 0) for i in zipf.infolist()),
-    ) as pbar:
+    with (
+        zipfile.ZipFile(fzip) as zipf,
+        tqdm(
+            desc=desc,
+            unit="B",
+            unit_scale=True,
+            unit_divisor=1024,
+            total=sum(getattr(i, "file_size", 0) for i in zipf.infolist()),
+        ) as pbar,
+    ):
         for i in zipf.infolist():
             if not getattr(i, "file_size", 0):  # directory
                 zipf.extract(i, os.fspath(dest))

diff --git a/12_datasets/imagenet.py b/12_datasets/imagenet.py
@@ -50,7 +50,7 @@ def log_disk_space(interval: int) -> None:
             statvfs = os.statvfs("/")
             free_space = statvfs.f_frsize * statvfs.f_bavail
             print(
-                f"{task_id} free disk space: {free_space / (1024 ** 3):.2f} GB",
+                f"{task_id} free disk space: {free_space / (1024**3):.2f} GB",
                 file=sys.stderr,
             )
             time.sleep(interval)
@@ -104,13 +104,16 @@ def extractall(fzip, dest, desc="Extracting"):
     from tqdm.utils import CallbackIOWrapper
 
     dest = pathlib.Path(dest).expanduser()
-    with zipfile.ZipFile(fzip) as zipf, tqdm(
-        desc=desc,
-        unit="B",
-        unit_scale=True,
-        unit_divisor=1024,
-        total=sum(getattr(i, "file_size", 0) for i in zipf.infolist()),
-    ) as pbar:
+    with (
+        zipfile.ZipFile(fzip) as zipf,
+        tqdm(
+            desc=desc,
+            unit="B",
+            unit_scale=True,
+            unit_divisor=1024,
+            total=sum(getattr(i, "file_size", 0) for i in zipf.infolist()),
+        ) as pbar,
+    ):
         for i in zipf.infolist():
             if not getattr(i, "file_size", 0):  # directory
                 zipf.extract(i, os.fspath(dest))

diff --git a/12_datasets/laion400.py b/12_datasets/laion400.py
@@ -59,7 +59,7 @@ def log_disk_space(interval: int) -> None:
             statvfs = os.statvfs("/")
             free_space = statvfs.f_frsize * statvfs.f_bavail
             print(
-                f"{task_id} free disk space: {free_space / (1024 ** 3):.2f} GB",
+                f"{task_id} free disk space: {free_space / (1024**3):.2f} GB",
                 file=sys.stderr,
             )
             time.sleep(interval)

diff --git a/12_datasets/rosettafold.py b/12_datasets/rosettafold.py
@@ -44,7 +44,7 @@ def log_disk_space(interval: int) -> None:
             statvfs = os.statvfs("/")
             free_space = statvfs.f_frsize * statvfs.f_bavail
             print(
-                f"{task_id} free disk space: {free_space / (1024 ** 3):.2f} GB",
+                f"{task_id} free disk space: {free_space / (1024**3):.2f} GB",
                 file=sys.stderr,
             )
             time.sleep(interval)

diff --git a/internal/requirements.txt b/internal/requirements.txt
@@ -5,4 +5,4 @@ nbconvert
 jupytext~=1.16.1
 pydantic~=1.10.14
 mypy==1.2.0
-ruff==0.2.1
+ruff==0.9.6
diff --git a/internal/utils.py b/internal/utils.py
@@ -25,18 +25,18 @@ class ExampleType(int, Enum):
 class Example(BaseModel):
     type: ExampleType
     filename: str  # absolute filepath to example file
-    module: Optional[
-        str
-    ] = None  # python import path, or none if file is not a py module.
+    module: Optional[str] = (
+        None  # python import path, or none if file is not a py module.
+    )
     # TODO(erikbern): don't think the module is used (by docs or monitors)?
     metadata: Optional[dict] = None
     repo_filename: str  # git repo relative filepath
     cli_args: Optional[list] = None  # Full command line args to run it
     stem: Optional[str] = None  # stem of path
     tags: Optional[list[str]] = None  # metadata tags for the example
-    env: Optional[
-        dict[str, str]
-    ] = None  # environment variables for the example
+    env: Optional[dict[str, str]] = (
+        None  # environment variables for the example
+    )
 
 
 _RE_NEWLINE = re.compile(r"\r?\n")

diff --git a/misc/trellis3d.py b/misc/trellis3d.py
@@ -1,4 +1,5 @@
 "This example originally contributed by @sandeeppatra96 and @patraxo on GitHub"
+
 import logging
 import tempfile
 import traceback