Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# keep version here in sync with CI/CD and other modal repos
rev: "v0.2.1"
rev: "v0.9.6"
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
Expand Down
4 changes: 2 additions & 2 deletions 05_scheduling/schedule_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@
@app.function(schedule=modal.Period(seconds=5))
def print_time_1():
print(
f'Printing with period 5 seconds: {datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}'
f"Printing with period 5 seconds: {datetime.now().strftime('%m/%d/%Y, %H:%M:%S')}"
)


@app.function(schedule=modal.Cron("* * * * *"))
def print_time_2():
print(
f'Printing with cron every minute: {datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}'
f"Printing with cron every minute: {datetime.now().strftime('%m/%d/%Y, %H:%M:%S')}"
)


Expand Down
2 changes: 1 addition & 1 deletion 06_gpu_and_ml/embeddings/wikipedia/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def download_dataset():
start = time.time()
dataset = load_dataset(DATASET_NAME, DATASET_CONFIG, num_proc=6)
end = time.time()
print(f"Download complete - downloaded files in {end-start}s")
print(f"Download complete - downloaded files in {end - start}s")

dataset.save_to_disk(f"{DATASET_DIR}/{DATASET_NAME}")
volume.commit()
Expand Down
4 changes: 2 additions & 2 deletions 06_gpu_and_ml/embeddings/wikipedia/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def load_dataset_from_disk(down_scale: float = 0.01):
# Load the dataset as a Hugging Face dataset
print(f"Loading dataset from {DATASET_DIR}/wikipedia")
dataset = load_from_disk(f"{DATASET_DIR}/wikipedia")
print(f"Dataset loaded in {time.perf_counter()-start:.2f} seconds")
print(f"Dataset loaded in {time.perf_counter() - start:.2f} seconds")

# Extract the total size of the dataset
ttl_size = len(dataset["train"])
Expand Down Expand Up @@ -254,7 +254,7 @@ def upload_result_to_hf(batch_size: int) -> None:
)

end = time.perf_counter()
print(f"Uploaded in {end-start}s")
print(f"Uploaded in {end - start}s")


@app.function(
Expand Down
4 changes: 2 additions & 2 deletions 06_gpu_and_ml/hyperparameter-sweep/hp_sweep_gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def train_model(

L.basicConfig(
level=L.INFO,
format=f"\033[0;32m%(asctime)s %(levelname)s [%(filename)s.%(funcName)s:%(lineno)d] [Node {node_rank+1}] %(message)s\033[0m",
format=f"\033[0;32m%(asctime)s %(levelname)s [%(filename)s.%(funcName)s:%(lineno)d] [Node {node_rank + 1}] %(message)s\033[0m",
datefmt="%b %d %H:%M:%S",
)

Expand Down Expand Up @@ -336,7 +336,7 @@ def main(
node_rank = result[0]
results.append(result)
print(
f"[Node {node_rank+1}/{n_nodes}] Finished."
f"[Node {node_rank + 1}/{n_nodes}] Finished."
f" Early stop val loss result: {result[1:]}"
)

Expand Down
6 changes: 3 additions & 3 deletions 06_gpu_and_ml/hyperparameter-sweep/src/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ def __init__(
self.device = device
self.batch_size = batch_size
self.context_size = context_size
assert (train_percent > 0.0) and (
train_percent < 1.0
), "train_percent must be in (0,1)"
assert (train_percent > 0.0) and (train_percent < 1.0), (
"train_percent must be in (0,1)"
)

# Train/Validation split.
data = torch.tensor(encoded_text, dtype=torch.long)
Expand Down
6 changes: 3 additions & 3 deletions 06_gpu_and_ml/hyperparameter-sweep/src/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,9 @@ def __init__(self, vocab_size, hparams, device):
self.context_size = hparams.context_size
self.device = device
# Sanity check parameters
assert (
hparams.n_embed % hparams.n_heads == 0
), "n_embed must be divisible by n_heads"
assert hparams.n_embed % hparams.n_heads == 0, (
"n_embed must be divisible by n_heads"
)

self.token_embedding_table = nn.Embedding(
vocab_size, hparams.n_embed, device=device
Expand Down
2 changes: 1 addition & 1 deletion 06_gpu_and_ml/llm-serving/openai_compatible/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ def main():
for i, response in enumerate(response.choices):
print(
Colors.BLUE
+ f"\n🤖 Choice {i+1}:{response.message.content}"
+ f"\n🤖 Choice {i + 1}:{response.message.content}"
+ Colors.END,
sep="",
)
Expand Down
6 changes: 3 additions & 3 deletions 06_gpu_and_ml/llm-serving/trtllm_llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,9 +323,9 @@ def generate(self, prompts: list[str], settings=None):
repetition_penalty=1.1,
)

settings[
"max_new_tokens"
] = MAX_OUTPUT_LEN # exceeding this will raise an error
settings["max_new_tokens"] = (
MAX_OUTPUT_LEN # exceeding this will raise an error
)
settings["end_id"] = self.end_id
settings["pad_id"] = self.pad_id

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ def test_download_and_tryout_model(run_id: str):
f"{expected_transcription=}\n{predicted_transcription=}\n"
f"Word Error Rate (WER): {wer_score}"
)
assert (
wer_score < 1.0
), f"Even without finetuning, a WER score of {wer_score} is far too high."
assert wer_score < 1.0, (
f"Even without finetuning, a WER score of {wer_score} is far too high."
)


# This simple entrypoint function just starts an ephemeral app run and calls
Expand Down
6 changes: 3 additions & 3 deletions 06_gpu_and_ml/openai_whisper/finetuning/train/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,9 +517,9 @@ def compute_metrics(pred):
kwargs["dataset_tags"] = data_args.dataset_name
if data_args.dataset_config_name is not None:
kwargs["dataset_args"] = data_args.dataset_config_name
kwargs[
"dataset"
] = f"{data_args.dataset_name} {data_args.dataset_config_name}"
kwargs["dataset"] = (
f"{data_args.dataset_name} {data_args.dataset_config_name}"
)
else:
kwargs["dataset"] = data_args.dataset_name

Expand Down
5 changes: 3 additions & 2 deletions 06_gpu_and_ml/sam/segment_anything.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,9 @@ def generate_video_masks(
labels = np.array([1] * len(points), np.int32)

# run the model on GPU
with torch.inference_mode(), torch.autocast(
"cuda", dtype=torch.bfloat16
with (
torch.inference_mode(),
torch.autocast("cuda", dtype=torch.bfloat16),
):
self.inference_state = self.video_predictor.init_state(
video_path=str(frames_dir)
Expand Down
4 changes: 2 additions & 2 deletions 06_gpu_and_ml/stable_diffusion/text_to_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,10 @@ def entrypoint(
start = time.time()
images = inference_service.run.remote(prompt, batch_size, seed)
duration = time.time() - start
print(f"Run {sample_idx+1} took {duration:.3f}s")
print(f"Run {sample_idx + 1} took {duration:.3f}s")
if sample_idx:
print(
f"\tGenerated {len(images)} image(s) at {(duration)/len(images):.3f}s / image."
f"\tGenerated {len(images)} image(s) at {(duration) / len(images):.3f}s / image."
)
for batch_idx, image_bytes in enumerate(images):
output_path = (
Expand Down
6 changes: 3 additions & 3 deletions 06_gpu_and_ml/text-to-pokemon/text_to_pokemon/inpaint.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,9 @@ def new_pokemon_name(
mask_img_bytes = buf.getvalue()
mask, _ = load_img(mask_img_bytes)

assert (
img.shape[:2] == mask.shape[:2]
), "shapes of base image and mask must match"
assert img.shape[:2] == mask.shape[:2], (
"shapes of base image and mask must match"
)

# "No GPU is required, and for simple backgrounds, the results may even be better than AI models."
cur_res = cv2.inpaint(
Expand Down
8 changes: 4 additions & 4 deletions 06_gpu_and_ml/text-to-pokemon/text_to_pokemon/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ def reset_diskcache(dry_run=True) -> None:
filepath.unlink()
if files and dry_run:
print(
f"🏜 dry-run: would have deleted {i+1} Pokémon character samples"
f"🏜 dry-run: would have deleted {i + 1} Pokémon character samples"
)
elif files:
print(f"deleted {i+1} Pokémon character samples")
print(f"deleted {i + 1} Pokémon character samples")
else:
print("No Pokémon character samples to delete")

Expand All @@ -57,9 +57,9 @@ def reset_diskcache(dry_run=True) -> None:
filepath.unlink()

if files and dry_run:
print(f"🏜 dry-run: would have deleted {i+1} Pokémon card images")
print(f"🏜 dry-run: would have deleted {i + 1} Pokémon card images")
elif files:
print(f"deleted {i+1} Pokémon card images")
print(f"deleted {i + 1} Pokémon card images")
else:
print("No Pokémon character card images to delete")

Expand Down
2 changes: 1 addition & 1 deletion 07_web_endpoints/count_faces.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ async def process(file: UploadFile = File(...)):
<title>Face Counter Result</title>
</head>
<body>
<h1>{inflect.engine().number_to_words(num_faces).title()} {'Face' if num_faces==1 else 'Faces'} Detected</h1>
<h1>{inflect.engine().number_to_words(num_faces).title()} {"Face" if num_faces == 1 else "Faces"} Detected</h1>
<h2>{"😀" * num_faces}</h2>
<a href="/">Go back</a>
</body>
Expand Down
4 changes: 3 additions & 1 deletion 09_job_queues/doc_ocr_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ def setup():

from transformers import AutoModel, AutoTokenizer

with warnings.catch_warnings(): # filter noisy warnings from GOT modeling code
with (
warnings.catch_warnings()
): # filter noisy warnings from GOT modeling code
warnings.simplefilter("ignore")
tokenizer = AutoTokenizer.from_pretrained(
MODEL_NAME, revision=MODEL_REVISION, trust_remote_code=True
Expand Down
7 changes: 3 additions & 4 deletions 11_notebooks/basic.ipynb
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This notebook also had a bug (a stray `from modal import app` line ahead of `app = modal.App(...)`), so this change fixes it as well.

Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,7 @@
"metadata": {},
"outputs": [],
"source": [
"from modal import app\n",
"\n",
"app = modal.App(name=\"example-basic-notebook-app\")\n"
"app = modal.App(name=\"example-basic-notebook-app\")"
]
},
{
Expand Down Expand Up @@ -89,7 +87,7 @@
"\n",
"\n",
"with app.run():\n",
" print(quadruple.local(100)) # running locally\n",
" print(quadruple.local(100)) # running locally\n",
" print(quadruple.remote(100)) # run remotely\n",
" print(\"Doing a very inefficient remote multiplication just for fun!\")\n",
" result = quadruple.remote(10_000_000)"
Expand Down Expand Up @@ -124,6 +122,7 @@
"@app.function(gpu=\"any\")\n",
"def hello_gpu():\n",
" import subprocess\n",
"\n",
" subprocess.run(\"nvidia-smi\", shell=True, check=True)\n",
" return \"hello from a remote GPU!\"\n",
"\n",
Expand Down
19 changes: 11 additions & 8 deletions 12_datasets/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def log_disk_space(interval: int) -> None:
statvfs = os.statvfs("/")
free_space = statvfs.f_frsize * statvfs.f_bavail
print(
f"{task_id} free disk space: {free_space / (1024 ** 3):.2f} GB",
f"{task_id} free disk space: {free_space / (1024**3):.2f} GB",
file=sys.stderr,
)
time.sleep(interval)
Expand All @@ -62,13 +62,16 @@ def extractall(fzip, dest, desc="Extracting"):
from tqdm.utils import CallbackIOWrapper

dest = pathlib.Path(dest).expanduser()
with zipfile.ZipFile(fzip) as zipf, tqdm(
desc=desc,
unit="B",
unit_scale=True,
unit_divisor=1024,
total=sum(getattr(i, "file_size", 0) for i in zipf.infolist()),
) as pbar:
with (
zipfile.ZipFile(fzip) as zipf,
tqdm(
desc=desc,
unit="B",
unit_scale=True,
unit_divisor=1024,
total=sum(getattr(i, "file_size", 0) for i in zipf.infolist()),
) as pbar,
):
for i in zipf.infolist():
if not getattr(i, "file_size", 0): # directory
zipf.extract(i, os.fspath(dest))
Expand Down
19 changes: 11 additions & 8 deletions 12_datasets/imagenet.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def log_disk_space(interval: int) -> None:
statvfs = os.statvfs("/")
free_space = statvfs.f_frsize * statvfs.f_bavail
print(
f"{task_id} free disk space: {free_space / (1024 ** 3):.2f} GB",
f"{task_id} free disk space: {free_space / (1024**3):.2f} GB",
file=sys.stderr,
)
time.sleep(interval)
Expand Down Expand Up @@ -104,13 +104,16 @@ def extractall(fzip, dest, desc="Extracting"):
from tqdm.utils import CallbackIOWrapper

dest = pathlib.Path(dest).expanduser()
with zipfile.ZipFile(fzip) as zipf, tqdm(
desc=desc,
unit="B",
unit_scale=True,
unit_divisor=1024,
total=sum(getattr(i, "file_size", 0) for i in zipf.infolist()),
) as pbar:
with (
zipfile.ZipFile(fzip) as zipf,
tqdm(
desc=desc,
unit="B",
unit_scale=True,
unit_divisor=1024,
total=sum(getattr(i, "file_size", 0) for i in zipf.infolist()),
) as pbar,
):
for i in zipf.infolist():
if not getattr(i, "file_size", 0): # directory
zipf.extract(i, os.fspath(dest))
Expand Down
2 changes: 1 addition & 1 deletion 12_datasets/laion400.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def log_disk_space(interval: int) -> None:
statvfs = os.statvfs("/")
free_space = statvfs.f_frsize * statvfs.f_bavail
print(
f"{task_id} free disk space: {free_space / (1024 ** 3):.2f} GB",
f"{task_id} free disk space: {free_space / (1024**3):.2f} GB",
file=sys.stderr,
)
time.sleep(interval)
Expand Down
2 changes: 1 addition & 1 deletion 12_datasets/rosettafold.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def log_disk_space(interval: int) -> None:
statvfs = os.statvfs("/")
free_space = statvfs.f_frsize * statvfs.f_bavail
print(
f"{task_id} free disk space: {free_space / (1024 ** 3):.2f} GB",
f"{task_id} free disk space: {free_space / (1024**3):.2f} GB",
file=sys.stderr,
)
time.sleep(interval)
Expand Down
2 changes: 1 addition & 1 deletion internal/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ nbconvert
jupytext~=1.16.1
pydantic~=1.10.14
mypy==1.2.0
ruff==0.2.1
ruff==0.9.6
12 changes: 6 additions & 6 deletions internal/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,18 @@ class ExampleType(int, Enum):
class Example(BaseModel):
type: ExampleType
filename: str # absolute filepath to example file
module: Optional[
str
] = None # python import path, or none if file is not a py module.
module: Optional[str] = (
None # python import path, or none if file is not a py module.
)
# TODO(erikbern): don't think the module is used (by docs or monitors)?
metadata: Optional[dict] = None
repo_filename: str # git repo relative filepath
cli_args: Optional[list] = None # Full command line args to run it
stem: Optional[str] = None # stem of path
tags: Optional[list[str]] = None # metadata tags for the example
env: Optional[
dict[str, str]
] = None # environment variables for the example
env: Optional[dict[str, str]] = (
None # environment variables for the example
)


_RE_NEWLINE = re.compile(r"\r?\n")
Expand Down
1 change: 1 addition & 0 deletions misc/trellis3d.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"This example originally contributed by @sandeeppatra96 and @patraxo on GitHub"

import logging
import tempfile
import traceback
Expand Down