Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# keep version here in sync with CI/CD and other modal repos
rev: "v0.2.1"
rev: "v0.9.6"
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
Expand Down
4 changes: 2 additions & 2 deletions 05_scheduling/schedule_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@
@app.function(schedule=modal.Period(seconds=5))
def print_time_1():
print(
f'Printing with period 5 seconds: {datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}'
f"Printing with period 5 seconds: {datetime.now().strftime('%m/%d/%Y, %H:%M:%S')}"
)


@app.function(schedule=modal.Cron("* * * * *"))
def print_time_2():
print(
f'Printing with cron every minute: {datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}'
f"Printing with cron every minute: {datetime.now().strftime('%m/%d/%Y, %H:%M:%S')}"
)


Expand Down
2 changes: 1 addition & 1 deletion 06_gpu_and_ml/embeddings/wikipedia/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def download_dataset():
start = time.time()
dataset = load_dataset(DATASET_NAME, DATASET_CONFIG, num_proc=6)
end = time.time()
print(f"Download complete - downloaded files in {end-start}s")
print(f"Download complete - downloaded files in {end - start}s")

dataset.save_to_disk(f"{DATASET_DIR}/{DATASET_NAME}")
volume.commit()
Expand Down
4 changes: 2 additions & 2 deletions 06_gpu_and_ml/embeddings/wikipedia/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def load_dataset_from_disk(down_scale: float = 0.01):
# Load the dataset as a Hugging Face dataset
print(f"Loading dataset from {DATASET_DIR}/wikipedia")
dataset = load_from_disk(f"{DATASET_DIR}/wikipedia")
print(f"Dataset loaded in {time.perf_counter()-start:.2f} seconds")
print(f"Dataset loaded in {time.perf_counter() - start:.2f} seconds")

# Extract the total size of the dataset
ttl_size = len(dataset["train"])
Expand Down Expand Up @@ -254,7 +254,7 @@ def upload_result_to_hf(batch_size: int) -> None:
)

end = time.perf_counter()
print(f"Uploaded in {end-start}s")
print(f"Uploaded in {end - start}s")


@app.function(
Expand Down
4 changes: 2 additions & 2 deletions 06_gpu_and_ml/hyperparameter-sweep/hp_sweep_gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def train_model(

L.basicConfig(
level=L.INFO,
format=f"\033[0;32m%(asctime)s %(levelname)s [%(filename)s.%(funcName)s:%(lineno)d] [Node {node_rank+1}] %(message)s\033[0m",
format=f"\033[0;32m%(asctime)s %(levelname)s [%(filename)s.%(funcName)s:%(lineno)d] [Node {node_rank + 1}] %(message)s\033[0m",
datefmt="%b %d %H:%M:%S",
)

Expand Down Expand Up @@ -336,7 +336,7 @@ def main(
node_rank = result[0]
results.append(result)
print(
f"[Node {node_rank+1}/{n_nodes}] Finished."
f"[Node {node_rank + 1}/{n_nodes}] Finished."
f" Early stop val loss result: {result[1:]}"
)

Expand Down
6 changes: 3 additions & 3 deletions 06_gpu_and_ml/hyperparameter-sweep/src/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ def __init__(
self.device = device
self.batch_size = batch_size
self.context_size = context_size
assert (train_percent > 0.0) and (
train_percent < 1.0
), "train_percent must be in (0,1)"
assert (train_percent > 0.0) and (train_percent < 1.0), (
"train_percent must be in (0,1)"
)

# Train/Validation split.
data = torch.tensor(encoded_text, dtype=torch.long)
Expand Down
6 changes: 3 additions & 3 deletions 06_gpu_and_ml/hyperparameter-sweep/src/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,9 @@ def __init__(self, vocab_size, hparams, device):
self.context_size = hparams.context_size
self.device = device
# Sanity check parameters
assert (
hparams.n_embed % hparams.n_heads == 0
), "n_embed must be divisible by n_heads"
assert hparams.n_embed % hparams.n_heads == 0, (
"n_embed must be divisible by n_heads"
)

self.token_embedding_table = nn.Embedding(
vocab_size, hparams.n_embed, device=device
Expand Down
2 changes: 1 addition & 1 deletion 06_gpu_and_ml/llm-serving/openai_compatible/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ def main():
for i, response in enumerate(response.choices):
print(
Colors.BLUE
+ f"\n🤖 Choice {i+1}:{response.message.content}"
+ f"\n🤖 Choice {i + 1}:{response.message.content}"
+ Colors.END,
sep="",
)
Expand Down
6 changes: 3 additions & 3 deletions 06_gpu_and_ml/llm-serving/trtllm_llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,9 +323,9 @@ def generate(self, prompts: list[str], settings=None):
repetition_penalty=1.1,
)

settings[
"max_new_tokens"
] = MAX_OUTPUT_LEN # exceeding this will raise an error
settings["max_new_tokens"] = (
MAX_OUTPUT_LEN # exceeding this will raise an error
)
settings["end_id"] = self.end_id
settings["pad_id"] = self.pad_id

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ def test_download_and_tryout_model(run_id: str):
f"{expected_transcription=}\n{predicted_transcription=}\n"
f"Word Error Rate (WER): {wer_score}"
)
assert (
wer_score < 1.0
), f"Even without finetuning, a WER score of {wer_score} is far too high."
assert wer_score < 1.0, (
f"Even without finetuning, a WER score of {wer_score} is far too high."
)


# This simple entrypoint function just starts an ephemeral app run and calls
Expand Down
6 changes: 3 additions & 3 deletions 06_gpu_and_ml/openai_whisper/finetuning/train/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,9 +517,9 @@ def compute_metrics(pred):
kwargs["dataset_tags"] = data_args.dataset_name
if data_args.dataset_config_name is not None:
kwargs["dataset_args"] = data_args.dataset_config_name
kwargs[
"dataset"
] = f"{data_args.dataset_name} {data_args.dataset_config_name}"
kwargs["dataset"] = (
f"{data_args.dataset_name} {data_args.dataset_config_name}"
)
else:
kwargs["dataset"] = data_args.dataset_name

Expand Down
5 changes: 3 additions & 2 deletions 06_gpu_and_ml/sam/segment_anything.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,9 @@ def generate_video_masks(
labels = np.array([1] * len(points), np.int32)

# run the model on GPU
with torch.inference_mode(), torch.autocast(
"cuda", dtype=torch.bfloat16
with (
torch.inference_mode(),
torch.autocast("cuda", dtype=torch.bfloat16),
):
self.inference_state = self.video_predictor.init_state(
video_path=str(frames_dir)
Expand Down
4 changes: 2 additions & 2 deletions 06_gpu_and_ml/stable_diffusion/text_to_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,10 @@ def entrypoint(
start = time.time()
images = inference_service.run.remote(prompt, batch_size, seed)
duration = time.time() - start
print(f"Run {sample_idx+1} took {duration:.3f}s")
print(f"Run {sample_idx + 1} took {duration:.3f}s")
if sample_idx:
print(
f"\tGenerated {len(images)} image(s) at {(duration)/len(images):.3f}s / image."
f"\tGenerated {len(images)} image(s) at {(duration) / len(images):.3f}s / image."
)
for batch_idx, image_bytes in enumerate(images):
output_path = (
Expand Down
6 changes: 3 additions & 3 deletions 06_gpu_and_ml/text-to-pokemon/text_to_pokemon/inpaint.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,9 @@ def new_pokemon_name(
mask_img_bytes = buf.getvalue()
mask, _ = load_img(mask_img_bytes)

assert (
img.shape[:2] == mask.shape[:2]
), "shapes of base image and mask must match"
assert img.shape[:2] == mask.shape[:2], (
"shapes of base image and mask must match"
)

# "No GPU is required, and for simple backgrounds, the results may even be better than AI models."
cur_res = cv2.inpaint(
Expand Down
8 changes: 4 additions & 4 deletions 06_gpu_and_ml/text-to-pokemon/text_to_pokemon/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ def reset_diskcache(dry_run=True) -> None:
filepath.unlink()
if files and dry_run:
print(
f"🏜 dry-run: would have deleted {i+1} Pokémon character samples"
f"🏜 dry-run: would have deleted {i + 1} Pokémon character samples"
)
elif files:
print(f"deleted {i+1} Pokémon character samples")
print(f"deleted {i + 1} Pokémon character samples")
else:
print("No Pokémon character samples to delete")

Expand All @@ -57,9 +57,9 @@ def reset_diskcache(dry_run=True) -> None:
filepath.unlink()

if files and dry_run:
print(f"🏜 dry-run: would have deleted {i+1} Pokémon card images")
print(f"🏜 dry-run: would have deleted {i + 1} Pokémon card images")
elif files:
print(f"deleted {i+1} Pokémon card images")
print(f"deleted {i + 1} Pokémon card images")
else:
print("No Pokémon character card images to delete")

Expand Down
2 changes: 1 addition & 1 deletion 07_web_endpoints/count_faces.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ async def process(file: UploadFile = File(...)):
<title>Face Counter Result</title>
</head>
<body>
<h1>{inflect.engine().number_to_words(num_faces).title()} {'Face' if num_faces==1 else 'Faces'} Detected</h1>
<h1>{inflect.engine().number_to_words(num_faces).title()} {"Face" if num_faces == 1 else "Faces"} Detected</h1>
<h2>{"😀" * num_faces}</h2>
<a href="/">Go back</a>
</body>
Expand Down
4 changes: 3 additions & 1 deletion 09_job_queues/doc_ocr_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ def setup():

from transformers import AutoModel, AutoTokenizer

with warnings.catch_warnings(): # filter noisy warnings from GOT modeling code
with (
warnings.catch_warnings()
): # filter noisy warnings from GOT modeling code
warnings.simplefilter("ignore")
tokenizer = AutoTokenizer.from_pretrained(
MODEL_NAME, revision=MODEL_REVISION, trust_remote_code=True
Expand Down
7 changes: 3 additions & 4 deletions 11_notebooks/basic.ipynb
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This notebook also had a bug, fixing it

Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,7 @@
"metadata": {},
"outputs": [],
"source": [
"from modal import app\n",
"\n",
"app = modal.App(name=\"example-basic-notebook-app\")\n"
"app = modal.App(name=\"example-basic-notebook-app\")"
]
},
{
Expand Down Expand Up @@ -89,7 +87,7 @@
"\n",
"\n",
"with app.run():\n",
" print(quadruple.local(100)) # running locally\n",
" print(quadruple.local(100)) # running locally\n",
" print(quadruple.remote(100)) # run remotely\n",
" print(\"Doing a very inefficient remote multiplication just for fun!\")\n",
" result = quadruple.remote(10_000_000)"
Expand Down Expand Up @@ -124,6 +122,7 @@
"@app.function(gpu=\"any\")\n",
"def hello_gpu():\n",
" import subprocess\n",
"\n",
" subprocess.run(\"nvidia-smi\", shell=True, check=True)\n",
" return \"hello from a remote GPU!\"\n",
"\n",
Expand Down
19 changes: 11 additions & 8 deletions 12_datasets/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def log_disk_space(interval: int) -> None:
statvfs = os.statvfs("/")
free_space = statvfs.f_frsize * statvfs.f_bavail
print(
f"{task_id} free disk space: {free_space / (1024 ** 3):.2f} GB",
f"{task_id} free disk space: {free_space / (1024**3):.2f} GB",
file=sys.stderr,
)
time.sleep(interval)
Expand All @@ -62,13 +62,16 @@ def extractall(fzip, dest, desc="Extracting"):
from tqdm.utils import CallbackIOWrapper

dest = pathlib.Path(dest).expanduser()
with zipfile.ZipFile(fzip) as zipf, tqdm(
desc=desc,
unit="B",
unit_scale=True,
unit_divisor=1024,
total=sum(getattr(i, "file_size", 0) for i in zipf.infolist()),
) as pbar:
with (
zipfile.ZipFile(fzip) as zipf,
tqdm(
desc=desc,
unit="B",
unit_scale=True,
unit_divisor=1024,
total=sum(getattr(i, "file_size", 0) for i in zipf.infolist()),
) as pbar,
):
for i in zipf.infolist():
if not getattr(i, "file_size", 0): # directory
zipf.extract(i, os.fspath(dest))
Expand Down
19 changes: 11 additions & 8 deletions 12_datasets/imagenet.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def log_disk_space(interval: int) -> None:
statvfs = os.statvfs("/")
free_space = statvfs.f_frsize * statvfs.f_bavail
print(
f"{task_id} free disk space: {free_space / (1024 ** 3):.2f} GB",
f"{task_id} free disk space: {free_space / (1024**3):.2f} GB",
file=sys.stderr,
)
time.sleep(interval)
Expand Down Expand Up @@ -104,13 +104,16 @@ def extractall(fzip, dest, desc="Extracting"):
from tqdm.utils import CallbackIOWrapper

dest = pathlib.Path(dest).expanduser()
with zipfile.ZipFile(fzip) as zipf, tqdm(
desc=desc,
unit="B",
unit_scale=True,
unit_divisor=1024,
total=sum(getattr(i, "file_size", 0) for i in zipf.infolist()),
) as pbar:
with (
zipfile.ZipFile(fzip) as zipf,
tqdm(
desc=desc,
unit="B",
unit_scale=True,
unit_divisor=1024,
total=sum(getattr(i, "file_size", 0) for i in zipf.infolist()),
) as pbar,
):
for i in zipf.infolist():
if not getattr(i, "file_size", 0): # directory
zipf.extract(i, os.fspath(dest))
Expand Down
2 changes: 1 addition & 1 deletion 12_datasets/laion400.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def log_disk_space(interval: int) -> None:
statvfs = os.statvfs("/")
free_space = statvfs.f_frsize * statvfs.f_bavail
print(
f"{task_id} free disk space: {free_space / (1024 ** 3):.2f} GB",
f"{task_id} free disk space: {free_space / (1024**3):.2f} GB",
file=sys.stderr,
)
time.sleep(interval)
Expand Down
2 changes: 1 addition & 1 deletion 12_datasets/rosettafold.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def log_disk_space(interval: int) -> None:
statvfs = os.statvfs("/")
free_space = statvfs.f_frsize * statvfs.f_bavail
print(
f"{task_id} free disk space: {free_space / (1024 ** 3):.2f} GB",
f"{task_id} free disk space: {free_space / (1024**3):.2f} GB",
file=sys.stderr,
)
time.sleep(interval)
Expand Down
12 changes: 6 additions & 6 deletions internal/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,18 @@ class ExampleType(int, Enum):
class Example(BaseModel):
type: ExampleType
filename: str # absolute filepath to example file
module: Optional[
str
] = None # python import path, or none if file is not a py module.
module: Optional[str] = (
None # python import path, or none if file is not a py module.
)
# TODO(erikbern): don't think the module is used (by docs or monitors)?
metadata: Optional[dict] = None
repo_filename: str # git repo relative filepath
cli_args: Optional[list] = None # Full command line args to run it
stem: Optional[str] = None # stem of path
tags: Optional[list[str]] = None # metadata tags for the example
env: Optional[
dict[str, str]
] = None # environment variables for the example
env: Optional[dict[str, str]] = (
None # environment variables for the example
)


_RE_NEWLINE = re.compile(r"\r?\n")
Expand Down
1 change: 1 addition & 0 deletions misc/trellis3d.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"This example originally contributed by @sandeeppatra96 and @patraxo on GitHub"

import logging
import tempfile
import traceback
Expand Down
Loading