Skip to content

Commit cfaeded

Browse files
d4l3kfacebook-github-bot
authored andcommitted
examples/lightning_classy_vision: support NAS (#205)
Summary: This updates the lightning_classy_vision example to have a variable model architecture (can specify # of hidden layers and size) as well as log simple performance metrics for each step. This will be used as part of an Ax HPO example. Pull Request resolved: #205 Test Plan: ``` $ pyre $ pytest examples/apps/lightning_classy_vision/ ``` CI load tensorboard logs to check steps are profiled ![2021-09-28-105636_426x798_scrot](https://user-images.githubusercontent.com/909104/135140441-fb03b25a-4340-4ff4-bb4a-b0d80a1d59e0.png) Reviewed By: kiukchung, aivanou Differential Revision: D31240694 Pulled By: d4l3k fbshipit-source-id: 34c9fee987be586284d08b8cbf3ea5fc649fe143
1 parent c9efb2b commit cfaeded

File tree

7 files changed

+114
-7
lines changed

7 files changed

+114
-7
lines changed

examples/apps/lightning_classy_vision/component.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ def trainer(
7272
image=image,
7373
resource=named_resources[resource]
7474
if resource
75-
else torchx.Resource(cpu=1, gpu=0, memMB=1024),
75+
else torchx.Resource(cpu=1, gpu=0, memMB=1500),
7676
)
7777
],
7878
)

examples/apps/lightning_classy_vision/model.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
import os.path
1616
import subprocess
17-
from typing import Tuple
17+
from typing import Tuple, Optional, List
1818

1919
import fsspec
2020
import pytorch_lightning as pl
@@ -29,15 +29,26 @@ class TinyImageNetModel(pl.LightningModule):
2929
A very simple linear model for the tiny image net dataset.
3030
"""
3131

32-
def __init__(self) -> None:
32+
def __init__(self, layer_sizes: Optional[List[int]] = None) -> None:
3333
super().__init__()
34-
self.l1 = torch.nn.Linear(64 * 64, 4096)
34+
35+
# build a model with hidden layers specified by layer_sizes
36+
if layer_sizes is None:
37+
layer_sizes = []
38+
dims = [64 * 64] + layer_sizes + [4096]
39+
layers = []
40+
for i, (a, b) in enumerate(zip(dims, dims[1:])):
41+
if i > 0:
42+
layers.append(torch.nn.ReLU(inplace=True))
43+
layers.append(torch.nn.Linear(a, b))
44+
self.seq: torch.nn.modules.Sequential = torch.nn.Sequential(*layers)
45+
3546
self.train_acc = Accuracy()
3647
self.val_acc = Accuracy()
3748

3849
# pyre-fixme[14]
3950
def forward(self, x: torch.Tensor) -> torch.Tensor:
40-
return torch.relu(self.l1(x.view(x.size(0), -1)))
51+
return self.seq(x.view(x.size(0), -1))
4152

4253
# pyre-fixme[14]
4354
def training_step(
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) Facebook, Inc. and its affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
"""
9+
Simple Logging Profiler
10+
===========================
11+
12+
This is a simple profiler that's used as part of the trainer app example. This
13+
logs the Lightning training stage durations to a logger such as Tensorboard. This
14+
output is used for HPO optimization with Ax.
15+
"""
16+
17+
import time
18+
from typing import Dict
19+
20+
from pytorch_lightning.loggers.base import LightningLoggerBase
21+
from pytorch_lightning.profiler.base import BaseProfiler
22+
23+
24+
class SimpleLoggingProfiler(BaseProfiler):
    """
    Profiler that times actions and forwards each measurement to a logger.

    Every completed action is timed with ``time.monotonic()`` and its
    duration (in seconds) is passed to the logger's ``log_metrics`` under
    the metric name `duration_<action_name>`.
    """

    def __init__(self, logger: LightningLoggerBase) -> None:
        """
        Args:
            logger: the logger that receives one metric per stopped action.
        """
        super().__init__()

        self.logger = logger
        # action name -> time.monotonic() reading taken when it was started
        self.current_actions: Dict[str, float] = {}

    def start(self, action_name: str) -> None:
        """
        Record the start time of ``action_name``.

        Raises:
            ValueError: if the action was started and has not been stopped.
        """
        if action_name not in self.current_actions:
            self.current_actions[action_name] = time.monotonic()
            return

        raise ValueError(
            f"Attempted to start {action_name} which has already started."
        )

    def stop(self, action_name: str) -> None:
        """
        Finish timing ``action_name`` and log its duration.

        Raises:
            ValueError: if the action has no recorded start time.
        """
        # Read the clock first so the bookkeeping below is not timed.
        stopped_at = time.monotonic()

        if action_name not in self.current_actions:
            raise ValueError(
                f"Attempting to stop recording an action ({action_name}) which was never started."
            )

        # Popping the entry allows the same action to be started again later.
        elapsed = stopped_at - self.current_actions.pop(action_name)
        metric_name = "duration_" + action_name
        self.logger.log_metrics({metric_name: elapsed})

    def summary(self) -> str:
        """Return an empty summary; results are only reported via the logger."""
        return ""

examples/apps/lightning_classy_vision/test/__init__.py

Whitespace-only changes.
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Copyright (c) Facebook, Inc. and its affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
import unittest
8+
9+
import torch
10+
from examples.apps.lightning_classy_vision.model import (
11+
TinyImageNetModel,
12+
)
13+
14+
15+
class ModelTest(unittest.TestCase):
    """Checks the layer stack that TinyImageNetModel builds and runs it once."""

    def test_basic(self) -> None:
        """Without hidden layers the sequential stack holds a single module."""
        net = TinyImageNetModel()
        self.assertEqual(len(net.seq), 1)

        batch = torch.zeros((1, 64, 64))
        result = net(batch)
        self.assertIsNotNone(result)

    def test_layer_sizes(self) -> None:
        """Two hidden layers give three Linear modules and two ReLUs."""
        hidden_sizes = [10, 15]
        net = TinyImageNetModel(layer_sizes=hidden_sizes)
        self.assertEqual(len(net.seq), 5)

        batch = torch.zeros((1, 64, 64))
        result = net(batch)
        self.assertIsNotNone(result)

examples/apps/lightning_classy_vision/train.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@
3939
TinyImageNetModel,
4040
export_inference_model,
4141
)
42+
from examples.apps.lightning_classy_vision.profiler import (
43+
SimpleLoggingProfiler,
44+
)
4245

4346

4447
def parse_args(argv: List[str]) -> argparse.Namespace:
@@ -75,6 +78,12 @@ def parse_args(argv: List[str]) -> argparse.Namespace:
7578
help="path to place the tensorboard logs",
7679
default="/tmp",
7780
)
81+
parser.add_argument(
82+
"--layers",
83+
nargs="+",
84+
type=int,
85+
help="the MLP hidden layers and sizes, used for neural architecture search",
86+
)
7887
return parser.parse_args(argv)
7988

8089

@@ -83,7 +92,7 @@ def main(argv: List[str]) -> None:
8392
args = parse_args(argv)
8493

8594
# Init our model
86-
model = TinyImageNetModel()
95+
model = TinyImageNetModel(args.layers)
8796

8897
# Download and setup the data module
8998
if args.test:
@@ -124,6 +133,7 @@ def main(argv: List[str]) -> None:
124133
logger=logger,
125134
max_epochs=args.epochs,
126135
callbacks=[checkpoint_callback],
136+
profiler=SimpleLoggingProfiler(logger),
127137
)
128138

129139
# Train the model ⚡

scripts/kfpint.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ def run_pipeline(build: BuildInfo, pipeline_file: str) -> object:
209209
experiment_name="integration-tests",
210210
run_name=f"integration test {build.id} - {os.path.basename(pipeline_file)}",
211211
)
212-
ui_url = f"{HOST}/_/pipeline/#/runs/details/{resp.run_id}"
212+
ui_url = f"{HOST}/#/runs/details/{resp.run_id}"
213213
print(f"{resp.run_id} - launched! view run at {ui_url}")
214214
return resp
215215

0 commit comments

Comments
 (0)