Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions cv/classification/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,5 +105,18 @@ Bash script `infer.sh` is used to infer the trained model.
sh infer.sh
```

### Multi-Device Support (Experimental)

This branch introduces preliminary support for running on different device types. By default, the training script now automatically selects the best available device, in the following order of priority:
1. CUDA (GPU)
2. NPU (if oneflow_npu is installed)
3. CPU (fallback)

If you want to explicitly run on a specific device (e.g., NPU), you can still override the default by adding the following argument to your train.sh command:

```bash
--device=npu
```

> Note: The label_smoothing feature is currently not supported in this branch. If your configuration file (e.g., configs/default_settings.yaml) includes label_smoothing, please disable it (set it to 0.0) to avoid errors.

32 changes: 26 additions & 6 deletions cv/classification/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import argparse
import datetime
import numpy as np
import importlib.util
import oneflow as flow
import oneflow.backends.cudnn as cudnn

Expand Down Expand Up @@ -39,6 +40,15 @@ def build_model(config):
return model


def detect_device():
    """Choose the execution device string, preferring CUDA, then NPU, then CPU.

    Returns:
        str: ``"cuda"`` when oneflow reports a CUDA device, ``"npu"`` when the
        ``oneflow_npu`` package is importable, otherwise ``"cpu"``.
    """
    if flow.cuda.is_available():
        return "cuda"
    # NOTE(review): this only checks that the oneflow_npu package is installed,
    # not that an NPU device is actually present — presumed sufficient here.
    has_npu_backend = importlib.util.find_spec("oneflow_npu") is not None
    return "npu" if has_npu_backend else "cpu"


def parse_option():
parser = argparse.ArgumentParser(
"Flowvision image classification training and evaluation script", add_help=False
Expand Down Expand Up @@ -122,10 +132,17 @@ def parse_option():
required=False,
help="local rank for DistributedDataParallel",
)
parser.add_argument(
"--device",
type=str,
default=detect_device(),
help="Specify the device to run the model on. Options: 'cuda', 'cpu', or 'npu'.",
)

args, unparsed = parser.parse_known_args()

config = get_config(args)
config["DEVICE"] = args.device.lower()

return args, config

Expand All @@ -141,7 +158,7 @@ def main(config):

logger.info(f"Creating model:{config.MODEL.ARCH}")
model = build_model(config)
model.cuda()
model.to(config.DEVICE)

optimizer = build_optimizer(config, model)
model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False, use_bucket=False)
Expand Down Expand Up @@ -255,8 +272,8 @@ def train_one_epoch(
start = time.time()
end = time.time()
for idx, (samples, targets) in enumerate(data_loader):
samples = samples.cuda()
targets = targets.cuda()
samples = samples.to(config.DEVICE)
targets = targets.to(config.DEVICE).to(flow.int32)

if mixup_fn is not None:
samples, targets = mixup_fn(samples, targets)
Expand Down Expand Up @@ -324,8 +341,8 @@ def validate(config, data_loader, model):

end = time.time()
for idx, (images, target) in enumerate(data_loader):
images = images.cuda()
target = target.cuda()
images = images.to(config.DEVICE)
target = target.to(config.DEVICE).to(flow.int32)

# compute output
output = model(images)
Expand Down Expand Up @@ -370,7 +387,7 @@ def throughput(data_loader, model, logger):
model.eval()

for idx, (images, _) in enumerate(data_loader):
images = images.cuda()
images = images.to(config.DEVICE)
batch_size = images.shape[0]
for i in range(50):
model(images)
Expand Down Expand Up @@ -453,4 +470,7 @@ def throughput(data_loader, model, logger):
# print config
logger.info(config.dump())

if config.DEVICE == "npu":
import oneflow_npu

main(config)