
Commit d1b1854

Merge pull request #718 from NVIDIA/gh/release
[ConvNets/Pyt] Pretrained weights usage guidelines
2 parents: 0a120b2 + 9061083 · commit d1b1854

5 files changed: +141 −28 lines


PyTorch/Classification/ConvNets/classify.py

Lines changed: 4 additions & 5 deletions
@@ -63,17 +63,16 @@ def main(args):
 
     if args.weights is not None:
         weights = torch.load(args.weights)
-
         #Temporary fix to allow NGC checkpoint loading
-        weights = {k.replace("module.", ""): v for k, v in weights.items()}
-
+        weights = {
+            k.replace("module.", ""): v for k, v in weights.items()
+        }
         model.load_state_dict(weights)
 
     model = model.cuda()
 
     if args.precision in ["AMP", "FP16"]:
-        model = model.half()
-
+        model = network_to_half()
 
     model.eval()
 
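
A note on the fix above (the same remapping is applied to `pretrained_weights` in `main.py` below): checkpoints saved from a model wrapped in `torch.nn.DataParallel` or `DistributedDataParallel` carry a `module.` prefix on every parameter name, so the keys no longer match the plain model. A minimal sketch of what the remapping does, using a stand-in `nn.Sequential` model rather than the repo's model builder:

```python
import torch.nn as nn

# Stand-in network; the repo builds ResNet/ResNeXt variants instead.
model = nn.Sequential(nn.Linear(8, 4), nn.ReLU(), nn.Linear(4, 2))

# Saving from an nn.DataParallel wrapper prefixes every key with "module.".
wrapped = nn.DataParallel(model)
state = wrapped.state_dict()   # keys: "module.0.weight", "module.0.bias", ...

# The temporary fix: strip the prefix so the keys match the unwrapped model.
weights = {k.replace("module.", ""): v for k, v in state.items()}
model.load_state_dict(weights)  # loads cleanly again
```
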

PyTorch/Classification/ConvNets/main.py

Lines changed: 4 additions & 4 deletions
@@ -363,10 +363,10 @@ def _worker_init_fn(id):
             )
         )
         pretrained_weights = torch.load(args.pretrained_weights)
-
-        #Temporary fix to allow NGC checkpoint loading
-
-        pretrained_weights = {k.replace("module.", ""): v for k, v in pretrained_weights.items()}
+        # Temporary fix to allow NGC checkpoint loading
+        pretrained_weights = {
+            k.replace("module.", ""): v for k, v in pretrained_weights.items()
+        }
     else:
         print("=> no pretrained weights found at '{}'".format(args.resume))
 

PyTorch/Classification/ConvNets/resnet50v1.5/README.md

Lines changed: 44 additions & 6 deletions
@@ -281,17 +281,21 @@ Example:
 
 ### 6. Start inference
 
-To run inference on ImageNet on a checkpointed model, run:
+You can download pretrained weights from NGC:
 
-`python ./main.py --arch resnet50 --evaluate --epochs 1 --resume <path to checkpoint> -b <batch size> <path to imagenet>`
+```bash
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/resnet50_pyt_amp/versions/20.06.0/zip -O resnet50_pyt_amp_20.06.0.zip
 
-To run inference on JPEG image, you have to first extract the model weights from checkpoint:
+unzip resnet50_pyt_amp_20.06.0.zip
+```
 
-`python checkpoint2model.py --checkpoint-path <path to checkpoint> --weight-path <path where weights will be stored>`
+To run inference on ImageNet, run:
 
-Then run classification script:
+`python ./main.py --arch resnet50 --evaluate --epochs 1 --pretrained-weights nvidia_resnet50_200821.pth.tar -b <batch size> <path to imagenet>`
 
-`python classify.py --arch resnet50 -c fanin --weights <path to weights from previous step> --precision AMP|FP32 --image <path to JPEG image>`
+To run inference on a JPEG image using pretrained weights:
+
+`python classify.py --arch resnet50 -c fanin --weights nvidia_resnet50_200821.pth.tar --precision AMP|FP32 --image <path to JPEG image>`
 
 
 ## Advanced
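
For orientation, the single-image path that `classify.py` implements amounts to: load the weights, apply standard ImageNet preprocessing, run a forward pass, and take the arg-max class. A rough sketch of that flow, assuming a model definition whose state-dict keys match the weight file; `torchvision.models.resnet50` and the file names are illustrative stand-ins, not the repo's exact API:

```python
import torch
from PIL import Image
from torchvision import models, transforms

# Stand-in model definition; the repo builds its own ResNet-50.
model = models.resnet50()
weights = torch.load("nvidia_resnet50_200821.pth.tar", map_location="cpu")
# Strip the DataParallel prefix, as classify.py does; the key names must
# match the model definition being used.
weights = {k.replace("module.", ""): v for k, v in weights.items()}
model.load_state_dict(weights)
model.eval()

# Standard ImageNet preprocessing: resize, center-crop, normalize.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

image = Image.open("example.jpg").convert("RGB")
batch = preprocess(image).unsqueeze(0)   # shape [1, 3, 224, 224]

with torch.no_grad():
    logits = model(batch)
print("predicted class id:", logits.argmax(dim=1).item())
```
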
@@ -445,6 +449,19 @@ Metrics gathered through training:
 - `train.data_time` - time spent on waiting on data
 - `train.compute_time` - time spent in forward/backward pass
 
+To restart training from a checkpoint, use the `--resume` option.
+
+To start training from pretrained weights (e.g. downloaded from NGC), use the `--pretrained-weights` option.
+
+The difference between the two is that pretrained weights contain only the model weights,
+while a checkpoint also contains the optimizer state, LR scheduler state, and RNG state.
+
+Checkpoints are suitable for splitting the training into parts, for example to divide
+the job into shorter stages, or to restart training after an infrastructure failure.
+
+Pretrained weights can be used as a base for finetuning the model on a different dataset,
+or as a backbone for detection models.
+
 ### Inference process
 
 Validation is done every epoch, and can be also run separately on a checkpointed model.
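
The distinction drawn above is easiest to see in what each file would contain: a resumable checkpoint bundles the model weights together with optimizer, scheduler, and RNG state, while a pretrained-weights file is just the model's `state_dict`. A minimal sketch; the dictionary keys are illustrative, not necessarily the exact layout this repo writes:

```python
import torch
import torch.nn as nn

model = nn.Linear(10, 2)  # stand-in for the real network
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30)

# A resumable checkpoint: weights plus everything needed to continue training.
checkpoint = {
    "epoch": 42,
    "state_dict": model.state_dict(),
    "optimizer": optimizer.state_dict(),
    "lr_scheduler": scheduler.state_dict(),
    "rng_state": torch.get_rng_state(),
}
torch.save(checkpoint, "checkpoint.pth.tar")

# Pretrained weights: only the model parameters, for finetuning or as a backbone.
torch.save(model.state_dict(), "weights.pth.tar")

# Roughly what checkpoint2model.py does: pull the weights out of a checkpoint.
ckpt = torch.load("checkpoint.pth.tar", map_location="cpu")
torch.save(ckpt["state_dict"], "weights_from_checkpoint.pth.tar")
```
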
@@ -470,6 +487,27 @@ Then run classification script:
 
 `python classify.py --arch resnet50 -c fanin --weights <path to weights from previous step> --precision AMP|FP32 --image <path to JPEG image>`
 
+You can also run ImageNet validation using pretrained weights:
+
+`python ./main.py --arch resnet50 --evaluate --epochs 1 --pretrained-weights <path to pretrained weights> -b <batch size> <path to imagenet>`
+
+#### NGC pretrained weights
+
+Pretrained weights can be downloaded from NGC:
+
+```bash
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/resnet50_pyt_amp/versions/20.06.0/zip -O resnet50_pyt_amp_20.06.0.zip
+
+unzip resnet50_pyt_amp_20.06.0.zip
+```
+
+To run inference on ImageNet, run:
+
+`python ./main.py --arch resnet50 --evaluate --epochs 1 --pretrained-weights nvidia_resnet50_200821.pth.tar -b <batch size> <path to imagenet>`
+
+To run inference on a JPEG image using pretrained weights:
+
+`python classify.py --arch resnet50 -c fanin --weights nvidia_resnet50_200821.pth.tar --precision AMP|FP32 --image <path to JPEG image>`
 
 
 ## Performance

PyTorch/Classification/ConvNets/resnext101-32x4d/README.md

Lines changed: 45 additions & 7 deletions
@@ -266,17 +266,21 @@ Example:
 
 ### 6. Start inference
 
-To run inference on ImageNet on a checkpointed model, run:
+You can download pretrained weights from NGC:
 
-`python ./main.py --arch resnext101-32x4d --evaluate --epochs 1 --resume <path to checkpoint> -b <batch size> <path to imagenet>`
+```bash
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/resnext101_32x4d_pyt_amp/versions/20.06.0/zip -O resnext101_32x4d_pyt_amp_20.06.0.zip
 
-To run inference on JPEG image, you have to first extract the model weights from checkpoint:
+unzip resnext101_32x4d_pyt_amp_20.06.0.zip
+```
 
-`python checkpoint2model.py --checkpoint-path <path to checkpoint> --weight-path <path where weights will be stored>`
+To run inference on ImageNet, run:
 
-Then run classification script:
+`python ./main.py --arch resnext101-32x4d --evaluate --epochs 1 --pretrained-weights nvidia_resnext101-32x4d_200821.pth.tar -b <batch size> <path to imagenet>`
 
-`python classify.py --arch resnext101-32x4d -c fanin --weights <path to weights from previous step> --precision AMP|FP32 --image <path to JPEG image>`
+To run inference on a JPEG image using pretrained weights:
+
+`python classify.py --arch resnext101-32x4d -c fanin --weights nvidia_resnext101-32x4d_200821.pth.tar --precision AMP|FP32 --image <path to JPEG image>`
 
 
 ## Advanced
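
The `wget`/`unzip` step above can also be scripted; a small sketch using only the Python standard library, with the URL and archive name taken from the commands above (the weights file name inside the archive is the one the README references):

```python
import urllib.request
import zipfile

URL = ("https://api.ngc.nvidia.com/v2/models/nvidia/"
       "resnext101_32x4d_pyt_amp/versions/20.06.0/zip")
ARCHIVE = "resnext101_32x4d_pyt_amp_20.06.0.zip"

# Download the NGC archive (equivalent to the wget command above)...
urllib.request.urlretrieve(URL, ARCHIVE)

# ...and unpack it next to the training scripts (equivalent to unzip).
with zipfile.ZipFile(ARCHIVE) as archive:
    archive.extractall(".")
    # Expected to contain nvidia_resnext101-32x4d_200821.pth.tar
    print(archive.namelist())
```
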
@@ -431,6 +435,19 @@ Metrics gathered through training:
 - `train.data_time` - time spent on waiting on data
 - `train.compute_time` - time spent in forward/backward pass
 
+To restart training from a checkpoint, use the `--resume` option.
+
+To start training from pretrained weights (e.g. downloaded from NGC), use the `--pretrained-weights` option.
+
+The difference between the two is that pretrained weights contain only the model weights,
+while a checkpoint also contains the optimizer state, LR scheduler state, and RNG state.
+
+Checkpoints are suitable for splitting the training into parts, for example to divide
+the job into shorter stages, or to restart training after an infrastructure failure.
+
+Pretrained weights can be used as a base for finetuning the model on a different dataset,
+or as a backbone for detection models.
+
 ### Inference process
 
 Validation is done every epoch, and can be also run separately on a checkpointed model.
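
To make the finetuning use case above concrete: load the pretrained weights, replace the ImageNet classification head with one sized for the new dataset, and optionally freeze the backbone at first. A sketch using `torchvision`'s ResNeXt-101 32x4d as a stand-in for the repo's model builder; the commented-out load mirrors the weight handling in `classify.py`/`main.py`:

```python
import torch
import torch.nn as nn
from torchvision import models

NUM_CLASSES = 10  # classes in the new (non-ImageNet) dataset

# Stand-in backbone; the repo would build resnext101-32x4d itself and load
# the NGC weights (stripping the "module." prefix) instead:
model = models.resnext101_32x4d()
# weights = torch.load("nvidia_resnext101-32x4d_200821.pth.tar", map_location="cpu")
# weights = {k.replace("module.", ""): v for k, v in weights.items()}
# model.load_state_dict(weights)

# Replace the 1000-way ImageNet head with one sized for the new task.
model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)

# Optionally freeze the backbone and train only the new head at first.
for name, param in model.named_parameters():
    param.requires_grad = name.startswith("fc.")

optimizer = torch.optim.SGD(
    [p for p in model.parameters() if p.requires_grad], lr=0.01, momentum=0.9
)
```
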
@@ -454,8 +471,29 @@ To run inference on JPEG image, you have to first extract the model weights from
 
 Then run classification script:
 
-`python classify.py --arch resnext101-32x4d -c fanin --weights <path to weights from previous step> --precision AMP|
+`python classify.py --arch resnext101-32x4d -c fanin --weights <path to weights from previous step> --precision AMP|FP32 --image <path to JPEG image>`
+
+You can also run ImageNet validation using pretrained weights:
+
+`python ./main.py --arch resnext101-32x4d --evaluate --epochs 1 --pretrained-weights <path to pretrained weights> -b <batch size> <path to imagenet>`
+
+#### NGC pretrained weights
+
+Pretrained weights can be downloaded from NGC:
+
+```bash
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/resnext101-32x4d_pyt_amp/versions/20.06.0/zip -O resnext101-32x4d_pyt_amp_20.06.0.zip
+
+unzip resnext101-32x4d_pyt_amp_20.06.0.zip
+```
+
+To run inference on ImageNet, run:
+
+`python ./main.py --arch resnext101-32x4d --evaluate --epochs 1 --pretrained-weights nvidia_resnext101-32x4d_200821.pth.tar -b <batch size> <path to imagenet>`
+
+To run inference on a JPEG image using pretrained weights:
 
+`python classify.py --arch resnext101-32x4d -c fanin --weights nvidia_resnext101-32x4d_200821.pth.tar --precision AMP|FP32 --image <path to JPEG image>`
 
 
 ## Performance

PyTorch/Classification/ConvNets/se-resnext101-32x4d/README.md

Lines changed: 44 additions & 6 deletions
@@ -267,17 +267,21 @@ Example:
 
 ### 6. Start inference
 
-To run inference on ImageNet on a checkpointed model, run:
+You can download pretrained weights from NGC:
 
-`python ./main.py --arch se-resnext101-32x4d --evaluate --epochs 1 --resume <path to checkpoint> -b <batch size> <path to imagenet>`
+```bash
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/seresnext101_32x4d_pyt_amp/versions/20.06.0/zip -O seresnext101_32x4d_pyt_amp_20.06.0.zip
 
-To run inference on JPEG image, you have to first extract the model weights from checkpoint:
+unzip seresnext101_32x4d_pyt_amp_20.06.0.zip
+```
 
-`python checkpoint2model.py --checkpoint-path <path to checkpoint> --weight-path <path where weights will be stored>`
+To run inference on ImageNet, run:
 
-Then run classification script:
+`python ./main.py --arch se-resnext101-32x4d --evaluate --epochs 1 --pretrained-weights nvidia_se-resnext101-32x4d_200821.pth.tar -b <batch size> <path to imagenet>`
 
-`python classify.py --arch se-resnext101-32x4d -c fanin --weights <path to weights from previous step> --precision AMP|FP32 --image <path to JPEG image>`
+To run inference on a JPEG image using pretrained weights:
+
+`python classify.py --arch se-resnext101-32x4d -c fanin --weights nvidia_se-resnext101-32x4d_200821.pth.tar --precision AMP|FP32 --image <path to JPEG image>`
 
 
 ## Advanced
@@ -432,6 +436,19 @@ Metrics gathered through training:
 - `train.data_time` - time spent on waiting on data
 - `train.compute_time` - time spent in forward/backward pass
 
+To restart training from a checkpoint, use the `--resume` option.
+
+To start training from pretrained weights (e.g. downloaded from NGC), use the `--pretrained-weights` option.
+
+The difference between the two is that pretrained weights contain only the model weights,
+while a checkpoint also contains the optimizer state, LR scheduler state, and RNG state.
+
+Checkpoints are suitable for splitting the training into parts, for example to divide
+the job into shorter stages, or to restart training after an infrastructure failure.
+
+Pretrained weights can be used as a base for finetuning the model on a different dataset,
+or as a backbone for detection models.
+
 ### Inference process
 
 Validation is done every epoch, and can be also run separately on a checkpointed model.
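
As a reference for what `--evaluate` measures: at its core it is a top-1 accuracy pass over the ImageNet validation split. A bare-bones sketch with placeholder paths and a torchvision stand-in model (the repo's `main.py` uses its own data pipeline and model builder):

```python
import torch
from torchvision import datasets, models, transforms

preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# <path to imagenet>/val laid out as one directory per class.
val_set = datasets.ImageFolder("/data/imagenet/val", transform=preprocess)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=256, num_workers=4)

model = models.resnet50()  # stand-in; load pretrained weights as shown earlier
model.eval()

correct = total = 0
with torch.no_grad():
    for images, labels in val_loader:
        preds = model(images).argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.numel()
print(f"top-1 accuracy: {correct / total:.4f}")
```
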
@@ -457,6 +474,27 @@ Then run classification script:
 
 `python classify.py --arch se-resnext101-32x4d -c fanin --weights <path to weights from previous step> --precision AMP|FP32 --image <path to JPEG image>`
 
+You can also run ImageNet validation using pretrained weights:
+
+`python ./main.py --arch se-resnext101-32x4d --evaluate --epochs 1 --pretrained-weights <path to pretrained weights> -b <batch size> <path to imagenet>`
+
+#### NGC pretrained weights
+
+Pretrained weights can be downloaded from NGC:
+
+```bash
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/seresnext101_32x4d_pyt_amp/versions/20.06.0/zip -O seresnext101_32x4d_pyt_amp_20.06.0.zip
+
+unzip seresnext101_32x4d_pyt_amp_20.06.0.zip
+```
+
+To run inference on ImageNet, run:
+
+`python ./main.py --arch se-resnext101-32x4d --evaluate --epochs 1 --pretrained-weights nvidia_se-resnext101-32x4d_200821.pth.tar -b <batch size> <path to imagenet>`
+
+To run inference on a JPEG image using pretrained weights:
+
+`python classify.py --arch se-resnext101-32x4d -c fanin --weights nvidia_se-resnext101-32x4d_200821.pth.tar --precision AMP|FP32 --image <path to JPEG image>`
 
 
 ## Performance
