Commit 7325aa5

Merge pull request #1341 from xiezl/patch-2
Add the implementation of distributed ResNet for PEFT
2 parents fdbd457 + 839c981 commit 7325aa5

File tree

1 file changed: 87 additions, 0 deletions
@@ -0,0 +1,87 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#

# the code is modified from
# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py

import time

import numpy as np
from tqdm import trange

from singa import autograd
from singa import device
from singa import opt
from singa import tensor

if __name__ == "__main__":
    # Wrap plain SGD in DistOpt so each process contributes its
    # gradients to a collective update (one process per GPU).
    sgd = opt.SGD(lr=0.1, momentum=0.9, weight_decay=1e-5)
    sgd = opt.DistOpt(sgd)

    if sgd.global_rank == 0:
        print("Start initialization...........", flush=True)

    # Each process drives the GPU matching its local rank.
    dev = device.create_cuda_gpu_on(sgd.local_rank)

    # resnet50 comes from the local resnet.py adapted from torchvision
    # (see the link above).
    from resnet import resnet50
    model = resnet50()

    niters = 100
    batch_size = 32
    IMG_SIZE = 224

    # Synthetic inputs and labels: this is a throughput benchmark,
    # so random data is sufficient.
    tx = tensor.Tensor((batch_size, 3, IMG_SIZE, IMG_SIZE), dev)
    ty = tensor.Tensor((batch_size,), dev, tensor.int32)
    autograd.training = True
    x = np.random.randn(batch_size, 3, IMG_SIZE, IMG_SIZE).astype(np.float32)
    y = np.random.randint(0, 1000, batch_size, dtype=np.int32)
    tx.copy_from_numpy(x)
    ty.copy_from_numpy(y)

    # Accumulate forward and softmax times separately; dev.Sync()
    # flushes pending GPU work so the wall-clock readings are accurate.
    dev.Sync()
    start = time.time()
    fd = 0
    softmax = 0
    with trange(niters) as t:
        for _ in t:
            dev.Sync()
            tick = time.time()
            x = model(tx)
            dev.Sync()
            fd += time.time() - tick
            tick = time.time()
            loss = autograd.softmax_cross_entropy(x, ty)
            dev.Sync()
            softmax += time.time() - tick
            sgd.backward_and_update(loss)

    dev.Sync()
    end = time.time()
    # Throughput is global: images processed by all processes count.
    throughput = float(sgd.world_size * niters * batch_size) / (end - start)
    titer = (end - start) / float(niters)
    tforward = float(fd) / float(niters)
    tsoftmax = float(softmax) / float(niters)
    # Backward (+ update) time is whatever remains of the iteration.
    tbackward = titer - tforward - tsoftmax

    if sgd.global_rank == 0:
        print("\nThroughput = {} per second".format(throughput), flush=True)
        print("Total={}, forward={}, softmax={}, backward={}".format(
            titer, tforward, tsoftmax, tbackward),
              flush=True)
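
For context: `opt.DistOpt` expects one process per GPU, so a script like this is normally launched through MPI, e.g. `mpiexec -np 4 python <script>` (the process count is illustrative, and the script name is not part of this diff). The reported throughput is global: it multiplies the per-process image rate by `sgd.world_size`.

As a sanity check on the timing arithmetic, here is a standalone sketch with made-up numbers; it mirrors the formulas above and shows why the backward time is obtained by subtraction rather than measured directly:

# All numbers below are made up, purely to illustrate the arithmetic.
world_size, niters, batch_size = 4, 100, 32
elapsed = 40.0            # wall-clock seconds for the whole loop
fd, softmax = 25.0, 2.0   # accumulated forward / softmax seconds

throughput = world_size * niters * batch_size / elapsed  # 320.0 images per second
titer = elapsed / niters                                  # 0.40 s per iteration
tforward = fd / niters                                    # 0.25 s
tsoftmax = softmax / niters                               # 0.02 s
tbackward = titer - tforward - tsoftmax                   # ~0.13 s, by subtraction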
