Skip to content

Commit ef737c4

Browse files
Authored — Merge branch 'main' into extra_decoders
Commit ef737c4 (2 parents: cbbe0dc + 518ee93)

File tree

9 files changed

+31
-90
lines changed

9 files changed

+31
-90
lines changed

.github/scripts/cmake.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ else
3030
JOBS=$(nproc)
3131
fi
3232

33+
if [[ $OS_TYPE == linux ]]; then
34+
export LD_LIBRARY_PATH="${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH}"
35+
fi
36+
3337
TORCH_PATH=$(python -c "import pathlib, torch; print(pathlib.Path(torch.__path__[0]))")
3438
if [[ $OS_TYPE == windows ]]; then
3539
PACKAGING_DIR="${PWD}/packaging"

.github/workflows/build-cmake.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ jobs:
2020
gpu-arch-type: cuda
2121
gpu-arch-version: "11.8"
2222
fail-fast: false
23-
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
23+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
2424
with:
2525
repository: pytorch/vision
2626
runner: ${{ matrix.runner }}
@@ -33,7 +33,6 @@ jobs:
3333
export PYTHON_VERSION=3.9
3434
export GPU_ARCH_TYPE=${{ matrix.gpu-arch-type }}
3535
export GPU_ARCH_VERSION=${{ matrix.gpu-arch-version }}
36-
3736
./.github/scripts/cmake.sh
3837
3938
macos:

.github/workflows/docs.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ on:
1414

1515
jobs:
1616
build:
17-
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
17+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
1818
with:
1919
repository: pytorch/vision
2020
upload-artifact: docs
@@ -77,11 +77,11 @@ jobs:
7777
7878
upload:
7979
needs: build
80-
if: github.repository == 'pytorch/vision' && github.event_name == 'push' &&
80+
if: github.repository == 'pytorch/vision' && github.event_name == 'push' &&
8181
((github.ref_type == 'branch' && github.ref_name == 'main') || github.ref_type == 'tag')
8282
permissions:
8383
contents: write
84-
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
84+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
8585
with:
8686
repository: pytorch/vision
8787
download-artifact: docs

.github/workflows/lint.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ on:
1111

1212
jobs:
1313
python-source-and-configs:
14-
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
14+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
1515
with:
1616
repository: pytorch/vision
1717
test-infra-ref: main
@@ -38,7 +38,7 @@ jobs:
3838
fi
3939
4040
c-source:
41-
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
41+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
4242
with:
4343
repository: pytorch/vision
4444
test-infra-ref: main
@@ -65,7 +65,7 @@ jobs:
6565
6666
6767
python-types:
68-
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
68+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
6969
with:
7070
repository: pytorch/vision
7171
test-infra-ref: main

.github/workflows/prototype-tests-linux-gpu.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ jobs:
2323
gpu-arch-type: cuda
2424
gpu-arch-version: "11.8"
2525
fail-fast: false
26-
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
26+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
2727
with:
2828
repository: pytorch/vision
2929
runner: ${{ matrix.runner }}
@@ -37,7 +37,7 @@ jobs:
3737
export GPU_ARCH_TYPE=${{ matrix.gpu-arch-type }}
3838
export GPU_ARCH_VERSION=${{ matrix.gpu-arch-version }}
3939
./.github/scripts/setup-env.sh
40-
40+
4141
# Prepare conda
4242
CONDA_PATH=$(which conda)
4343
eval "$(${CONDA_PATH} shell.bash hook)"

.github/workflows/tests.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ jobs:
2626
gpu-arch-type: cuda
2727
gpu-arch-version: "11.8"
2828
fail-fast: false
29-
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
29+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
3030
with:
3131
repository: pytorch/vision
3232
runner: ${{ matrix.runner }}
@@ -104,7 +104,7 @@ jobs:
104104
./.github/scripts/unittest.sh
105105
106106
onnx:
107-
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
107+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
108108
with:
109109
repository: pytorch/vision
110110
test-infra-ref: main
@@ -135,7 +135,7 @@ jobs:
135135
echo '::endgroup::'
136136
137137
unittests-extended:
138-
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
138+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
139139
if: contains(github.event.pull_request.labels.*.name, 'run-extended')
140140
with:
141141
repository: pytorch/vision

torchvision/csrc/io/image/cpu/decode_webp.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,10 @@ torch::Tensor decode_webp(
4444

4545
auto decoded_data =
4646
decoding_func(encoded_data_p, encoded_data_size, &width, &height);
47-
4847
TORCH_CHECK(decoded_data != nullptr, "WebPDecodeRGB[A] failed.");
4948

50-
auto deleter = [decoded_data](void*) { WebPFree(decoded_data); };
5149
auto out = torch::from_blob(
52-
decoded_data, {height, width, num_channels}, deleter, torch::kUInt8);
50+
decoded_data, {height, width, num_channels}, torch::kUInt8);
5351

5452
return out.permute({2, 0, 1});
5553
}

torchvision/csrc/ops/mps/mps_kernels.h

Lines changed: 14 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ namespace ops {
55

66
namespace mps {
77

8-
static const char* METAL_VISION = R"VISION_METAL(
8+
static at::native::mps::MetalShaderLibrary lib(R"VISION_METAL(
99
1010
#include <metal_atomic>
1111
#include <metal_stdlib>
@@ -26,46 +26,15 @@ inline T ceil_div(T n, T m) {
2626
return (n + m - 1) / m;
2727
}
2828
29-
template <typename T>
30-
inline void atomic_add_float( device T* data_ptr, const T val)
29+
inline void atomic_add_float(device float* data_ptr, const float val)
3130
{
32-
#if __METAL_VERSION__ >= 300
33-
// atomic_float is supported in Metal 3 (macOS Ventura) onward.
34-
device atomic_fetch_add_explicit((device atomic_float*) data_ptr, val, memory_order_relaxed);
35-
#else
36-
// Custom atomic addition implementation
37-
// https://github.com/ShoYamanishi/AppleNumericalComputing/blob/053f06c1f5a831095c4bcc29aaf11366fce5231e/03_dot/metal/dot.metal#L447-L472
38-
// https://forums.developer.nvidia.com/t/atomicadd-float-float-atomicmul-float-float/14639
39-
// https://on-demand.gputechconf.com/gtc/2013/presentations/S3101-Atomic-Memory-Operations.pdf (See the last slide)
40-
41-
// Create an atomic uint pointer for atomic transaction.
42-
device atomic_uint* atom_var = (device atomic_uint*)data_ptr;
43-
// Create necessary storage.
44-
uint fetched_uint, assigning_uint;
45-
T fetched_float, assigning_float;
46-
47-
// Replace the value in atom_var with 0 and return the previous value in atom_var.
48-
fetched_uint = atomic_exchange_explicit( atom_var, 0 /*desired*/, memory_order_relaxed);
49-
// Read out the previous value as float.
50-
fetched_float = *( (thread T*) &fetched_uint );
51-
52-
// Do addition and represent the addition result in uint for atomic transaction.
53-
assigning_float = fetched_float + val;
54-
assigning_uint = *((thread uint*) &assigning_float);
55-
56-
// atom_var should be 0 now, try to assign the addition result back to the atom_var (data_ptr).
57-
while ((fetched_uint = atomic_exchange_explicit( atom_var, assigning_uint /*desired*/, memory_order_relaxed)) != 0) {
58-
// If atom_var was not 0, i.e. fetched_uint != 0, it means that the data has been modified by other threads.
59-
// Try to assign 0 and get the previously assigned addition result.
60-
uint fetched_uint_again = atomic_exchange_explicit(atom_var, 0 /*desired*/, memory_order_relaxed);
61-
T fetched_float_again = *( (thread T*) &fetched_uint_again );
62-
// Re-add again
63-
fetched_float = *((thread T*) &(fetched_uint));
64-
// Previously assigned addition result + addition result from other threads.
65-
assigning_float = fetched_float_again + fetched_float;
66-
assigning_uint = *( (thread uint*) &assigning_float);
67-
}
68-
#endif
31+
atomic_fetch_add_explicit((device atomic_float*) data_ptr, val, memory_order_relaxed);
32+
}
33+
34+
35+
inline void atomic_add_float(device half* data_ptr, const half val)
36+
{
37+
atomic_fetch_add_explicit((device atomic_float*) data_ptr, static_cast<float>(val), memory_order_relaxed);
6938
}
7039
7140
template <typename T, typename integer_t>
@@ -1061,40 +1030,12 @@ REGISTER_PS_ROI_POOL_OP(half, int64_t);
10611030
REGISTER_PS_ROI_POOL_BACKWARD_OP(float, int64_t);
10621031
REGISTER_PS_ROI_POOL_BACKWARD_OP(half, int64_t);
10631032
1064-
)VISION_METAL";
1065-
1066-
static id<MTLLibrary> compileVisionOpsLibrary(id<MTLDevice> device) {
1067-
static id<MTLLibrary> visionLibrary = nil;
1068-
if (visionLibrary) {
1069-
return visionLibrary;
1070-
}
1071-
1072-
NSError* error = nil;
1073-
MTLCompileOptions* options = [[MTLCompileOptions new] autorelease];
1074-
[options setLanguageVersion:MTLLanguageVersion2_3];
1075-
visionLibrary = [device newLibraryWithSource:[NSString stringWithCString:METAL_VISION encoding:NSASCIIStringEncoding]
1076-
options:options
1077-
error:&error];
1078-
TORCH_CHECK(visionLibrary, "Failed to create metal vision library, error: ", [[error description] UTF8String]);
1079-
return visionLibrary;
1080-
}
1081-
1082-
static id<MTLComputePipelineState> visionPipelineState(id<MTLDevice> device, const std::string& kernel) {
1083-
static std::unordered_map<std::string, id<MTLComputePipelineState>> psoCache;
1084-
id<MTLComputePipelineState> pso = psoCache[kernel];
1085-
if (pso) {
1086-
return pso;
1087-
}
1088-
1089-
NSError* error = nil;
1090-
id<MTLLibrary> visionLib = compileVisionOpsLibrary(device);
1091-
id<MTLFunction> visionFunc = [visionLib newFunctionWithName:[NSString stringWithUTF8String:kernel.c_str()]];
1092-
TORCH_CHECK(visionFunc, "Failed to create function state object for: ", kernel);
1093-
pso = [device newComputePipelineStateWithFunction:visionFunc error:&error];
1094-
TORCH_CHECK(pso, "Failed to created pipeline state object, error: ", [[error description] UTF8String]);
1033+
)VISION_METAL");
10951034

1096-
psoCache[kernel] = pso;
1097-
return pso;
1035+
static id<MTLComputePipelineState> visionPipelineState(
1036+
id<MTLDevice> device,
1037+
const std::string& kernel) {
1038+
return lib.getPipelineStateForFunc(kernel);
10981039
}
10991040

11001041
} // namespace mps

torchvision/csrc/ops/mps/ps_roi_pool_kernel.mm

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,6 @@
123123

124124
float spatial_scale_f = static_cast<float>(spatial_scale);
125125

126-
auto num_rois = rois.size(0);
127126
auto grad_input = at::zeros({batch_size, channels, height, width}, grad.options());
128127

129128
if (grad.numel() == 0) {

0 commit comments

Comments (0)