Skip to content

Commit 5230f6c

Browse files
Apple: Use image atomic operations on supported Apple hardware
Co-authored-by: A Thousand Ships <[email protected]>
1 parent 9b22b41 commit 5230f6c

32 files changed

+5356
-721
lines changed

doc/classes/RenderingDevice.xml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2490,6 +2490,9 @@
24902490
<constant name="SUPPORTS_BUFFER_DEVICE_ADDRESS" value="6" enum="Features">
24912491
Features support for buffer device address extension.
24922492
</constant>
2493+
<constant name="SUPPORTS_IMAGE_ATOMIC_32_BIT" value="7" enum="Features">
2494+
Support for 32-bit image atomic operations.
2495+
</constant>
24932496
<constant name="LIMIT_MAX_BOUND_UNIFORM_SETS" value="0" enum="Limit">
24942497
Maximum number of uniform sets that can be bound at a given time.
24952498
</constant>

drivers/apple/foundation_helpers.h

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/**************************************************************************/
2+
/* foundation_helpers.h */
3+
/**************************************************************************/
4+
/* This file is part of: */
5+
/* GODOT ENGINE */
6+
/* https://godotengine.org */
7+
/**************************************************************************/
8+
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
9+
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
10+
/* */
11+
/* Permission is hereby granted, free of charge, to any person obtaining */
12+
/* a copy of this software and associated documentation files (the */
13+
/* "Software"), to deal in the Software without restriction, including */
14+
/* without limitation the rights to use, copy, modify, merge, publish, */
15+
/* distribute, sublicense, and/or sell copies of the Software, and to */
16+
/* permit persons to whom the Software is furnished to do so, subject to */
17+
/* the following conditions: */
18+
/* */
19+
/* The above copyright notice and this permission notice shall be */
20+
/* included in all copies or substantial portions of the Software. */
21+
/* */
22+
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
23+
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
24+
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
25+
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
26+
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
27+
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
28+
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
29+
/**************************************************************************/
30+
31+
#pragma once
32+
33+
#import <Foundation/NSString.h>
34+
35+
class String;
36+
template <typename T>
37+
class CharStringT;
38+
39+
using CharString = CharStringT<char>;
40+
41+
namespace conv {
42+
43+
/**
44+
* Converts a Godot String to an NSString without allocating an intermediate UTF-8 buffer.
45+
* */
46+
NSString *to_nsstring(const String &p_str);
47+
/**
48+
* Converts a Godot CharString to an NSString without allocating an intermediate UTF-8 buffer.
49+
* */
50+
NSString *to_nsstring(const CharString &p_str);
51+
/**
52+
* Converts an NSString to a Godot String without allocating intermediate buffers.
53+
* */
54+
String to_string(NSString *p_str);
55+
56+
} //namespace conv
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/**************************************************************************/
2+
/* foundation_helpers.mm */
3+
/**************************************************************************/
4+
/* This file is part of: */
5+
/* GODOT ENGINE */
6+
/* https://godotengine.org */
7+
/**************************************************************************/
8+
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
9+
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
10+
/* */
11+
/* Permission is hereby granted, free of charge, to any person obtaining */
12+
/* a copy of this software and associated documentation files (the */
13+
/* "Software"), to deal in the Software without restriction, including */
14+
/* without limitation the rights to use, copy, modify, merge, publish, */
15+
/* distribute, sublicense, and/or sell copies of the Software, and to */
16+
/* permit persons to whom the Software is furnished to do so, subject to */
17+
/* the following conditions: */
18+
/* */
19+
/* The above copyright notice and this permission notice shall be */
20+
/* included in all copies or substantial portions of the Software. */
21+
/* */
22+
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
23+
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
24+
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
25+
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
26+
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
27+
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
28+
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
29+
/**************************************************************************/
30+
31+
#import "foundation_helpers.h"
32+
33+
#import "core/string/ustring.h"
34+
35+
#import <CoreFoundation/CFString.h>
36+
37+
namespace conv {
38+
39+
NSString *to_nsstring(const String &p_str) {
40+
return [[NSString alloc] initWithBytes:(const void *)p_str.ptr()
41+
length:p_str.length() * sizeof(char32_t)
42+
encoding:NSUTF32LittleEndianStringEncoding];
43+
}
44+
45+
NSString *to_nsstring(const CharString &p_str) {
46+
return [[NSString alloc] initWithBytes:(const void *)p_str.ptr()
47+
length:p_str.length()
48+
encoding:NSUTF8StringEncoding];
49+
}
50+
51+
String to_string(NSString *p_str) {
52+
CFStringRef str = (__bridge CFStringRef)p_str;
53+
CFStringEncoding fastest = CFStringGetFastestEncoding(str);
54+
// Sometimes, CFString will return a pointer to it's encoded data,
55+
// so we can create the string without allocating intermediate buffers.
56+
const char *p = CFStringGetCStringPtr(str, fastest);
57+
if (p) {
58+
switch (fastest) {
59+
case kCFStringEncodingASCII:
60+
return String::ascii(Span(p, CFStringGetLength(str)));
61+
case kCFStringEncodingUTF8:
62+
return String::utf8(p);
63+
case kCFStringEncodingUTF32LE:
64+
return String::utf32(Span((char32_t *)p, CFStringGetLength(str)));
65+
default:
66+
break;
67+
}
68+
}
69+
70+
CFRange range = CFRangeMake(0, CFStringGetLength(str));
71+
CFIndex byte_len = 0;
72+
// Try to losslessly convert the string directly into a String's buffer to avoid intermediate allocations.
73+
CFIndex n = CFStringGetBytes(str, range, kCFStringEncodingUTF32LE, 0, NO, nil, 0, &byte_len);
74+
if (n == range.length) {
75+
String res;
76+
res.resize_uninitialized((byte_len / sizeof(char32_t)) + 1);
77+
res[n] = 0;
78+
n = CFStringGetBytes(str, range, kCFStringEncodingUTF32LE, 0, NO, (UInt8 *)res.ptrw(), res.length() * sizeof(char32_t), nil);
79+
return res;
80+
}
81+
82+
return String::utf8(p_str.UTF8String);
83+
}
84+
85+
} //namespace conv

drivers/d3d12/rendering_device_driver_d3d12.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5586,6 +5586,8 @@ bool RenderingDeviceDriverD3D12::has_feature(Features p_feature) {
55865586
return true;
55875587
case SUPPORTS_BUFFER_DEVICE_ADDRESS:
55885588
return true;
5589+
case SUPPORTS_IMAGE_ATOMIC_32_BIT:
5590+
return true;
55895591
default:
55905592
return false;
55915593
}

drivers/metal/SCsub

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ thirdparty_obj = []
1212
thirdparty_dir = "#thirdparty/spirv-cross/"
1313
thirdparty_sources = [
1414
"spirv_cfg.cpp",
15-
"spirv_cross_util.cpp",
1615
"spirv_cross.cpp",
1716
"spirv_parser.cpp",
1817
"spirv_msl.cpp",

drivers/metal/metal_device_properties.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalFeatures {
9494
bool metal_fx_spatial = false; /**< If true, Metal FX spatial functions are supported. */
9595
bool metal_fx_temporal = false; /**< If true, Metal FX temporal functions are supported. */
9696
bool supports_gpu_address = false; /**< If true, referencing a GPU address in a shader is supported. */
97+
bool supports_image_atomic_32_bit = false; /**< If true, 32-bit atomic operations on images are supported. */
98+
bool supports_image_atomic_64_bit = false; /**< If true, 64-bit atomic operations on images are supported. */
9799
};
98100

99101
struct MetalLimits {

drivers/metal/metal_device_properties.mm

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,12 @@
121121
features.simdPermute = [p_device supportsFamily:MTLGPUFamilyApple6];
122122
features.simdReduction = [p_device supportsFamily:MTLGPUFamilyApple7];
123123
features.argument_buffers_tier = p_device.argumentBuffersSupport;
124+
features.supports_image_atomic_32_bit = [p_device supportsFamily:MTLGPUFamilyApple6];
125+
features.supports_image_atomic_64_bit = [p_device supportsFamily:MTLGPUFamilyApple8];
126+
if (OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_IMAGE_ATOMICS") == "1") {
127+
features.supports_image_atomic_32_bit = false;
128+
features.supports_image_atomic_64_bit = false;
129+
}
124130

125131
if (@available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) {
126132
features.needs_arg_encoders = !([p_device supportsFamily:MTLGPUFamilyMetal3] && features.argument_buffers_tier == MTLArgumentBuffersTier2);

drivers/metal/metal_objects.h

Lines changed: 80 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -309,9 +309,23 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDRenderPass {
309309

310310
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer {
311311
private:
312+
#pragma mark - Common State
313+
314+
// From RenderingDevice
315+
static constexpr uint32_t MAX_PUSH_CONSTANT_SIZE = 128;
316+
312317
RenderingDeviceDriverMetal *device_driver = nullptr;
313318
id<MTLCommandQueue> queue = nil;
314319
id<MTLCommandBuffer> commandBuffer = nil;
320+
bool state_begin = false;
321+
322+
_FORCE_INLINE_ id<MTLCommandBuffer> command_buffer() {
323+
DEV_ASSERT(state_begin);
324+
if (commandBuffer == nil) {
325+
commandBuffer = queue.commandBuffer;
326+
}
327+
return commandBuffer;
328+
}
315329

316330
void _end_compute_dispatch();
317331
void _end_blit();
@@ -326,6 +340,11 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer {
326340
void _end_render_pass();
327341
void _render_clear_render_area();
328342

343+
#pragma mark - Compute
344+
345+
void _compute_set_dirty_state();
346+
void _compute_bind_uniform_sets();
347+
329348
public:
330349
MDCommandBufferStateType type = MDCommandBufferStateType::None;
331350

@@ -349,25 +368,28 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer {
349368
LocalVector<NSUInteger> vertex_offsets;
350369
ResourceUsageMap resource_usage;
351370
// clang-format off
352-
enum DirtyFlag: uint8_t {
353-
DIRTY_NONE = 0b0000'0000,
354-
DIRTY_PIPELINE = 0b0000'0001, //! pipeline state
355-
DIRTY_UNIFORMS = 0b0000'0010, //! uniform sets
356-
DIRTY_DEPTH = 0b0000'0100, //! depth / stencil state
357-
DIRTY_VERTEX = 0b0000'1000, //! vertex buffers
358-
DIRTY_VIEWPORT = 0b0001'0000, //! viewport rectangles
359-
DIRTY_SCISSOR = 0b0010'0000, //! scissor rectangles
360-
DIRTY_BLEND = 0b0100'0000, //! blend state
361-
DIRTY_RASTER = 0b1000'0000, //! encoder state like cull mode
362-
363-
DIRTY_ALL = 0xff,
371+
enum DirtyFlag: uint16_t {
372+
DIRTY_NONE = 0,
373+
DIRTY_PIPELINE = 1 << 0, //! pipeline state
374+
DIRTY_UNIFORMS = 1 << 1, //! uniform sets
375+
DIRTY_PUSH = 1 << 2, //! push constants
376+
DIRTY_DEPTH = 1 << 3, //! depth / stencil state
377+
DIRTY_VERTEX = 1 << 4, //! vertex buffers
378+
DIRTY_VIEWPORT = 1 << 5, //! viewport rectangles
379+
DIRTY_SCISSOR = 1 << 6, //! scissor rectangles
380+
DIRTY_BLEND = 1 << 7, //! blend state
381+
DIRTY_RASTER = 1 << 8, //! encoder state like cull mode
382+
DIRTY_ALL = (1 << 9) - 1,
364383
};
365384
// clang-format on
366385
BitField<DirtyFlag> dirty = DIRTY_NONE;
367386

368387
LocalVector<MDUniformSet *> uniform_sets;
369388
// Bit mask of the uniform sets that are dirty, to prevent redundant binding.
370389
uint64_t uniform_set_mask = 0;
390+
uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];
391+
uint32_t push_constant_data_len = 0;
392+
uint32_t push_constant_bindings[2] = { 0 };
371393

372394
_FORCE_INLINE_ void reset();
373395
void end_encoding();
@@ -422,6 +444,13 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer {
422444
dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
423445
}
424446

447+
_FORCE_INLINE_ void mark_push_constants_dirty() {
448+
if (push_constant_data_len == 0) {
449+
return;
450+
}
451+
dirty.set_flag(DirtyFlag::DIRTY_PUSH);
452+
}
453+
425454
_FORCE_INLINE_ void mark_blend_dirty() {
426455
if (!blend_constants.has_value()) {
427456
return;
@@ -464,16 +493,46 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer {
464493
MDComputePipeline *pipeline = nullptr;
465494
id<MTLComputeCommandEncoder> encoder = nil;
466495
ResourceUsageMap resource_usage;
467-
_FORCE_INLINE_ void reset() {
468-
pipeline = nil;
469-
encoder = nil;
470-
// Keep the keys, as they are likely to be used again.
471-
for (KeyValue<StageResourceUsage, LocalVector<__unsafe_unretained id<MTLResource>>> &kv : resource_usage) {
472-
kv.value.clear();
496+
// clang-format off
497+
enum DirtyFlag: uint16_t {
498+
DIRTY_NONE = 0,
499+
DIRTY_PIPELINE = 1 << 0, //! pipeline state
500+
DIRTY_UNIFORMS = 1 << 1, //! uniform sets
501+
DIRTY_PUSH = 1 << 2, //! push constants
502+
DIRTY_ALL = (1 << 3) - 1,
503+
};
504+
// clang-format on
505+
BitField<DirtyFlag> dirty = DIRTY_NONE;
506+
507+
LocalVector<MDUniformSet *> uniform_sets;
508+
// Bit mask of the uniform sets that are dirty, to prevent redundant binding.
509+
uint64_t uniform_set_mask = 0;
510+
uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];
511+
uint32_t push_constant_data_len = 0;
512+
uint32_t push_constant_bindings[1] = { 0 };
513+
514+
_FORCE_INLINE_ void reset();
515+
void end_encoding();
516+
517+
_FORCE_INLINE_ void mark_uniforms_dirty(void) {
518+
if (uniform_sets.is_empty()) {
519+
return;
473520
}
521+
for (uint32_t i = 0; i < uniform_sets.size(); i++) {
522+
if (uniform_sets[i] != nullptr) {
523+
uniform_set_mask |= 1 << i;
524+
}
525+
}
526+
dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
527+
}
528+
529+
_FORCE_INLINE_ void mark_push_constants_dirty() {
530+
if (push_constant_data_len == 0) {
531+
return;
532+
}
533+
dirty.set_flag(DirtyFlag::DIRTY_PUSH);
474534
}
475535

476-
void end_encoding();
477536
} compute;
478537

479538
// State specific to a blit pass.
@@ -496,6 +555,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer {
496555
void encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDescriptor *p_desc, NSString *p_label);
497556

498557
void bind_pipeline(RDD::PipelineID p_pipeline);
558+
void encode_push_constant_data(RDD::ShaderID p_shader, VectorView<uint32_t> p_data);
499559

500560
#pragma mark - Render Commands
501561

@@ -661,8 +721,6 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDShader {
661721
Vector<UniformSet> sets;
662722
bool uses_argument_buffers = true;
663723

664-
virtual void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) = 0;
665-
666724
MDShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers) :
667725
name(p_name), sets(p_sets), uses_argument_buffers(p_uses_argument_buffers) {}
668726
virtual ~MDShader() = default;
@@ -671,15 +729,13 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDShader {
671729
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDComputeShader final : public MDShader {
672730
public:
673731
struct {
674-
uint32_t binding = -1;
732+
int32_t binding = -1;
675733
uint32_t size = 0;
676734
} push_constants;
677735
MTLSize local = {};
678736

679737
MDLibrary *kernel;
680738

681-
void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) final;
682-
683739
MDComputeShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers, MDLibrary *p_kernel);
684740
};
685741

@@ -700,8 +756,6 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDRenderShader final : p
700756
MDLibrary *vert;
701757
MDLibrary *frag;
702758

703-
void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) final;
704-
705759
MDRenderShader(CharString p_name,
706760
Vector<UniformSet> p_sets,
707761
bool p_needs_view_mask_buffer,

0 commit comments

Comments
 (0)