Skip to content

Commit fa5dc88

Browse files
committed
Use HW transform on batch nodes if possible.
1 parent af0afb5 commit fa5dc88

File tree

1 file changed

+103
-50
lines changed

1 file changed

+103
-50
lines changed

cocos2d/CCDrawNode.m

Lines changed: 103 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,23 @@
3535
#import "CCConfiguration.h"
3636
#import "CCMetalSupport_Private.h"
3737

38+
// GLSL vertex shader source for the hardware-transform path.
// The vertex position is multiplied by the u_MVP uniform on the GPU, and the vertex color is
// clamped to [0, 1] before being passed to the fragment stage.
// Faster for draw nodes that draw many vertexes, but can't be batched.
// Declared const so the shared source cannot be reassigned at runtime.
static NSString * const CCDrawNodeHWTransformVertexShaderSource =
	@"uniform highp mat4 u_MVP;\n"
	@"void main(){\n"
	@" gl_Position = u_MVP*cc_Position;\n"
	@" cc_FragColor = clamp(cc_Color, 0.0, 1.0);\n"
	@" cc_FragTexCoord1 = cc_TexCoord1;\n"
	@"}\n";
47+
3848
#ifdef ANDROID // Many Android devices do NOT support GL_OES_standard_derivatives correctly
39-
static NSString *CCDrawNodeShaderSource =
49+
static NSString *CCDrawNodeFragmentShaderSource =
4050
@"void main(){\n"
4151
@" gl_FragColor = cc_FragColor*step(0.0, 1.0 - length(cc_FragTexCoord1));\n"
4252
@"}\n";
4353
#else
44-
static NSString *CCDrawNodeShaderSource =
54+
static NSString *CCDrawNodeFragmentShaderSource =
4555
@"#ifdef GL_ES\n"
4656
@"#extension GL_OES_standard_derivatives : enable\n"
4757
@"#endif\n"
@@ -55,31 +65,35 @@ @implementation CCDrawNode {
5565
GLsizei _vertexCount, _vertexCapacity;
5666
CCVertex *_vertexes;
5767

58-
GLsizei _elementCount, _elementCapacity;
59-
GLushort *_elements;
68+
GLsizei _indexCount, _indexCapacity;
69+
GLushort *_indexes;
70+
71+
BOOL _useBatchMode;
6072
}
6173

62-
// Shared shaders used by CCDrawNode instances. Both are assigned in +initialize.
// CCDRAWNODE_HWTRANSFORM_SHADER applies the u_MVP uniform in its vertex stage. It stays nil when
// the Metal graphics API is selected, because no HW transform shader is created on that path.
CCShader *CCDRAWNODE_HWTRANSFORM_SHADER = nil;
// CCDRAWNODE_BATCH_SHADER is the shader used in batch mode, where -draw:transform: transforms the
// vertexes on the CPU before handing them to the renderer.
CCShader *CCDRAWNODE_BATCH_SHADER = nil;

// Creates the shared draw node shaders for the active graphics API.
+(void)initialize
{
	// +initialize is also sent on behalf of every subclass that does not override it.
	// Without this guard the shared shaders would be re-created for each such subclass, and the
	// pointer comparison against CCDRAWNODE_HWTRANSFORM_SHADER in -enableBatchMode would stop
	// matching for nodes that were created earlier.
	if(self != [CCDrawNode class]) return;
	
#if __CC_METAL_SUPPORTED_AND_ENABLED
	if([CCConfiguration sharedConfiguration].graphicsAPI == CCGraphicsAPIMetal){
		id<MTLLibrary> library = [CCMetalContext currentContext].library;
		NSAssert(library, @"Metal shader library not found.");
		
		id<MTLFunction> vertexFunc = [library newFunctionWithName:@"CCVertexFunctionDefault"];
		
		// Only the batch shader exists for Metal; CCDRAWNODE_HWTRANSFORM_SHADER is left nil.
		CCDRAWNODE_BATCH_SHADER = [[CCShader alloc] initWithMetalVertexFunction:vertexFunc fragmentFunction:[library newFunctionWithName:@"CCFragmentFunctionDefaultDrawNode"]];
		CCDRAWNODE_BATCH_SHADER.debugName = @"CCFragmentFunctionDefaultDrawNode";
	} else
#endif
	{
		// Both shaders share the same fragment source; only the HW transform one gets the custom vertex source.
		CCDRAWNODE_HWTRANSFORM_SHADER = [[CCShader alloc] initWithVertexShaderSource:CCDrawNodeHWTransformVertexShaderSource fragmentShaderSource:CCDrawNodeFragmentShaderSource];
		CCDRAWNODE_HWTRANSFORM_SHADER.debugName = @"CCDRAWNODE_HWTRANSFORM_SHADER";
		
		CCDRAWNODE_BATCH_SHADER = [[CCShader alloc] initWithFragmentShaderSource:CCDrawNodeFragmentShaderSource];
		CCDRAWNODE_BATCH_SHADER.debugName = @"CCDRAWNODE_BATCH_SHADER";
	}
}
8498

8599
#pragma mark memory
@@ -88,44 +102,47 @@ -(CCRenderBuffer)bufferVertexes:(GLsizei)vertexCount andTriangleCount:(GLsizei)t
88102
{
89103
GLsizei requiredVertexes = _vertexCount + vertexCount;
90104
if(requiredVertexes > _vertexCapacity){
91-
// Double the size of the buffer until it fits.
92-
while(requiredVertexes >= _vertexCapacity) _vertexCapacity *= 2;
93-
105+
_vertexCapacity = requiredVertexes*1.5;
94106
_vertexes = realloc(_vertexes, _vertexCapacity*sizeof(*_vertexes));
95107
}
96108

97-
GLsizei elementCount = 3*triangleCount;
98-
GLsizei requiredElements = _elementCount + elementCount;
99-
if(requiredElements > _elementCapacity){
100-
// Double the size of the buffer until it fits.
101-
while(requiredElements >= _elementCapacity) _elementCapacity *= 2;
102-
103-
_elements = realloc(_elements, _elementCapacity*sizeof(*_elements));
109+
GLsizei indexCount = 3*triangleCount;
110+
GLsizei requiredIndexes = _indexCount + indexCount;
111+
if(requiredIndexes > _indexCapacity){
112+
_indexCapacity = requiredIndexes*1.5;
113+
_indexes = realloc(_indexes, _indexCapacity*sizeof(*_indexes));
104114
}
105115

106116
CCRenderBuffer buffer = {
107117
_vertexes + _vertexCount,
108-
_elements + _elementCount,
118+
_indexes + _indexCount,
109119
_vertexCount
110120
};
111121

112122
_vertexCount += vertexCount;
113-
_elementCount += elementCount;
123+
_indexCount += indexCount;
114124

115125
return buffer;
116126
}
117127

118128
-(id)init
119129
{
120130
if((self = [super init])){
121-
self.blendMode = [CCBlendMode premultipliedAlphaMode];
122-
self.shader = [CCDrawNode fragmentShader];
131+
_blendMode = [CCBlendMode premultipliedAlphaMode];
132+
133+
if(CCDRAWNODE_HWTRANSFORM_SHADER){
134+
_shader = CCDRAWNODE_HWTRANSFORM_SHADER;
135+
} else {
136+
// HWTransform shader not currently supported for Metal rendering.
137+
_shader = CCDRAWNODE_BATCH_SHADER;
138+
_useBatchMode = YES;
139+
}
123140

124141
_vertexCapacity = 128;
125142
_vertexes = calloc(_vertexCapacity, sizeof(*_vertexes));
126143

127-
_elementCapacity = 128;
128-
_elements = calloc(_elementCapacity, sizeof(*_elements));
144+
_indexCapacity = 128;
145+
_indexes = calloc(_indexCapacity, sizeof(*_indexes));
129146
}
130147

131148
return self;
@@ -134,25 +151,61 @@ -(id)init
134151
// Releases the C arrays that hold the node's vertexes and indexes.
-(void)dealloc
{
	// Both arrays are plain C allocations (calloc/realloc), so they are freed by hand.
	free(_vertexes);
	free(_indexes);
	
	// Clear the pointers so nothing is left dangling.
	_vertexes = NULL;
	_indexes = NULL;
}
139156

140157
#pragma mark Rendering
141158

159+
// Switches the node to batch mode, where vertexes are transformed on the CPU in -draw:transform:.
-(void)enableBatchMode
{
	// Only the built-in HW transform shader is swapped out; any other shader is kept as is.
	BOOL usesHWTransformShader = (_shader == CCDRAWNODE_HWTRANSFORM_SHADER);
	if(usesHWTransformShader) _shader = CCDRAWNODE_BATCH_SHADER;
	
	_useBatchMode = YES;
	
	// Drop the render state so it is reset for the new configuration.
	_renderState = nil;
}
170+
171+
// Blend mode setter override.
// A blend mode chosen by the user forces the node into batch mode.
-(void)setBlendMode:(CCBlendMode *)blendMode
{
	// Store the new blend mode first, then switch modes.
	[super setBlendMode:blendMode];
	
	[self enableBatchMode];
}
177+
178+
// Shader setter override.
// A shader chosen by the user forces the node into batch mode.
-(void)setShader:(CCShader *)shader
{
	// Store the new shader first; -enableBatchMode only replaces the built-in HW transform shader.
	[super setShader:shader];
	
	[self enableBatchMode];
}
183+
142184
// Submits the node's buffered triangles to the renderer.
// In batch mode the vertexes are transformed on the CPU; otherwise the transform is passed to the
// shader through the u_MVP uniform and the vertexes are copied untouched.
-(void)draw:(CCRenderer *)renderer transform:(const GLKMatrix4 *)transform
{
	// Nothing has been buffered, so there is nothing to enqueue.
	if(_indexCount == 0) return;
	
	// The uniform is written before self.renderState is read below.
	if(!_useBatchMode){
		self.shaderUniforms[@"u_MVP"] = [NSValue valueWithGLKMatrix4:*transform];
	}
	
	CCRenderBuffer target = [renderer enqueueTriangles:_indexCount/3 andVertexes:_vertexCount withState:self.renderState globalSortOrder:0];
	
	if(!_useBatchMode){
		// Copy the vertexes as they are; the GPU applies u_MVP.
		memcpy(target.vertexes, _vertexes, _vertexCount*sizeof(*_vertexes));
	} else {
		// Apply the transform to each vertex on the CPU.
		for(int vertexIndex = 0; vertexIndex < _vertexCount; vertexIndex++){
			CCRenderBufferSetVertex(target, vertexIndex, CCVertexApplyTransform(_vertexes[vertexIndex], transform));
		}
	}
	
	// The stored indexes are relative to this node's first vertex; offset them by the buffer's start index.
	for(int elementIndex = 0; elementIndex < _indexCount; elementIndex++){
		target.elements[elementIndex] = _indexes[elementIndex] + target.startIndex;
	}
}
157210

158211
#pragma mark Immediate Mode
@@ -290,7 +343,7 @@ -(void)drawPolyWithVerts:(const CGPoint *)_verts count:(NSUInteger)count fillCol
290343
// Discards all buffered geometry by resetting the vertex and index counts.
// The allocated arrays and their capacities are left untouched.
-(void)clear
{
	_vertexCount = _indexCount = 0;
}
295348

296349
@end

0 commit comments

Comments
 (0)