Skip to content

Commit 1670e3a

Browse files
committed
Cocoa Port: OpenGL display views can now perform color conversions and apply the NDS Master Brightness on the GPU during video fetch, just as Metal display views do.
- Systems with fast, non-Metal-capable GPUs (such as the MacBook Pro 17" Late-2011 or the iMac 27" Mid-2011) will see the most benefit, especially when running 18-bit video at high GPU scaling factors.
- For older systems with slower GPUs, running the new GPU-based video postprocessing may result in an overall performance loss. To run the video postprocessing on the CPU as before, use the menu option "View > Run Filters on GPU" to control whether video postprocessing happens on the GPU or on the CPU.
1 parent a779eb7 commit 1670e3a

File tree

8 files changed

+1209
-492
lines changed

8 files changed

+1209
-492
lines changed

desmume/src/frontend/cocoa/OGLDisplayOutput.cpp

Lines changed: 1047 additions & 422 deletions
Large diffs are not rendered by default.

desmume/src/frontend/cocoa/OGLDisplayOutput.h

Lines changed: 81 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,38 @@ enum ShaderSupportTier
5252
ShaderSupport_FutureTier = 6
5353
};
5454

55-
struct OGLProcessedFrameInfo
55+
// Named indices for three texture units: video, master brightness mode, and
// master brightness intensity.
// NOTE(review): presumably these are the sampler slots bound by the GPU fetch
// shader programs introduced in this commit — confirm against OGLDisplayOutput.cpp,
// whose diff is not shown here.
enum OGLDisplayTextureUnitID
5656
{
57+
OGLDisplayTextureUnitID_Video = 0,
58+
OGLDisplayTextureUnitID_MasterBrightnessMode = 1,
59+
OGLDisplayTextureUnitID_MasterBrightnessIntensity // no explicit initializer, so this takes the value 2
60+
};
61+
62+
struct OGLFrameInfoFetch
63+
{
64+
uint64_t sequenceNumber;
5765
uint8_t bufferIndex;
5866
GLuint texID[2];
59-
bool isMainDisplayProcessed;
60-
bool isTouchDisplayProcessed;
67+
GLsizei width[2];
68+
GLsizei height[2];
69+
float backlightIntensity[2];
70+
71+
bool isNativeRender[2];
72+
bool isNativeSize[2];
73+
bool isDisplayEnabled[2];
74+
bool isDisplayAllWhite[2];
75+
bool isDisplayAllBlack[2];
76+
bool isDisplayProcessPossible[2];
77+
};
78+
typedef struct OGLFrameInfoFetch OGLFrameInfoFetch;
79+
80+
struct OGLFrameInfoProcessed
81+
{
82+
uint64_t sequenceNumber;
83+
GLuint texID[2];
84+
float backlightIntensity[2];
6185
};
62-
typedef struct OGLProcessedFrameInfo OGLProcessedFrameInfo;
86+
typedef struct OGLFrameInfoProcessed OGLFrameInfoProcessed;
6387

6488
class OGLContextInfo
6589
{
@@ -70,11 +94,13 @@ class OGLContextInfo
7094
char _rendererString[256];
7195
ShaderSupportTier _shaderSupport;
7296
bool _useShader150;
97+
bool _isIntegerTextureSupported;
7398

7499
bool _isVBOSupported;
75100
bool _isVAOSupported;
76101
bool _isPBOSupported;
77102
bool _isFBOSupported;
103+
bool _isTBOSupported;
78104

79105
public:
80106
OGLContextInfo();
@@ -86,11 +112,13 @@ class OGLContextInfo
86112
const char* GetRendererString() const;
87113

88114
bool IsUsingShader150();
115+
bool IsIntegerTextureSupported() const;
89116
bool IsVBOSupported();
90117
bool IsVAOSupported();
91118
bool IsPBOSupported();
92119
bool IsShaderSupported();
93120
bool IsFBOSupported();
121+
bool IsTBOSupported() const;
94122
ShaderSupportTier GetShaderSupport();
95123

96124
virtual void GetExtensionSetOGL(std::set<std::string> *oglExtensionSet) = 0;
@@ -114,8 +142,8 @@ class OGLShaderProgram
114142
GLuint _programID;
115143
ShaderSupportTier _shaderSupport;
116144

117-
virtual GLuint LoadShaderOGL(GLenum shaderType, const char *shaderProgram, bool useShader150);
118-
virtual bool LinkOGL();
145+
virtual GLuint _LoadShaderOGL(GLenum shaderType, const char *shaderProgram, bool useShader150, bool useIntegerTextures);
146+
virtual bool _LinkOGL();
119147

120148
public:
121149
OGLShaderProgram();
@@ -126,8 +154,8 @@ class OGLShaderProgram
126154
GLuint GetVertexShaderID();
127155
void SetVertexShaderOGL(const char *shaderProgram, bool useVtxColors, bool useShader150);
128156
GLuint GetFragmentShaderID();
129-
void SetFragmentShaderOGL(const char *shaderProgram, bool useShader150);
130-
void SetVertexAndFragmentShaderOGL(const char *vertShaderProgram, const char *fragShaderProgram, bool useVtxColors, bool useShader150);
157+
void SetFragmentShaderOGL(const char *shaderProgram, bool useShader150, bool useIntegerTextures);
158+
void SetVertexAndFragmentShaderOGL(const char *vertShaderProgram, const char *fragShaderProgram, bool useVtxColors, bool useShader150, bool useIntegerTextures);
131159
GLuint GetProgramID();
132160
};
133161

@@ -163,6 +191,8 @@ class OGLFilter
163191
virtual void SetSrcSizeOGL(GLsizei w, GLsizei h);
164192
GLfloat GetScale();
165193
void SetScaleOGL(GLfloat scale, void *buffer);
194+
GLsizei GetDstWidth() const;
195+
GLsizei GetDstHeight() const;
166196
virtual GLuint RunFilterOGL(GLuint srcTexID);
167197
void DownloadDstBufferOGL(uint32_t *dstBuffer, size_t lineOffset, size_t readLineCount);
168198
};
@@ -326,6 +356,7 @@ class OGLDisplayLayer : public OGLVideoLayer
326356

327357
void _UpdateRotationScaleOGL();
328358
void _UpdateVerticesOGL();
359+
GLuint _ProcessDisplayByID_OGL(NDSDisplayID displayID, GLuint texID, size_t bufferIndex, bool useDeposterize, VideoFilterTypeID filterID, VideoFilter *cpuFilter, GLsizei &inoutWidth, GLsizei &inoutHeight);
329360

330361
public:
331362
OGLDisplayLayer() {};
@@ -346,40 +377,74 @@ class OGLClientSharedData
346377
{
347378
protected:
348379
OGLContextInfo *_contextInfo;
380+
381+
GLuint _vboVideoProcVtxID;
382+
GLuint _vboVideoProcTexCoordNativeID;
383+
GLuint _vboVideoProcTexCoordCustomID;
384+
GLuint _vaoVideoProcNativeID;
385+
GLuint _vaoVideoProcCustomID;
386+
GLuint _fboDisplayNativeID[2];
387+
GLuint _fboDisplayCustomID[2];
388+
349389
GLenum _fetchColorFormatOGL;
350390
GLuint _texDisplayFetchNative[2][OPENGL_FETCH_BUFFER_COUNT];
351391
GLuint _texDisplayFetchCustom[2][OPENGL_FETCH_BUFFER_COUNT];
392+
GLuint _texDisplayPostprocessNative[2];
393+
GLuint _texDisplayPostprocessCustom[2];
394+
GLuint _texDisplayAllWhite;
395+
GLuint _texDisplayAllBlack;
396+
397+
GLuint _texMasterBrightnessMode[2][OPENGL_FETCH_BUFFER_COUNT];
398+
GLuint _texMasterBrightnessIntensity[2][OPENGL_FETCH_BUFFER_COUNT];
352399

353400
GLuint _texLQ2xLUT;
354401
GLuint _texHQ2xLUT;
355402
GLuint _texHQ3xLUT;
356403
GLuint _texHQ4xLUT;
357404

358-
GLuint _texFetch[2];
405+
GLuint _texFetchSelected[2];
359406

407+
bool _canProcessFetchOnGPU;
408+
bool _preferCPUVideoProcessing;
360409
bool _useDirectToCPUFilterPipeline;
361410
uint32_t *_srcNativeCloneMaster;
362411
uint32_t *_srcNativeClone[2][OPENGL_FETCH_BUFFER_COUNT];
363412
pthread_rwlock_t _srcCloneRWLock[2][OPENGL_FETCH_BUFFER_COUNT];
364-
pthread_rwlock_t _texFetchRWLock[2];
365413
bool _srcCloneNeedsUpdate[2][OPENGL_FETCH_BUFFER_COUNT];
366414

415+
OGLShaderProgram *_programFetch666ConvertOnly;
416+
OGLShaderProgram *_programFetch666WithMB;
417+
OGLShaderProgram *_programFetch888WithMB;
418+
GLint _vtxTexCoordBufferNative[8];
419+
GLint _vtxTexCoordBufferCustom[8];
420+
421+
pthread_rwlock_t _fetchInfoRWLock;
422+
OGLFrameInfoFetch _fetchInfo;
423+
424+
GLuint _FetchFromDisplayID_OGL(const NDSDisplayInfo &currentDisplayInfo, NDSDisplayID displayID, GLuint texVideoID, GLuint texMBModeID, GLuint texMBIntensityID);
425+
367426
public:
368427
OGLClientSharedData();
369428
virtual ~OGLClientSharedData();
370429

371430
void SetContextInfo(OGLContextInfo *contextInfo);
372431
OGLContextInfo* GetContextInfo() const;
373432

433+
bool CanProcessFetchOnGPU() const;
434+
435+
void SetPreferCPUVideoProcessing(bool prefersCPUVideoProcessing);
436+
bool PreferCPUVideoProcessing() const;
437+
374438
void SetUseDirectToCPUFilterPipeline(bool willUseDirectCPU);
375439
bool UseDirectToCPUFilterPipeline() const;
376440

377-
virtual GLuint GetFetchTexture(const NDSDisplayID displayID);
378-
virtual void SetFetchTexture(const NDSDisplayID displayID, GLuint texID);
441+
OGLFrameInfoFetch GetFetchInfo();
379442

380443
uint32_t* GetSrcClone(const NDSDisplayID displayID, const u8 bufferIndex) const;
381444
GLuint GetTexNative(const NDSDisplayID displayID, const u8 bufferIndex) const;
382445
GLuint GetTexCustom(const NDSDisplayID displayID, const u8 bufferIndex) const;
446+
GLuint GetTexDisplayAllWhite() const;
447+
GLuint GetTexDisplayAllBlack() const;
383448

384449
// For lack of a better place, we're putting the HQnx LUTs in the fetch object because
385450
// we know that it will be shared for all display views.
@@ -394,7 +459,7 @@ class OGLClientSharedData
394459

395460
// OpenGL-specific functions that must be called in response to their
396461
// corresponding GPUClientFetchObject methods.
397-
void InitOGL();
462+
void InitOGL(const NDSDisplayInfo *displayInfoList, const NDSDisplayInfo &currentDisplayInfo);
398463
void SetFetchBuffersOGL(const NDSDisplayInfo *displayInfoList, const NDSDisplayInfo &currentDisplayInfo);
399464
void FetchFromBufferIndexOGL(const u8 index, const NDSDisplayInfo &currentDisplayInfo);
400465
void FetchNativeDisplayByID_OGL(const NDSDisplayInfo *displayInfoList, const NDSDisplayID displayID, const u8 bufferIndex);
@@ -413,7 +478,7 @@ class OGLVideoOutput : public ClientDisplay3DPresenter
413478
GLuint _texCPUFilterDstID[2];
414479
GLuint _fboFrameCopyID;
415480

416-
OGLProcessedFrameInfo _processedFrameInfo;
481+
OGLFrameInfoProcessed _processedFrameInfo;
417482

418483
std::vector<OGLVideoLayer *> *_layerList;
419484

@@ -459,8 +524,8 @@ class OGLVideoOutput : public ClientDisplay3DPresenter
459524
virtual void PrerenderStateSetupOGL();
460525
virtual void RenderFrameOGL(bool isRenderingFlipped);
461526

462-
virtual const OGLProcessedFrameInfo& GetProcessedFrameInfo();
463-
virtual void SetProcessedFrameInfo(const OGLProcessedFrameInfo &processedInfo);
527+
virtual const OGLFrameInfoProcessed& GetFrameInfoProcessed();
528+
virtual void SetFrameInfoProcessed(const OGLFrameInfoProcessed &processedInfo);
464529

465530
virtual void WriteLockEmuFramebuffer(const uint8_t bufferIndex);
466531
virtual void ReadLockEmuFramebuffer(const uint8_t bufferIndex);

desmume/src/frontend/cocoa/OGLDisplayOutput_3_2.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ void glGenVertexArrays_3_2(GLsizei n, GLuint *vaoIDs)
4141
OGLContextInfo_3_2::OGLContextInfo_3_2()
4242
{
4343
_useShader150 = true;
44+
_isIntegerTextureSupported = true;
4445
_isVBOSupported = true;
4546
_isVAOSupported = true;
4647
_isPBOSupported = true;

desmume/src/frontend/cocoa/cocoa_GPU.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ class MacGPUFetchObjectDisplayLink : public MacGPUFetchObjectAsync
114114
pthread_rwlock_t *_rwlockOutputList;
115115
pthread_mutex_t _mutexDisplayLinkLists;
116116
NSMutableArray *_cdsOutputList;
117+
volatile int32_t _numberViewsPreferringCPUVideoProcessing;
117118
volatile int32_t _numberViewsUsingDirectToCPUFiltering;
118119

119120
DisplayLinksActiveMap _displayLinksActiveList;
@@ -123,11 +124,17 @@ class MacGPUFetchObjectDisplayLink : public MacGPUFetchObjectAsync
123124
MacGPUFetchObjectDisplayLink();
124125
~MacGPUFetchObjectDisplayLink();
125126

127+
volatile int32_t GetNumberViewsPreferringCPUVideoProcessing() const;
126128
volatile int32_t GetNumberViewsUsingDirectToCPUFiltering() const;
127129

128130
void SetOutputList(NSMutableArray *theOutputList, pthread_rwlock_t *theRWLock);
131+
132+
void IncrementViewsPreferringCPUVideoProcessing();
133+
void DecrementViewsPreferringCPUVideoProcessing();
134+
129135
void IncrementViewsUsingDirectToCPUFiltering();
130136
void DecrementViewsUsingDirectToCPUFiltering();
137+
131138
void PushVideoDataToAllDisplayViews();
132139

133140
void DisplayLinkStartUsingID(CGDirectDisplayID displayID);

desmume/src/frontend/cocoa/cocoa_GPU.mm

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ - (id)init
173173
{
174174
#ifdef PORT_VERSION_OS_X_APP
175175
fetchObject = new MacOGLClientFetchObject;
176+
GPU->SetWillPostprocessDisplays(false);
176177
#else
177178
fetchObject = new OE_OGLClientFetchObject;
178179
#endif
@@ -1474,6 +1475,7 @@ static CVReturn MacDisplayLinkCallback(CVDisplayLinkRef displayLink,
14741475

14751476
_rwlockOutputList = NULL;
14761477
_cdsOutputList = nil;
1478+
_numberViewsPreferringCPUVideoProcessing = 0;
14771479
_numberViewsUsingDirectToCPUFiltering = 0;
14781480

14791481
_displayLinksActiveList.clear();
@@ -1532,6 +1534,11 @@ static CVReturn MacDisplayLinkCallback(CVDisplayLinkRef displayLink,
15321534
}
15331535
}
15341536

1537+
// Returns the current value of _numberViewsPreferringCPUVideoProcessing.
// The counter is modified through atomic_inc_32()/atomic_dec_32() in the matching
// Increment/Decrement methods; this getter performs a plain read of the volatile member.
// NOTE(review): the 'volatile' qualifier on a by-value return type has no effect on
// the caller's copy — it appears to be kept only to mirror
// GetNumberViewsUsingDirectToCPUFiltering().
volatile int32_t MacGPUFetchObjectDisplayLink::GetNumberViewsPreferringCPUVideoProcessing() const
1538+
{
1539+
return this->_numberViewsPreferringCPUVideoProcessing;
1540+
}
1541+
15351542
volatile int32_t MacGPUFetchObjectDisplayLink::GetNumberViewsUsingDirectToCPUFiltering() const
15361543
{
15371544
return this->_numberViewsUsingDirectToCPUFiltering;
@@ -1558,6 +1565,16 @@ static CVReturn MacDisplayLinkCallback(CVDisplayLinkRef displayLink,
15581565
this->_rwlockOutputList = theRWLock;
15591566
}
15601567

1568+
// Adds one to _numberViewsPreferringCPUVideoProcessing using atomic_inc_32().
// Called from -setVideoFiltersPreferGPU: when a display view's "request filter on CPU"
// state changes from off to on.
void MacGPUFetchObjectDisplayLink::IncrementViewsPreferringCPUVideoProcessing()
1569+
{
1570+
atomic_inc_32(&this->_numberViewsPreferringCPUVideoProcessing);
1571+
}
1572+
1573+
// Subtracts one from _numberViewsPreferringCPUVideoProcessing using atomic_dec_32().
// Called from -setVideoFiltersPreferGPU: when a display view's "request filter on CPU"
// state changes from on to off.
// NOTE(review): there is no guard against going below zero here; correctness relies on
// callers pairing every decrement with an earlier increment — confirm that views which
// start out preferring CPU processing are counted at creation.
void MacGPUFetchObjectDisplayLink::DecrementViewsPreferringCPUVideoProcessing()
1574+
{
1575+
atomic_dec_32(&this->_numberViewsPreferringCPUVideoProcessing);
1576+
}
1577+
15611578
void MacGPUFetchObjectDisplayLink::IncrementViewsUsingDirectToCPUFiltering()
15621579
{
15631580
atomic_inc_32(&this->_numberViewsUsingDirectToCPUFiltering);

desmume/src/frontend/cocoa/userinterface/DisplayWindowController.mm

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2005,23 +2005,40 @@ - (BOOL) useVerticalSync
20052005

20062006
- (void) setVideoFiltersPreferGPU:(BOOL)theState
20072007
{
2008-
const BOOL oldState = ( ![[self cdsVideoOutput] willFilterOnGPU] && ![[self cdsVideoOutput] sourceDeposterize] && ([[self cdsVideoOutput] pixelScaler] != VideoFilterTypeID_None) );
2008+
const BOOL oldStateWillFilterDirectToCPU = ( ![[self cdsVideoOutput] willFilterOnGPU] && ![[self cdsVideoOutput] sourceDeposterize] && ([[self cdsVideoOutput] pixelScaler] != VideoFilterTypeID_None) );
2009+
const BOOL oldStateRequestFilterOnCPU = ![[self cdsVideoOutput] videoFiltersPreferGPU];
20092010
[[self cdsVideoOutput] setVideoFiltersPreferGPU:theState];
2010-
const BOOL newState = ( ![[self cdsVideoOutput] willFilterOnGPU] && ![[self cdsVideoOutput] sourceDeposterize] && ([[self cdsVideoOutput] pixelScaler] != VideoFilterTypeID_None) );
2011+
const BOOL newStateRequestFilterOnCPU = ![[self cdsVideoOutput] videoFiltersPreferGPU];
2012+
const BOOL newStateWillFilterDirectToCPU = ( ![[self cdsVideoOutput] willFilterOnGPU] && ![[self cdsVideoOutput] sourceDeposterize] && ([[self cdsVideoOutput] pixelScaler] != VideoFilterTypeID_None) );
20112013

2012-
if (oldState != newState)
2014+
if ( (oldStateRequestFilterOnCPU != newStateRequestFilterOnCPU) || (oldStateWillFilterDirectToCPU != newStateWillFilterDirectToCPU) )
20132015
{
20142016
DisplayWindowController *windowController = (DisplayWindowController *)[[self window] delegate];
20152017
CocoaDSCore *cdsCore = (CocoaDSCore *)[[[windowController emuControl] cdsCoreController] content];
20162018
MacGPUFetchObjectDisplayLink *dlFetchObj = (MacGPUFetchObjectDisplayLink *)[[cdsCore cdsGPU] fetchObject];
20172019

2018-
if (newState)
2020+
if (oldStateRequestFilterOnCPU != newStateRequestFilterOnCPU)
20192021
{
2020-
dlFetchObj->IncrementViewsUsingDirectToCPUFiltering();
2022+
if (newStateRequestFilterOnCPU)
2023+
{
2024+
dlFetchObj->IncrementViewsPreferringCPUVideoProcessing();
2025+
}
2026+
else
2027+
{
2028+
dlFetchObj->DecrementViewsPreferringCPUVideoProcessing();
2029+
}
20212030
}
2022-
else
2031+
2032+
if (oldStateWillFilterDirectToCPU != newStateWillFilterDirectToCPU)
20232033
{
2024-
dlFetchObj->DecrementViewsUsingDirectToCPUFiltering();
2034+
if (newStateWillFilterDirectToCPU)
2035+
{
2036+
dlFetchObj->IncrementViewsUsingDirectToCPUFiltering();
2037+
}
2038+
else
2039+
{
2040+
dlFetchObj->DecrementViewsUsingDirectToCPUFiltering();
2041+
}
20252042
}
20262043

20272044
[[self cdsVideoOutput] signalMessage:MESSAGE_RELOAD_REPROCESS_REDRAW];
@@ -2035,17 +2052,17 @@ - (BOOL) videoFiltersPreferGPU
20352052

20362053
- (void) setSourceDeposterize:(BOOL)theState
20372054
{
2038-
const BOOL oldState = ( ![[self cdsVideoOutput] willFilterOnGPU] && ![[self cdsVideoOutput] sourceDeposterize] && ([[self cdsVideoOutput] pixelScaler] != VideoFilterTypeID_None) );
2055+
const BOOL oldStateWillFilterDirectToCPU = ( ![[self cdsVideoOutput] willFilterOnGPU] && ![[self cdsVideoOutput] sourceDeposterize] && ([[self cdsVideoOutput] pixelScaler] != VideoFilterTypeID_None) );
20392056
[[self cdsVideoOutput] setSourceDeposterize:theState];
2040-
const BOOL newState = ( ![[self cdsVideoOutput] willFilterOnGPU] && ![[self cdsVideoOutput] sourceDeposterize] && ([[self cdsVideoOutput] pixelScaler] != VideoFilterTypeID_None) );
2057+
const BOOL newStateWillFilterDirectToCPU = ( ![[self cdsVideoOutput] willFilterOnGPU] && ![[self cdsVideoOutput] sourceDeposterize] && ([[self cdsVideoOutput] pixelScaler] != VideoFilterTypeID_None) );
20412058

2042-
if (oldState != newState)
2059+
if (oldStateWillFilterDirectToCPU != newStateWillFilterDirectToCPU)
20432060
{
20442061
DisplayWindowController *windowController = (DisplayWindowController *)[[self window] delegate];
20452062
CocoaDSCore *cdsCore = (CocoaDSCore *)[[[windowController emuControl] cdsCoreController] content];
20462063
MacGPUFetchObjectDisplayLink *dlFetchObj = (MacGPUFetchObjectDisplayLink *)[[cdsCore cdsGPU] fetchObject];
20472064

2048-
if (newState)
2065+
if (newStateWillFilterDirectToCPU)
20492066
{
20502067
dlFetchObj->IncrementViewsUsingDirectToCPUFiltering();
20512068
}
@@ -2076,17 +2093,17 @@ - (NSInteger) outputFilter
20762093

20772094
- (void) setPixelScaler:(NSInteger)filterID
20782095
{
2079-
const BOOL oldState = ( ![[self cdsVideoOutput] willFilterOnGPU] && ![[self cdsVideoOutput] sourceDeposterize] && ([[self cdsVideoOutput] pixelScaler] != VideoFilterTypeID_None) );
2096+
const BOOL oldStateWillFilterDirectToCPU = ( ![[self cdsVideoOutput] willFilterOnGPU] && ![[self cdsVideoOutput] sourceDeposterize] && ([[self cdsVideoOutput] pixelScaler] != VideoFilterTypeID_None) );
20802097
[[self cdsVideoOutput] setPixelScaler:filterID];
2081-
const BOOL newState = ( ![[self cdsVideoOutput] willFilterOnGPU] && ![[self cdsVideoOutput] sourceDeposterize] && ([[self cdsVideoOutput] pixelScaler] != VideoFilterTypeID_None) );
2098+
const BOOL newStateWillFilterDirectToCPU = ( ![[self cdsVideoOutput] willFilterOnGPU] && ![[self cdsVideoOutput] sourceDeposterize] && ([[self cdsVideoOutput] pixelScaler] != VideoFilterTypeID_None) );
20822099

2083-
if (oldState != newState)
2100+
if (oldStateWillFilterDirectToCPU != newStateWillFilterDirectToCPU)
20842101
{
20852102
DisplayWindowController *windowController = (DisplayWindowController *)[[self window] delegate];
20862103
CocoaDSCore *cdsCore = (CocoaDSCore *)[[[windowController emuControl] cdsCoreController] content];
20872104
MacGPUFetchObjectDisplayLink *dlFetchObj = (MacGPUFetchObjectDisplayLink *)[[cdsCore cdsGPU] fetchObject];
20882105

2089-
if (newState)
2106+
if (newStateWillFilterDirectToCPU)
20902107
{
20912108
dlFetchObj->IncrementViewsUsingDirectToCPUFiltering();
20922109
}

0 commit comments

Comments
 (0)