Skip to content

Commit eff4626

Browse files
#2590 Radeon mac optimization pass (#3277)
- Skip updating reflection probes that are not the default probe when probe coverage is set to "None". - Enable RenderAppleUseMultGL and disable occlusion culling on Macs with AMD GPUs. - Reduce the number of texture decode threads on Macs with Intel CPUs. - Move texture deletion to LLImageGL::updateClass and prevent textures from staying resident in VRAM longer than 3 frames. - Disable SSAO by default on Macs with Intel CPUs.
1 parent 5a62957 commit eff4626

File tree

10 files changed

+112
-55
lines changed

10 files changed

+112
-55
lines changed

indra/llcommon/llcommon.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ void* ll_tracy_new(size_t size)
5454
{
5555
throw std::bad_alloc();
5656
}
57-
TracyAlloc(ptr, size);
57+
LL_PROFILE_ALLOC(ptr, size);
5858
return ptr;
5959
}
6060

@@ -70,7 +70,7 @@ void* operator new[](std::size_t count)
7070

7171
void ll_tracy_delete(void* ptr)
7272
{
73-
TracyFree(ptr);
73+
LL_PROFILE_FREE(ptr);
7474
if (gProfilerEnabled)
7575
{
7676
//LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
@@ -102,13 +102,13 @@ void operator delete[](void* ptr) noexcept
102102
void *tracy_aligned_malloc(size_t size, size_t alignment)
103103
{
104104
auto ptr = ll_aligned_malloc_fallback(size, alignment);
105-
if (ptr) TracyAlloc(ptr, size);
105+
if (ptr) LL_PROFILE_ALLOC(ptr, size);
106106
return ptr;
107107
}
108108

109109
void tracy_aligned_free(void *memblock)
110110
{
111-
TracyFree(memblock);
111+
LL_PROFILE_FREE(memblock);
112112
ll_aligned_free_fallback(memblock);
113113
}
114114

indra/llcommon/llmemory.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ inline void* ll_aligned_realloc_16(void* ptr, size_t size, size_t old_size) // r
222222
ll_aligned_free_16(ptr);
223223
}
224224
#endif
225-
LL_PROFILE_ALLOC(ptr, size);
225+
LL_PROFILE_ALLOC(ret, size);
226226
return ret;
227227
}
228228

indra/llrender/llimagegl.cpp

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1052,7 +1052,7 @@ U32 type_width_from_pixtype(U32 pixtype)
10521052
bool should_stagger_image_set(bool compressed)
10531053
{
10541054
#if LL_DARWIN
1055-
return false;
1055+
return !compressed && on_main_thread() && gGLManager.mIsAMD;
10561056
#else
10571057
// glTexSubImage2D doesn't work with compressed textures on select tested Nvidia GPUs on Windows 10 -Cosmic,2023-03-08
10581058
// Setting media textures off-thread seems faster when not using sub_image_lines (Nvidia/Windows 10) -Cosmic,2023-03-31
@@ -1270,37 +1270,37 @@ void LLImageGL::generateTextures(S32 numTextures, U32 *textures)
12701270
}
12711271
}
12721272

1273+
constexpr int DELETE_DELAY = 3; // number of frames to wait before deleting textures
1274+
// Pending-deletion texture names, bucketed into DELETE_DELAY+1 slots.
// LLImageGL::deleteTextures appends to slot (sFrameCount % (DELETE_DELAY+1));
// LLImageGL::updateClass drains one slot per call and issues the GL deletes.
static std::vector<U32> sFreeList[DELETE_DELAY+1];
1275+
12731276
// static
// Per-call housekeeping for LLImageGL: advances sFrameCount and then flushes
// one slot of the deferred texture free list (sFreeList), releasing the queued
// texture names via free_tex_images() followed by glDeleteTextures().
12741277
void LLImageGL::updateClass()
12751278
{
12761279
sFrameCount++;
1280+
1281+
// wait a few frames before actually deleting the textures to avoid
1282+
// synchronization issues with the GPU
// NOTE(review): after the increment above, this index is congruent to
// (sFrameCount - 1) mod (DELETE_DELAY+1), i.e. the slot deleteTextures was
// filling before this call. That looks like a one-call delay rather than
// DELETE_DELAY calls -- confirm this is the intended residency window.
1283+
U32 idx = (sFrameCount+DELETE_DELAY) % (DELETE_DELAY+1);
1284+
1285+
if (!sFreeList[idx].empty())
1286+
{
1287+
free_tex_images((GLsizei) sFreeList[idx].size(), sFreeList[idx].data());
1288+
glDeleteTextures((GLsizei)sFreeList[idx].size(), sFreeList[idx].data());
1289+
// empty the slot so it can be refilled by deleteTextures
sFreeList[idx].resize(0);
1290+
}
12771291
}
12781292

12791293
// static
12801294
void LLImageGL::deleteTextures(S32 numTextures, const U32 *textures)
12811295
{
1282-
// wait a few frames before actually deleting the textures to avoid
1283-
// synchronization issues with the GPU
1284-
static std::vector<U32> sFreeList[4];
1285-
12861296
if (gGLManager.mInited)
12871297
{
12881298
LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE;
1289-
U32 idx = sFrameCount % 4;
1290-
1299+
U32 idx = sFrameCount % (DELETE_DELAY+1);
12911300
for (S32 i = 0; i < numTextures; ++i)
12921301
{
12931302
sFreeList[idx].push_back(textures[i]);
12941303
}
1295-
1296-
idx = (sFrameCount + 3) % 4;
1297-
1298-
if (!sFreeList[idx].empty())
1299-
{
1300-
free_tex_images((GLsizei) sFreeList[idx].size(), sFreeList[idx].data());
1301-
glDeleteTextures((GLsizei)sFreeList[idx].size(), sFreeList[idx].data());
1302-
sFreeList[idx].resize(0);
1303-
}
13041304
}
13051305
}
13061306

indra/llwindow/llwindowmacosx.cpp

Lines changed: 36 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,41 @@ namespace
6868

6969
bool LLWindowMacOSX::sUseMultGL = false;
7070

71+
//static
// Enable or disable the multi-threaded OpenGL engine (kCGLCEMPEngine).
//
// The requested state is always stored in sUseMultGL. The CGL calls are only
// made once gGLManager.mInited is set, and they act on whatever context
// CGLGetCurrentContext() returns at the time of the call.
//
// use_mult_gl: true to request the multi-threaded engine, false to turn it off.
//
// If CGLEnable fails, sUseMultGL is reset to false so the flag reflects the
// state actually in effect.
72+
void LLWindowMacOSX::setUseMultGL(bool use_mult_gl)
73+
{
74+
// remember the previous state so CGLDisable is only issued on an on->off transition
bool was_enabled = sUseMultGL;
75+
76+
sUseMultGL = use_mult_gl;
77+
78+
if (gGLManager.mInited)
79+
{
80+
// NOTE(review): ctx is not checked for NULL before use -- confirm a context is
// always current when this is reached
CGLContextObj ctx = CGLGetCurrentContext();
81+
//enable multi-threaded OpenGL (whether or not sUseMultGL actually changed)
82+
if (sUseMultGL)
83+
{
84+
CGLError cgl_err;
85+
86+
cgl_err = CGLEnable( ctx, kCGLCEMPEngine);
87+
88+
if (cgl_err != kCGLNoError )
89+
{
90+
LL_INFOS("GLInit") << "Multi-threaded OpenGL not available." << LL_ENDL;
91+
// fall back to single-threaded so the flag matches reality
sUseMultGL = false;
92+
}
93+
else
94+
{
95+
LL_INFOS("GLInit") << "Multi-threaded OpenGL enabled." << LL_ENDL;
96+
}
97+
}
98+
else if (was_enabled)
99+
{
100+
// the CGLDisable result is not inspected; the message below is logged regardless
CGLDisable( ctx, kCGLCEMPEngine);
101+
LL_INFOS("GLInit") << "Multi-threaded OpenGL disabled." << LL_ENDL;
102+
}
103+
}
104+
}
105+
71106
// Cross-platform bits:
72107

73108
bool check_for_card(const char* RENDERER, const char* bad_card)
@@ -704,23 +739,8 @@ bool LLWindowMacOSX::createContext(int x, int y, int width, int height, int bits
704739
// Disable vertical sync for swap
705740
toggleVSync(enable_vsync);
706741

707-
//enable multi-threaded OpenGL
708-
if (sUseMultGL)
709-
{
710-
CGLError cgl_err;
711-
CGLContextObj ctx = CGLGetCurrentContext();
712-
713-
cgl_err = CGLEnable( ctx, kCGLCEMPEngine);
742+
setUseMultGL(sUseMultGL);
714743

715-
if (cgl_err != kCGLNoError )
716-
{
717-
LL_INFOS("GLInit") << "Multi-threaded OpenGL not available." << LL_ENDL;
718-
}
719-
else
720-
{
721-
LL_INFOS("GLInit") << "Multi-threaded OpenGL enabled." << LL_ENDL;
722-
}
723-
}
724744
makeFirstResponder(mWindow, mGLView);
725745

726746
return true;

indra/llwindow/llwindowmacosx.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,9 @@ class LLWindowMacOSX : public LLWindow
147147

148148
void toggleVSync(bool enable_vsync) override;
149149

150+
// enable or disable multithreaded GL
151+
static void setUseMultGL(bool use_mult_gl);
152+
150153
protected:
151154
LLWindowMacOSX(LLWindowCallbacks* callbacks,
152155
const std::string& title, const std::string& name, int x, int y, int width, int height, U32 flags,

indra/newview/featuretable_mac.txt

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
version 68
1+
version 71
22
// The version number above should be incremented IF AND ONLY IF some
33
// change has been made that is sufficiently important to justify
44
// resetting the graphics preferences of all users to the recommended
@@ -67,9 +67,9 @@ RenderFSAAType 1 2
6767
RenderFSAASamples 1 3
6868
RenderMaxTextureIndex 1 16
6969
RenderGLContextCoreProfile 1 1
70-
RenderGLMultiThreadedTextures 1 0
71-
RenderGLMultiThreadedMedia 1 0
72-
RenderAppleUseMultGL 1 0
70+
RenderGLMultiThreadedTextures 1 1
71+
RenderGLMultiThreadedMedia 1 1
72+
RenderAppleUseMultGL 1 1
7373
RenderReflectionsEnabled 1 1
7474
RenderReflectionProbeDetail 1 2
7575
RenderScreenSpaceReflections 1 1
@@ -405,20 +405,30 @@ list TexUnit16orLess
405405
RenderTerrainPBRDetail 1 -1
406406

407407
list AMD
408-
RenderDeferredSSAO 1 0
408+
UseOcclusion 1 0
409+
RenderGLMultiThreadedTextures 1 0
410+
411+
list NVIDIA
412+
RenderGLMultiThreadedTextures 1 0
413+
RenderGLMultiThreadedMedia 1 0
414+
RenderAppleUseMultGL 1 0
409415

410416
list Intel
411417
RenderAnisotropic 1 0
412418
RenderFSAASamples 1 0
419+
RenderGLMultiThreadedTextures 1 0
420+
RenderGLMultiThreadedMedia 1 0
421+
RenderAppleUseMultGL 1 0
413422

414423
// AppleGPU and NonAppleGPU can be thought of as Apple silicon vs Intel Mac
415424
list AppleGPU
416425
RenderGLMultiThreadedMedia 1 0
417426
RenderAppleUseMultGL 1 0
427+
RenderGLMultiThreadedTextures 1 0
428+
RenderGLMultiThreadedMedia 1 0
418429

419430
list NonAppleGPU
420-
RenderGLMultiThreadedMedia 1 0
421-
RenderAppleUseMultGL 1 0
431+
RenderDeferredSSAO 1 0
422432

423433
list GL3
424434
RenderFSAASamples 0 0

indra/newview/llappviewer.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2171,7 +2171,12 @@ bool LLAppViewer::initThreads()
21712171

21722172
// get the number of concurrent threads that can run
21732173
S32 cores = std::thread::hardware_concurrency();
2174-
2174+
#if LL_DARWIN
2175+
if (!gGLManager.mIsApple)
2176+
{
2177+
cores /= 2;
2178+
}
2179+
#endif
21752180
U32 max_cores = gSavedSettings.getU32("EmulateCoreCount");
21762181
if (max_cores != 0)
21772182
{

indra/newview/llreflectionmapmanager.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,13 @@ void LLReflectionMapManager::update()
404404
{
405405
closestDynamic = probe;
406406
}
407+
408+
if (sLevel == 0)
409+
{
410+
// only update default probe when coverage is set to none
411+
llassert(probe == mDefaultProbe);
412+
break;
413+
}
407414
}
408415

409416
if (realtime && closestDynamic != nullptr)
@@ -713,6 +720,7 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face)
713720
}
714721
else
715722
{
723+
llassert(gSavedSettings.getS32("RenderReflectionProbeLevel") > 0); // should never update a probe that's not the default probe if reflection coverage is none
716724
probe->update(mRenderTarget.getWidth(), face);
717725
}
718726

indra/newview/llviewercontrol.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@
7777
#include "llstartup.h"
7878
#include "llperfstats.h"
7979

80+
#if LL_DARWIN
81+
#include "llwindowmacosx.h"
82+
#endif
83+
8084
// Third party library includes
8185
#include <boost/algorithm/string.hpp>
8286

@@ -453,6 +457,17 @@ static bool handleReflectionProbeDetailChanged(const LLSD& newvalue)
453457
return true;
454458
}
455459

460+
#if LL_DARWIN
461+
// Settings listener for "RenderAppleUseMultGL" (macOS only): forwards the new
// boolean value to LLWindowMacOSX::setUseMultGL once GL has been initialized.
// Before gGLManager.mInited is set the change is ignored here.
// Always returns true.
static bool handleAppleUseMultGLChanged(const LLSD& newvalue)
462+
{
463+
if (gGLManager.mInited)
464+
{
465+
LLWindowMacOSX::setUseMultGL(newvalue.asBoolean());
466+
}
467+
return true;
468+
}
469+
#endif
470+
456471
static bool handleHeroProbeResolutionChanged(const LLSD &newvalue)
457472
{
458473
if (gPipeline.isInit())
@@ -820,6 +835,9 @@ void settings_setup_listeners()
820835
setting_setup_signal_listener(gSavedSettings, "RenderReflectionProbeLevel", handleReflectionProbeDetailChanged);
821836
setting_setup_signal_listener(gSavedSettings, "RenderReflectionProbeDetail", handleReflectionProbeDetailChanged);
822837
setting_setup_signal_listener(gSavedSettings, "RenderReflectionsEnabled", handleReflectionProbeDetailChanged);
838+
#if LL_DARWIN
839+
setting_setup_signal_listener(gSavedSettings, "RenderAppleUseMultGL", handleAppleUseMultGLChanged);
840+
#endif
823841
setting_setup_signal_listener(gSavedSettings, "RenderScreenSpaceReflections", handleReflectionProbeDetailChanged);
824842
setting_setup_signal_listener(gSavedSettings, "RenderMirrors", handleReflectionProbeDetailChanged);
825843
setting_setup_signal_listener(gSavedSettings, "RenderHeroProbeResolution", handleHeroProbeResolutionChanged);

indra/newview/llviewerwindow.cpp

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5391,6 +5391,8 @@ bool LLViewerWindow::cubeSnapshot(const LLVector3& origin, LLCubeMapArray* cubea
53915391
camera->setUserClipPlane(clipPlane);
53925392
}
53935393

5394+
gPipeline.pushRenderTypeMask();
5395+
53945396
glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT); // stencil buffer is deprecated | GL_STENCIL_BUFFER_BIT);
53955397

53965398
U32 dynamic_render_types[] = {
@@ -5479,16 +5481,7 @@ bool LLViewerWindow::cubeSnapshot(const LLVector3& origin, LLCubeMapArray* cubea
54795481
}
54805482
}
54815483

5482-
if (!dynamic_render)
5483-
{
5484-
for (int i = 0; i < dynamic_render_type_count; ++i)
5485-
{
5486-
if (prev_dynamic_render_type[i])
5487-
{
5488-
gPipeline.toggleRenderType(dynamic_render_types[i]);
5489-
}
5490-
}
5491-
}
5484+
gPipeline.popRenderTypeMask();
54925485

54935486
if (hide_hud)
54945487
{

0 commit comments

Comments
 (0)