Skip to content

Commit 295022b

Browse files
authored
Add support for CPU-specific optimizations to Unix version. (#293)
1 parent 3d30fb3 commit 295022b

File tree

14 files changed

+306
-44
lines changed

14 files changed

+306
-44
lines changed

platform/x86/avx/dirinfo.dox

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
/// @dir
2+
/// Source code files containing AVX-specific implementations of dynamically dispatched code.
3+
///
4+
/// The files in this directory contain code that is intended for dynamic dispatch,
5+
/// and therefore may need to be compiled with different options than the rest of POV-Ray.
6+
/// For instance, to compile these files with gcc, the `-mavx` flag will be needed.

platform/x86/avx2fma3/dirinfo.dox

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
/// @dir
2+
/// Source code files containing AVX2/FMA3-specific implementations of dynamically dispatched code.
3+
///
4+
/// The files in this directory contain code that is intended for dynamic dispatch,
5+
/// and therefore may need to be compiled with different options than the rest of POV-Ray.
6+
/// For instance, to compile these files with gcc, the `-mavx2 -mfma` flags will be needed.

platform/x86/avxfma4/dirinfo.dox

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
/// @dir
2+
/// Source code files containing AVX/FMA4-specific implementations of dynamically dispatched code.
3+
///
4+
/// The files in this directory contain code that is intended for dynamic dispatch,
5+
/// and therefore may need to be compiled with different options than the rest of POV-Ray.
6+
/// For instance, to compile these files with gcc, the `-mavx -mfma4` flags will be needed.

platform/x86/cpuid.cpp

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@
3636
// Unit header file must be the first file included within POV-Ray *.cpp files (pulls in config)
3737
#include "cpuid.h"
3838

39+
#include <cstring>
40+
3941
#ifdef MACHINE_INTRINSICS_H
4042
#include MACHINE_INTRINSICS_H
4143
#endif
@@ -50,7 +52,7 @@
5052
#error "Don't know how to read XCR0 register in this build environment."
5153
#endif
5254

53-
#elif defined(__linux__) // Build environment: GNU/Linux (presumably GCC or Clang)
55+
#elif defined(__GNUC__) // Build environment: GCC (or Clang imitating GCC)
5456

5557
#define CPUID cpuid
5658
static void cpuid(int *out, int in) __attribute__((noinline));
@@ -96,7 +98,7 @@ static unsigned long long getXCR0()
9698
}
9799

98100
#else // Build environment
99-
#error "Don't know how to invoke CPUID in this build environment."
101+
#error "Don't know how to invoke CPUID or read XCR0 register in this build environment."
100102
#endif // Build environment
101103

102104
// Indices into the CPUID result table corresponding to the individual CPU registers.
@@ -220,11 +222,10 @@ bool IsIntelCPU()
220222
{
221223
int info[4];
222224
char vendor[12];
223-
224225
CPUID(info, 0x0);
225-
memcpy(vendor, &info[CPUID_EBX], 4);
226-
memcpy(vendor + 4, &info[CPUID_EDX], 4);
227-
memcpy(vendor + 8, &info[CPUID_ECX], 4);
226+
std::memcpy(vendor, &info[CPUID_EBX], 4);
227+
std::memcpy(vendor + 4, &info[CPUID_EDX], 4);
228+
std::memcpy(vendor + 8, &info[CPUID_ECX], 4);
228229

229-
return strncmp("GenuineIntel", vendor, 12) == 0;
230+
return std::strncmp("GenuineIntel", vendor, 12) == 0;
230231
}

platform/x86/optimizednoise.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ namespace pov
6161
{
6262

6363
bool TryOptimizedNoise(NoiseFunction* pFnNoise, DNoiseFunction* pFnDNoise,
64-
std::string* pDetected, std::string* pImpl)
64+
std::string* pImpl, std::string* pInfo)
6565
{
6666
bool doInit = (pFnNoise || pFnDNoise);
6767
#ifdef TRY_OPTIMIZED_NOISE_AVX2FMA3
@@ -70,8 +70,8 @@ bool TryOptimizedNoise(NoiseFunction* pFnNoise, DNoiseFunction* pFnDNoise,
7070
if (doInit) AVX2FMA3NoiseInit();
7171
if (pFnNoise) *pFnNoise = AVX2FMA3Noise;
7272
if (pFnDNoise) *pFnDNoise = AVX2FMA3DNoise;
73-
if (pDetected) *pDetected = "AVX2,FMA3,Intel";
74-
if (pImpl) *pImpl = "hand-optimized for AVX2/FMA3 by Intel";
73+
if (pImpl) *pImpl = "AVX2FMA3-Intel";
74+
if (pInfo) *pInfo = "hand-optimized by Intel";
7575
return true;
7676
}
7777
#endif
@@ -80,8 +80,8 @@ bool TryOptimizedNoise(NoiseFunction* pFnNoise, DNoiseFunction* pFnDNoise,
8080
{
8181
if (pFnNoise) *pFnNoise = AVXFMA4Noise;
8282
if (pFnDNoise) *pFnDNoise = AVXFMA4DNoise;
83-
if (pDetected) *pDetected = "AVX,FMA4";
84-
if (pImpl) *pImpl = "hand-optimized for AVX/FMA4 by AMD (2017-04 update)";
83+
if (pImpl) *pImpl = "AVXFMA4-AMD.2";
84+
if (pInfo) *pInfo = "hand-optimized by AMD, 2017-04 update";
8585
return true;
8686
}
8787
#endif
@@ -91,8 +91,8 @@ bool TryOptimizedNoise(NoiseFunction* pFnNoise, DNoiseFunction* pFnDNoise,
9191
if (doInit) AVXNoiseInit();
9292
if (pFnNoise) *pFnNoise = AVXNoise;
9393
if (pFnDNoise) *pFnDNoise = AVXDNoise;
94-
if (pDetected) *pDetected = "AVX,Intel";
95-
if (pImpl) *pImpl = "hand-optimized for AVX by Intel";
94+
if (pImpl) *pImpl = "AVX-Intel";
95+
if (pInfo) *pInfo = "hand-optimized by Intel";
9696
return true;
9797
}
9898
#endif
@@ -101,8 +101,8 @@ bool TryOptimizedNoise(NoiseFunction* pFnNoise, DNoiseFunction* pFnDNoise,
101101
{
102102
if (pFnNoise) *pFnNoise = AVXPortableNoise;
103103
if (pFnDNoise) *pFnDNoise = AVXPortableDNoise;
104-
if (pDetected) *pDetected = "AVX";
105-
if (pImpl) *pImpl = "compiler-optimized for AVX";
104+
if (pImpl) *pImpl = "AVX-Portable";
105+
if (pInfo) *pInfo = "auto-optimized by compiler";
106106
return true;
107107
}
108108
#endif

source/backend/povray.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,31 @@ void BuildInitInfo(POVMSObjectPtr msg)
483483
#endif // DONT_SHOW_IMAGE_LIB_VERSIONS
484484
if(err == kNoErr)
485485
err = POVMSObject_Set(msg, &attrlist, kPOVAttrib_ImageLibVersions);
486+
487+
if (err == kNoErr)
488+
err = POVMSAttrList_New(&attrlist);
489+
if (err == kNoErr)
490+
{
491+
#ifdef TRY_OPTIMIZED_NOISE
492+
std::string noiseGenSelection;
493+
std::string noiseGenInfo;
494+
if (TryOptimizedNoise(NULL, NULL, &noiseGenSelection, &noiseGenInfo))
495+
noiseGenInfo = "Noise generator: " + noiseGenSelection + " (" + noiseGenInfo + ")";
496+
else
497+
noiseGenInfo = "Noise generator: Portable";
498+
err = POVMSAttr_New(&attr);
499+
if (err == kNoErr)
500+
{
501+
err = POVMSAttr_Set(&attr, kPOVMSType_CString, reinterpret_cast<const void *>(noiseGenInfo.c_str()), noiseGenInfo.length() + 1);
502+
if (err == kNoErr)
503+
err = POVMSAttrList_Append(&attrlist, &attr);
504+
else
505+
err = POVMSAttr_Delete(&attr);
506+
}
507+
#endif
508+
}
509+
if (err == kNoErr)
510+
err = POVMSObject_Set(msg, &attrlist, kPOVAttrib_Optimizations);
486511
}
487512

488513
void ExtractLibraryVersion(const char *str, char *buffer)

source/core/material/texture.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,8 @@ typedef void(*DNoiseFunction) (Vector3d& result, const Vector3d& EPoint);
178178
///
179179
/// This function shall select one of multiple implementations of the noise generator functions
180180
/// (the portable default implementations being @ref PortableNoise() and @ref PortableDNoise(),
181-
/// respectively), depending on the runtime environment (typically CPU features).
181+
/// respectively), depending on the runtime environment (typically CPU features), and provide
182+
/// meta-information about the implementation selected.
182183
///
183184
/// This function shall also perform any additional initialization required by the selected noise
184185
/// generator.
@@ -200,14 +201,15 @@ typedef void(*DNoiseFunction) (Vector3d& result, const Vector3d& EPoint);
200201
///
201202
/// @param[out] pFnNoise Selected implementation of @ref PortableNoise().
202203
/// @param[out] pFnDNoise Selected implementation of @ref PortableDNoise().
203-
/// @param[out] pDetected String identifying the machine features detected (e.g. "AVX,non-Intel")
204-
/// on which the selection was based.
205204
/// @param[out] pImpl String unambiguously identifying the implementation selected.
205+
/// The recommended format is `CPUFEATURES-AUTHOR`[`.REVISION`].
206+
/// @param[out] pInfo String providing additional noteworthy information about the
207+
/// implementation selected.
206208
/// @return `true` if an alternative implementation was chosen and the parameters
207209
/// set accordingly, `false` otherwise.
208210
///
209211
bool TryOptimizedNoise(NoiseFunction* pFnNoise, DNoiseFunction* pFnDNoise,
210-
std::string* pDetected = NULL, std::string* pImpl = NULL);
212+
std::string* pImpl = NULL, std::string* pInfo = NULL);
211213

212214
extern NoiseFunction Noise;
213215
extern DNoiseFunction DNoise;

source/frontend/renderfrontend.cpp

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -849,7 +849,6 @@ void InitInfo(POVMS_Object& cppmsg, TextStreamBuffer *tsb)
849849
tsb->printf("\n");
850850
tsb->printf("Other contributors are listed in the documentation.\n");
851851

852-
tsb->printf("\n");
853852
if(POVMSObject_Get(msg, &attrlist, kPOVAttrib_ImageLibVersions) == kNoErr)
854853
{
855854
cnt = 0;
@@ -858,6 +857,7 @@ void InitInfo(POVMS_Object& cppmsg, TextStreamBuffer *tsb)
858857
{
859858
if(cnt > 0)
860859
{
860+
tsb->printf("\n");
861861
tsb->printf("Support libraries used by POV-Ray:\n");
862862

863863
for(i = 1; i <= cnt; i++)
@@ -878,6 +878,35 @@ void InitInfo(POVMS_Object& cppmsg, TextStreamBuffer *tsb)
878878
(void)POVMSAttrList_Delete(&attrlist);
879879
}
880880

881+
if (POVMSObject_Get(msg, &attrlist, kPOVAttrib_Optimizations) == kNoErr)
882+
{
883+
cnt = 0;
884+
885+
if (POVMSAttrList_Count(&attrlist, &cnt) == kNoErr)
886+
{
887+
if (cnt > 0)
888+
{
889+
tsb->printf("\n");
890+
tsb->printf("Dynamic optimizations active:\n");
891+
892+
for (i = 1; i <= cnt; i++)
893+
{
894+
if (POVMSAttrList_GetNth(&attrlist, i, &item) == kNoErr)
895+
{
896+
l = 1023;
897+
charbuf[0] = 0;
898+
if (POVMSAttr_Get(&item, kPOVMSType_CString, charbuf, &l) == kNoErr)
899+
tsb->printf(" %s\n", charbuf);
900+
901+
(void)POVMSAttr_Delete(&item);
902+
}
903+
}
904+
}
905+
}
906+
907+
(void)POVMSAttrList_Delete(&attrlist);
908+
}
909+
881910
POVMSObject_Delete(msg);
882911
}
883912

source/povms/povmsid.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,7 @@ enum
282282
kPOVAttrib_AssistingDevs = 'Asst',
283283
kPOVAttrib_ContributingDevs = 'Cont',
284284
kPOVAttrib_ImageLibVersions = 'ILVe',
285+
kPOVAttrib_Optimizations = 'Opti',
285286

286287
// options handled by frontend
287288
kPOVAttrib_TestAbort = 'TstA', // currently not supported by code
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# ===========================================================================
2+
# http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html
3+
# ===========================================================================
4+
#
5+
# SYNOPSIS
6+
#
7+
# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT])
8+
#
9+
# DESCRIPTION
10+
#
11+
# Check whether the given FLAG works with the current language's compiler
12+
# or gives an error. (Warnings, however, are ignored)
13+
#
14+
# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on
15+
# success/failure.
16+
#
17+
# If EXTRA-FLAGS is defined, it is added to the current language's default
18+
# flags (e.g. CFLAGS) when the check is done. The check is thus made with
19+
# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to
20+
# force the compiler to issue an error when a bad flag is given.
21+
#
22+
# INPUT gives an alternative input source to AC_COMPILE_IFELSE.
23+
#
24+
# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this
25+
# macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG.
26+
#
27+
# LICENSE
28+
#
29+
# Copyright (c) 2008 Guido U. Draheim <[email protected]>
30+
# Copyright (c) 2011 Maarten Bosmans <[email protected]>
31+
#
32+
# This program is free software: you can redistribute it and/or modify it
33+
# under the terms of the GNU General Public License as published by the
34+
# Free Software Foundation, either version 3 of the License, or (at your
35+
# option) any later version.
36+
#
37+
# This program is distributed in the hope that it will be useful, but
38+
# WITHOUT ANY WARRANTY; without even the implied warranty of
39+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
40+
# Public License for more details.
41+
#
42+
# You should have received a copy of the GNU General Public License along
43+
# with this program. If not, see <http://www.gnu.org/licenses/>.
44+
#
45+
# As a special exception, the respective Autoconf Macro's copyright owner
46+
# gives unlimited permission to copy, distribute and modify the configure
47+
# scripts that are the output of Autoconf when processing the Macro. You
48+
# need not follow the terms of the GNU General Public License when using
49+
# or distributing such scripts, even though portions of the text of the
50+
# Macro appear in them. The GNU General Public License (GPL) does govern
51+
# all other use of the material that constitutes the Autoconf Macro.
52+
#
53+
# This special exception to the GPL applies to versions of the Autoconf
54+
# Macro released by the Autoconf Archive. When you make and distribute a
55+
# modified version of the Autoconf Macro, you may extend this special
56+
# exception to the GPL to apply to your modified version as well.
57+
58+
#serial 4
59+
60+
AC_DEFUN([AX_CHECK_COMPILE_FLAG],
61+
[AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF
62+
AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
63+
AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
64+
ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
65+
_AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
66+
AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])],
67+
[AS_VAR_SET(CACHEVAR,[yes])],
68+
[AS_VAR_SET(CACHEVAR,[no])])
69+
_AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])
70+
AS_VAR_IF(CACHEVAR,yes,
71+
[m4_default([$2], :)],
72+
[m4_default([$3], :)])
73+
AS_VAR_POPDEF([CACHEVAR])dnl
74+
])dnl AX_CHECK_COMPILE_FLAG

0 commit comments

Comments
 (0)