Skip to content

Commit dfbe4f2

Browse files
Merge pull request #152 from Devsh-Graphics-Programming/criss_master
VT and material compiler/mitsuba loader fixes (normal map support)
2 parents abb39cc + fdae3d1 commit dfbe4f2

22 files changed

+806
-198
lines changed

.gitmodules

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,9 @@
9999
[submodule "3rdparty/volk"]
100100
path = 3rdparty/volk
101101
url = https://github.com/zeux/volk.git
102+
[submodule "3rdparty/simdjson"]
103+
path = 3rdparty/simdjson
104+
url = https://github.com/simdjson/simdjson
105+
[submodule "3rdparty/glTFSampleModels"]
106+
path = 3rdparty/glTFSampleModels
107+
url = https://github.com/KhronosGroup/glTF-Sample-Models

examples_tests/20.Megatexture/main.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -149,17 +149,18 @@ struct commit_t
149149
};
150150
STextureData getTextureData(core::vector<commit_t>& _out_commits, const asset::ICPUImage* _img, asset::ICPUVirtualTexture* _vt, asset::ISampler::E_TEXTURE_CLAMP _uwrap, asset::ISampler::E_TEXTURE_CLAMP _vwrap, asset::ISampler::E_TEXTURE_BORDER_COLOR _borderColor)
151151
{
152-
const auto& extent = _img->getCreationParameters().extent;
152+
auto img = _vt->createUpscaledImage(_img);
153+
const auto& extent = img->getCreationParameters().extent;
153154

154-
auto imgAndOrigSz = asset::ICPUVirtualTexture::createPoTPaddedSquareImageWithMipLevels(_img, _uwrap, _vwrap, _borderColor);
155+
auto imgAndOrigSz = asset::ICPUVirtualTexture::createPoTPaddedSquareImageWithMipLevels(img.get(), _uwrap, _vwrap, _borderColor);
155156

156157
asset::IImage::SSubresourceRange subres;
157158
subres.baseMipLevel = 0u;
158159
subres.levelCount = core::findLSB(core::roundDownToPoT<uint32_t>(std::max(extent.width, extent.height))) + 1;
159160
subres.baseArrayLayer = 0u;
160161
subres.layerCount = 1u;
161162

162-
auto addr = _vt->alloc(_img->getCreationParameters().format, imgAndOrigSz.second, subres, _uwrap, _vwrap);
163+
auto addr = _vt->alloc(img->getCreationParameters().format, imgAndOrigSz.second, subres, _uwrap, _vwrap);
163164
commit_t cm{ addr, std::move(imgAndOrigSz.first), subres, _uwrap, _vwrap, _borderColor };
164165

165166
_out_commits.push_back(cm);

examples_tests/22.RaytracedAO/Renderer.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "nbl/ext/ScreenShot/ScreenShot.h"
66

77
#include "../source/Nabla/COpenCLHandler.h"
8+
#include "COpenGLDriver.h"
89

910

1011
#ifndef _NBL_BUILD_OPTIX_

examples_tests/22.RaytracedAO/main.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ int main()
115115

116116
// TODO: Move into renderer?
117117
bool rightHandedCamera = true;
118+
float moveSpeed = core::nan<float>();
118119
auto camera = smgr->addCameraSceneNode(nullptr);
119120
auto isOkSensorType = [](const ext::MitsubaLoader::CElementSensor& sensor) -> bool {
120121
return sensor.type == ext::MitsubaLoader::CElementSensor::Type::PERSPECTIVE || sensor.type == ext::MitsubaLoader::CElementSensor::Type::THINLENS;
@@ -204,6 +205,7 @@ int main()
204205
camera->setProjectionMatrix(core::matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(realFoVDegrees), aspectRatio, nearClip, persp->farClip));
205206
else
206207
camera->setProjectionMatrix(core::matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(realFoVDegrees), aspectRatio, nearClip, persp->farClip));
208+
moveSpeed = persp->moveSpeed;
207209
}
208210
else
209211
{
@@ -271,7 +273,9 @@ int main()
271273
core::vector3df_SIMD ptu[] = {core::vectorSIMDf().set(camera->getPosition()),camera->getTarget(),camera->getUpVector()};
272274
auto proj = camera->getProjectionMatrix();
273275

274-
camera = smgr->addCameraSceneNodeFPS(nullptr, 80.f, core::min(extent.X, extent.Y, extent.Z) * 0.0001f);
276+
if (core::isnan(moveSpeed))
277+
moveSpeed = core::min(extent.X,extent.Y,extent.Z)*0.0001f;
278+
camera = smgr->addCameraSceneNodeFPS(nullptr,80.f,moveSpeed);
275279
camera->setPosition(ptu[0].getAsVector3df());
276280
camera->setTarget(ptu[1].getAsVector3df());
277281
camera->setUpVector(ptu[2]);

include/nbl/asset/filters/CBlitImageFilter.h

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -289,13 +289,10 @@ class CBlitImageFilter : public CImageFilter<CBlitImageFilter<Normalize,Clamp,Sw
289289

290290
// filtering and alpha handling happens separately for every layer, so save on scratch memory size
291291
const auto inImageType = inParams.type;
292-
const auto window_last = [&kernelX,&kernelY,&kernelZ]() -> core::vectorSIMDi32
293-
{
294-
return core::vectorSIMDi32(kernelX.getWindowSize().x-1,kernelY.getWindowSize().y-1,kernelZ.getWindowSize().z-1,0);
295-
}();
292+
const auto window_end = getWindowEnd(inImageType,kernelX,kernelY,kernelZ);
296293
const core::vectorSIMDi32 intermediateExtent[3] = {
297-
core::vectorSIMDi32(outExtent.width,inExtent.height+window_last[1],inExtent.depth+window_last[2]),
298-
core::vectorSIMDi32(outExtent.width,outExtent.height,inExtent.depth+window_last[2]),
294+
core::vectorSIMDi32(outExtent.width,inExtent.height+window_end[1],inExtent.depth+window_end[2]),
295+
core::vectorSIMDi32(outExtent.width,outExtent.height,inExtent.depth+window_end[2]),
299296
core::vectorSIMDi32(outExtent.width,outExtent.height,outExtent.depth)
300297
};
301298
const core::vectorSIMDi32 intermediateLastCoord[3] = {
@@ -465,7 +462,7 @@ class CBlitImageFilter : public CImageFilter<CBlitImageFilter<Normalize,Clamp,Sw
465462
lineBuffer = intermediateStorage[axis-1]+core::dot(static_cast<const core::vectorSIMDi32&>(intermediateStrides[axis-1]),localTexCoord)[0];
466463
else
467464
{
468-
const auto windowEnd = inExtent.width+window_last.x;
465+
const auto windowEnd = inExtent.width+window_end.x;
469466
decode_offset = alloc_decode_scratch();
470467
lineBuffer = intermediateStorage[1]+decode_offset*MaxChannels*windowEnd;
471468
for (auto& i=localTexCoord.x; i<windowEnd; i++)
@@ -566,6 +563,21 @@ class CBlitImageFilter : public CImageFilter<CBlitImageFilter<Normalize,Clamp,Sw
566563

567564
private:
568565
static inline constexpr uint32_t VectorizationBoundSTL = /*AVX2*/16u;
566+
// Builds the per-axis window end that callers add to the input extent: x always comes from kernelX, while y and z stay 0 unless the input image type is at least ET_2D / ET_3D respectively.
567+
static inline core::vectorSIMDi32 getWindowEnd(const IImage::E_TYPE inImageType,
568+
const CScaledImageFilterKernel<KernelX>& kernelX,
569+
const CScaledImageFilterKernel<KernelY>& kernelY,
570+
const CScaledImageFilterKernel<KernelZ>& kernelZ
571+
)
572+
{
573+
// TODO: investigate properly whether this is supposed to be `size` or `size-1` (the polyphase case suggests `size` is needed)
574+
core::vectorSIMDi32 last(kernelX.getWindowSize().x,0,0,0);
575+
if (inImageType>=IImage::ET_2D)
576+
last.y = kernelY.getWindowSize().x; // NOTE(review): the code this replaces read `.y` of the Y kernel's window size - confirm `.x` is intended
577+
if (inImageType>=IImage::ET_3D)
578+
last.z = kernelZ.getWindowSize().x; // NOTE(review): the code this replaces read `.z` of the Z kernel's window size - confirm `.x` is intended
579+
return last;
580+
}
569581
// the blit filter will filter one axis at a time, hence necessitating "ping ponging" between two scratch buffers
570582
static inline uint32_t getScratchOffset(const state_type* state, bool secondPong)
571583
{
@@ -574,17 +586,14 @@ class CBlitImageFilter : public CImageFilter<CBlitImageFilter<Normalize,Clamp,Sw
574586
const auto kernelY = state->contructScaledKernel(state->kernelY);
575587
const auto kernelZ = state->contructScaledKernel(state->kernelZ);
576588

577-
const auto window_last = [&kernelX,&kernelY,&kernelZ]() -> core::vectorSIMDi32
578-
{
579-
return core::vectorSIMDi32(kernelX.getWindowSize().x-1,kernelY.getWindowSize().y-1,kernelZ.getWindowSize().z-1,0);
580-
}();
589+
const auto window_end = getWindowEnd(state->inImage->getCreationParameters().type,kernelX,kernelY,kernelZ);
581590
// TODO: account for the size needed for coverage adjustment
582591
// the first pass will be along X, so new temporary image will have the width of the output extent, but the height and depth will need to be padded
583592
// but the last pass will be along Z and the new temporary image will have the exact dimensions of `outExtent` which is why there is a `core::max`
584-
auto texelCount = state->outExtent.width*core::max<uint32_t>((state->inExtent.height+window_last[1])*(state->inExtent.depth+window_last[2]),state->outExtent.height*state->outExtent.depth);
593+
auto texelCount = state->outExtent.width*core::max<uint32_t>((state->inExtent.height+window_end[1])*(state->inExtent.depth+window_end[2]),state->outExtent.height*state->outExtent.depth);
585594
// the second pass will result in an image that has the width and height equal to `outExtent`
586595
if (secondPong)
587-
texelCount += core::max<uint32_t>(state->outExtent.width*state->outExtent.height*(state->inExtent.depth+window_last[2]),(state->inExtent.width+window_last[0])*std::thread::hardware_concurrency()*VectorizationBoundSTL);
596+
texelCount += core::max<uint32_t>(state->outExtent.width*state->outExtent.height*(state->inExtent.depth+window_end[2]),(state->inExtent.width+window_end[0])*std::thread::hardware_concurrency()*VectorizationBoundSTL);
588597
// obviously we have multiple channels and each channel has a certain type for arithmetic
589598
return texelCount*MaxChannels*sizeof(value_type);
590599
}

0 commit comments

Comments
 (0)