Devsh-Graphics-Programming
diff --git a/‎.gitmodules
Lines changed: 6 additions & 0 deletions b/‎.gitmodules
Lines changed: 6 additions & 0 deletions
diff --git a/‎examples_tests/20.Megatexture/main.cpp
Lines changed: 4 additions & 3 deletions b/‎examples_tests/20.Megatexture/main.cpp
Lines changed: 4 additions & 3 deletions
diff --git a/‎examples_tests/22.RaytracedAO/Renderer.cpp
Lines changed: 1 addition & 0 deletions b/‎examples_tests/22.RaytracedAO/Renderer.cpp
Lines changed: 1 addition & 0 deletions
diff --git a/‎examples_tests/22.RaytracedAO/main.cpp
Lines changed: 5 additions & 1 deletion b/‎examples_tests/22.RaytracedAO/main.cpp
Lines changed: 5 additions & 1 deletion
diff --git a/‎include/nbl/asset/filters/CBlitImageFilter.h
Lines changed: 22 additions & 13 deletions b/‎include/nbl/asset/filters/CBlitImageFilter.h
Lines changed: 22 additions & 13 deletions
@@ -99,3 +99,9 @@
 [submodule "3rdparty/volk"]
 	path = 3rdparty/volk
 	url = https://github.com/zeux/volk.git
+[submodule "3rdparty/simdjson"]
+	path = 3rdparty/simdjson
+	url = https://github.com/simdjson/simdjson
+[submodule "3rdparty/glTFSampleModels"]
+	path = 3rdparty/glTFSampleModels
+	url = https://github.com/KhronosGroup/glTF-Sample-Models
@@ -149,17 +149,18 @@ struct commit_t
 };
 STextureData getTextureData(core::vector<commit_t>& _out_commits, const asset::ICPUImage* _img, asset::ICPUVirtualTexture* _vt, asset::ISampler::E_TEXTURE_CLAMP _uwrap, asset::ISampler::E_TEXTURE_CLAMP _vwrap, asset::ISampler::E_TEXTURE_BORDER_COLOR _borderColor)
 {
-    const auto& extent = _img->getCreationParameters().extent;
+    auto img = _vt->createUpscaledImage(_img);
+    const auto& extent = img->getCreationParameters().extent;
 
-    auto imgAndOrigSz = asset::ICPUVirtualTexture::createPoTPaddedSquareImageWithMipLevels(_img, _uwrap, _vwrap, _borderColor);
+    auto imgAndOrigSz = asset::ICPUVirtualTexture::createPoTPaddedSquareImageWithMipLevels(img.get(), _uwrap, _vwrap, _borderColor);
 
     asset::IImage::SSubresourceRange subres;
     subres.baseMipLevel = 0u;
     subres.levelCount = core::findLSB(core::roundDownToPoT<uint32_t>(std::max(extent.width, extent.height))) + 1;
     subres.baseArrayLayer = 0u;
     subres.layerCount = 1u;
 
-    auto addr = _vt->alloc(_img->getCreationParameters().format, imgAndOrigSz.second, subres, _uwrap, _vwrap);
+    auto addr = _vt->alloc(img->getCreationParameters().format, imgAndOrigSz.second, subres, _uwrap, _vwrap);
     commit_t cm{ addr, std::move(imgAndOrigSz.first), subres, _uwrap, _vwrap, _borderColor };
 
     _out_commits.push_back(cm);
 
@@ -5,6 +5,7 @@
 #include "nbl/ext/ScreenShot/ScreenShot.h"
 
 #include "../source/Nabla/COpenCLHandler.h"
+#include "COpenGLDriver.h"
 
 
 #ifndef _NBL_BUILD_OPTIX_
 
@@ -115,6 +115,7 @@ int main()
 
 	// TODO: Move into renderer?
 	bool rightHandedCamera = true;
+	float moveSpeed = core::nan<float>();
 	auto camera = smgr->addCameraSceneNode(nullptr);
 	auto isOkSensorType = [](const ext::MitsubaLoader::CElementSensor& sensor) -> bool {
 		return sensor.type == ext::MitsubaLoader::CElementSensor::Type::PERSPECTIVE || sensor.type == ext::MitsubaLoader::CElementSensor::Type::THINLENS;
@@ -204,6 +205,7 @@ int main()
 			camera->setProjectionMatrix(core::matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(realFoVDegrees), aspectRatio, nearClip, persp->farClip));
 		else
 			camera->setProjectionMatrix(core::matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(realFoVDegrees), aspectRatio, nearClip, persp->farClip));
+		moveSpeed = persp->moveSpeed;
 	}
 	else
 	{
@@ -271,7 +273,9 @@ int main()
 		core::vector3df_SIMD ptu[] = {core::vectorSIMDf().set(camera->getPosition()),camera->getTarget(),camera->getUpVector()};
 		auto proj = camera->getProjectionMatrix();
 
-		camera = smgr->addCameraSceneNodeFPS(nullptr, 80.f, core::min(extent.X, extent.Y, extent.Z) * 0.0001f);
+		if (core::isnan(moveSpeed))
+			moveSpeed = core::min(extent.X,extent.Y,extent.Z)*0.0001f;
+		camera = smgr->addCameraSceneNodeFPS(nullptr,80.f,moveSpeed);
 		camera->setPosition(ptu[0].getAsVector3df());
 		camera->setTarget(ptu[1].getAsVector3df());
 		camera->setUpVector(ptu[2]);
 
@@ -289,13 +289,10 @@ class CBlitImageFilter : public CImageFilter<CBlitImageFilter<Normalize,Clamp,Sw
 
 			// filtering and alpha handling happens separately for every layer, so save on scratch memory size
 			const auto inImageType = inParams.type;
-			const auto window_last = [&kernelX,&kernelY,&kernelZ]() -> core::vectorSIMDi32
-			{
-				return core::vectorSIMDi32(kernelX.getWindowSize().x-1,kernelY.getWindowSize().y-1,kernelZ.getWindowSize().z-1,0);
-			}();
+			const auto window_end = getWindowEnd(inImageType,kernelX,kernelY,kernelZ);
 			const core::vectorSIMDi32 intermediateExtent[3] = {
-				core::vectorSIMDi32(outExtent.width,inExtent.height+window_last[1],inExtent.depth+window_last[2]),
-				core::vectorSIMDi32(outExtent.width,outExtent.height,inExtent.depth+window_last[2]),
+				core::vectorSIMDi32(outExtent.width,inExtent.height+window_end[1],inExtent.depth+window_end[2]),
+				core::vectorSIMDi32(outExtent.width,outExtent.height,inExtent.depth+window_end[2]),
 				core::vectorSIMDi32(outExtent.width,outExtent.height,outExtent.depth)
 			};
 			const core::vectorSIMDi32 intermediateLastCoord[3] = {
@@ -465,7 +462,7 @@ class CBlitImageFilter : public CImageFilter<CBlitImageFilter<Normalize,Clamp,Sw
 							lineBuffer = intermediateStorage[axis-1]+core::dot(static_cast<const core::vectorSIMDi32&>(intermediateStrides[axis-1]),localTexCoord)[0];
 						else
 						{
-							const auto windowEnd = inExtent.width+window_last.x;
+							const auto windowEnd = inExtent.width+window_end.x;
 							decode_offset = alloc_decode_scratch();
 							lineBuffer = intermediateStorage[1]+decode_offset*MaxChannels*windowEnd;
 							for (auto& i=localTexCoord.x; i<windowEnd; i++)
@@ -566,6 +563,21 @@ class CBlitImageFilter : public CImageFilter<CBlitImageFilter<Normalize,Clamp,Sw
 
 	private:
 		static inline constexpr uint32_t VectorizationBoundSTL = /*AVX2*/16u;
+		// Window size of each scaled kernel along its own axis; Y and Z stay 0 when `inImageType` is below ET_2D / ET_3D respectively.
+		static inline core::vectorSIMDi32 getWindowEnd(const IImage::E_TYPE inImageType,
+			const CScaledImageFilterKernel<KernelX>& kernelX,
+			const CScaledImageFilterKernel<KernelY>& kernelY,
+			const CScaledImageFilterKernel<KernelZ>& kernelZ
+		)
+		{
+			// TODO: investigate properly whether this is supposed to be `size` or `size-1` (polyphase kinda shows the need for `size`)
+			core::vectorSIMDi32 last(kernelX.getWindowSize().x,0,0,0);
+			if (inImageType>=IImage::ET_2D)
+				last.y = kernelY.getWindowSize().y; // Y kernel contributes its Y extent, not its X one
+			if (inImageType>=IImage::ET_3D)
+				last.z = kernelZ.getWindowSize().z; // Z kernel contributes its Z extent, not its X one
+			return last;
+		}
 		// the blit filter will filter one axis at a time, hence necessitating "ping ponging" between two scratch buffers
 		static inline uint32_t getScratchOffset(const state_type* state, bool secondPong)
 		{
@@ -574,17 +586,14 @@ class CBlitImageFilter : public CImageFilter<CBlitImageFilter<Normalize,Clamp,Sw
 			const auto kernelY = state->contructScaledKernel(state->kernelY);
 			const auto kernelZ = state->contructScaledKernel(state->kernelZ);
 
-			const auto window_last = [&kernelX,&kernelY,&kernelZ]() -> core::vectorSIMDi32
-			{
-				return core::vectorSIMDi32(kernelX.getWindowSize().x-1,kernelY.getWindowSize().y-1,kernelZ.getWindowSize().z-1,0);
-			}();
+			const auto window_end = getWindowEnd(state->inImage->getCreationParameters().type,kernelX,kernelY,kernelZ);
 			// TODO: account for the size needed for coverage adjustment
 			// the first pass will be along X, so new temporary image will have the width of the output extent, but the height and depth will need to be padded
 			// but the last pass will be along Z and the new temporary image will have the exact dimensions of `outExtent` which is why there is a `core::max`
-			auto texelCount = state->outExtent.width*core::max<uint32_t>((state->inExtent.height+window_last[1])*(state->inExtent.depth+window_last[2]),state->outExtent.height*state->outExtent.depth);
+			auto texelCount = state->outExtent.width*core::max<uint32_t>((state->inExtent.height+window_end[1])*(state->inExtent.depth+window_end[2]),state->outExtent.height*state->outExtent.depth);
 			// the second pass will result in an image that has the width and height equal to `outExtent`
 			if (secondPong)
-				texelCount += core::max<uint32_t>(state->outExtent.width*state->outExtent.height*(state->inExtent.depth+window_last[2]),(state->inExtent.width+window_last[0])*std::thread::hardware_concurrency()*VectorizationBoundSTL);
+				texelCount += core::max<uint32_t>(state->outExtent.width*state->outExtent.height*(state->inExtent.depth+window_end[2]),(state->inExtent.width+window_end[0])*std::thread::hardware_concurrency()*VectorizationBoundSTL);
 			// obviously we have multiple channels and each channel has a certain type for arithmetic
 			return texelCount*MaxChannels*sizeof(value_type);
 		}