save the work for tomorrow

devshgraphicsprogramming · devshgraphicsprogramming · commit e6a1e8207d58 · 2020-12-11T02:16:31.000+01:00
diff --git a/examples_tests/22.RaytracedAO/common.glsl b/examples_tests/22.RaytracedAO/common.glsl
@@ -7,6 +7,14 @@
 
 #ifdef __cplusplus
 	#define uint uint32_t
+	struct uvec2
+	{
+		uint32_t x,y;
+	};
+	struct vec2
+	{
+		float x,y;
+	};
 	struct vec3
 	{
 		float x,y,z;
@@ -16,6 +24,14 @@
 #endif
 
 
+struct RaytraceShaderCommonData_t
+{
+	uvec2   imageDimensions;
+	uint    samplesPerPixelPerDispatch;
+	uint    samplesPerRowPerDispatch;
+};
+
+
 struct SLight
 {
 	#ifdef __cplusplus
diff --git a/examples_tests/22.RaytracedAO/dirty_source/ExtraCrap.cpp b/examples_tests/22.RaytracedAO/dirty_source/ExtraCrap.cpp
@@ -1066,7 +1066,7 @@ void Renderer::render(irr::ITimer* timer)
 	camera->render();
 
 	const auto currentViewProj = camera->getConcatenatedMatrix();
-	//if (!core::equals(prevViewProj,currentViewProj,core::ROUNDING_ERROR<core::matrix4SIMD>()*1000.0))
+	if (!core::equals(prevViewProj,currentViewProj,core::ROUNDING_ERROR<core::matrix4SIMD>()*1000.0))
 	{
 		m_framesDone = 0u;
 
@@ -1142,12 +1142,12 @@ void Renderer::render(irr::ITimer* timer)
 			float uImageSize2Rcp[4] = {1.f/static_cast<float>(m_renderSize[0]),1.f/static_cast<float>(m_renderSize[1]),0.5f/static_cast<float>(m_renderSize[0]),0.5f/static_cast<float>(m_renderSize[1])};
 			COpenGLExtensionHandler::pGlProgramUniform4fv(m_raygenProgram, 6, 1, uImageSize2Rcp);
 		}*/
-
-		m_driver->bindDescriptorSets(EPBP_COMPUTE, m_raygenLayout.get(), 0, 1, &m_globalBackendDataDS.get(), nullptr);
-		m_driver->bindDescriptorSets(EPBP_COMPUTE, m_raygenLayout.get(), 2, 1, &m_raygenDS2.get(), nullptr);
+#endif		
+		IGPUDescriptorSet* descriptorSets[] = {m_globalBackendDataDS.get(),m_raygenDS.get()};
+		m_driver->bindDescriptorSets(EPBP_COMPUTE, m_raygenPipelineLayout.get(), 0, 2, descriptorSets, nullptr);
 		m_driver->bindComputePipeline(m_raygenPipeline.get());
+		m_driver->pushConstants(m_raygenPipelineLayout.get(),ISpecializedShader::ESS_COMPUTE,0u,sizeof(RaygenShaderData_t),&m_raygenShaderData);
 		m_driver->dispatch(m_raygenWorkGroups[0], m_raygenWorkGroups[1], 1);
-#endif		
 		// probably wise to flush all caches
 		COpenGLExtensionHandler::pGlMemoryBarrier(GL_ALL_BARRIER_BITS);
 	}
diff --git a/examples_tests/22.RaytracedAO/raygen.comp b/examples_tests/22.RaytracedAO/raygen.comp
@@ -1,49 +1,21 @@
 #version 430 core
-#define WORK_GROUP_DIM 16u
-layout(local_size_x = WORK_GROUP_DIM, local_size_y = WORK_GROUP_DIM) in;
-#define WORK_GROUP_SIZE (WORK_GROUP_DIM*WORK_GROUP_DIM)
+#include "raygenCommon.glsl"
 
 
-#include "irr/builtin/glsl/utils/NormalDecode.glsl"
-
-
-#include "common.glsl"
-
-
-// TODO transform into push constants
-// uniforms
-layout(location = 0) uniform vec3 uCameraPos;
-layout(location = 1) uniform float uDepthLinearizationConstant;
-layout(location = 2) uniform mat4 uFrustumCorners;
-layout(location = 3) uniform uvec2 uImageSize;
-layout(location = 4) uniform uvec4 uImageWidth_ImageArea_TotalImageSamples_Samples;
-layout(location = 5) uniform uint uSamplesComputed;
-layout(location = 6) uniform vec4 uImageSize2Rcp;
-
-// image views
-layout(set = 2, binding = 0) uniform usamplerBuffer sampleSequence;
-layout(set = 2, binding = 1) uniform usampler2D scramblebuf;
-layout(set = 2, binding = 2) uniform sampler2D depthbuf;
-layout(set = 2, binding = 3) uniform usampler2D objectTriangleFrontFacing;
-layout(set = 2, binding = 4) uniform sampler2D encodedNormal;
-layout(set = 2, binding = 5) uniform sampler2D uv;
-
-// SSBOs
-#include "irr/builtin/glsl/ext/RadeonRays/ray.glsl"
-layout(set = 2, binding = 6, std430) restrict writeonly buffer Rays
+#include <irr/builtin/glsl/utils/normal_decode.glsl>
+//
+layout(set = 2, binding = 0, row_major) uniform RaygenData
 {
-	RadeonRays_ray rays[];
-};
-
-layout(set = 1, binding = 0, std430) restrict readonly buffer CumulativeLightPDF
-{
-	uint lightCDF[];
-};
-
-layout(set = 1, binding = 1, std430, row_major) restrict readonly buffer Lights
-{
-	SLight light[];
+	RaygenShaderData_t raygenData;
 };
+// rng
+layout(set = 2, binding = 1) uniform usamplerBuffer sampleSequence;
+layout(set = 2, binding = 2) uniform usampler2D scramblebuf;
+// vis buffer
+layout(set = 2, binding = 3) uniform sampler2D depthbuf;
+layout(set = 2, binding = 4) uniform usampler2D objectTriangleFrontFacing;
+layout(set = 2, binding = 5) uniform sampler2D encodedNormal;
+layout(set = 2, binding = 6) uniform sampler2D uv;
 
 
 
@@ -58,10 +30,12 @@ float linearizeZBufferVal(in float nonLinearZBufferVal)
 	// positive [0,1] Z: `B/(C-A-Cy)/(B/(C-A))`
 	// positive [0,1] Z: `(C-A)/(C-A-Cy)`
 	// positive [0,1] Z: `D/(D-Cy)`
-    return 1.0/(uDepthLinearizationConstant*nonLinearZBufferVal+1.0);
+    return 1.0/(pc.data.depthLinearizationConstant*nonLinearZBufferVal+1.0);
 }
 
-float maxAbs1(in float val)
+
+/*
+float maxAbs1(in float val) 
 {
 	return abs(val);
 }
@@ -97,28 +71,19 @@ float ULP3(in vec3 val, in uint accuracy)
 	float x = maxAbs3(val);
 	return uintBitsToFloat(floatBitsToUint(x) + accuracy)-x;
 }
+*/
 
 
-
-uint ugen_uniform_sample1(in uint dimension, in uint sampleIx, in uint scramble);
-uvec2 ugen_uniform_sample2(in uint dimension, in uint sampleIx, in uint scramble);
-
-vec2 gen_uniform_sample2(in uint dimension, in uint sampleIx, in uint scramble);
-
-
-uint ugen_uniform_sample1(in uint dimension, in uint sampleIx, in uint scramble)
-{
-	return ugen_uniform_sample2(dimension,sampleIx,scramble).x;
-}
-uvec2 ugen_uniform_sample2(in uint dimension, in uint sampleIx, in uint scramble)
+struct SamplingData_t
 {
-	uint address = (dimension>>1u)*MAX_ACCUMULATED_SAMPLES+(sampleIx&(MAX_ACCUMULATED_SAMPLES-1u));
-	return texelFetch(sampleSequence,int(address)).xy^uvec2(scramble);
-}
-
-vec2 gen_uniform_sample2(in uint dimension, in uint sampleIx, in uint scramble)
+	uint sampleID;
+};
+bool gen_sample_ray(out float maxT, out vec3 direction, out vec3 throughput, in SamplingData_t samplingData)
 {
-	return vec2(ugen_uniform_sample2(dimension,sampleIx,scramble))/vec2(~0u);
+	maxT = FLT_MAX;
+	direction = normalize(vec3(0.5,0.5,0.5));
+	throughput = vec3(1.0);
+	return true;
 }
 
 #ifdef TODO
@@ -189,49 +154,71 @@ vec3 light_sample(out vec3 incoming, in uint sampleIx, in uint scramble, inout f
 void main()
 {
 	uvec2 outputLocation = gl_GlobalInvocationID.xy;
-	bool alive = all(lessThan(outputLocation,uImageSize));
-	if (alive)
+	if (all(lessThan(outputLocation,pc.data.imageDimensions)))
 	{
-		// TODO: accelerate texture fetching
 		ivec2 uv = ivec2(outputLocation);
 		float revdepth = texelFetch(depthbuf,uv,0).r;
 
-		uint outputID = outputLocation.x+uImageWidth_ImageArea_TotalImageSamples_Samples.x*outputLocation.y;
+		uint outputID = outputLocation.y*pc.data.samplesPerRowPerDispatch+outputLocation.x;
 
+		SamplingData_t samplingData;
 		// unproject
 		vec3 viewDir;
 		vec3 position;
 		{
-			vec2 NDC = vec2(outputLocation)*uImageSize2Rcp.xy+uImageSize2Rcp.zw;
-			viewDir = mix(uFrustumCorners[0]*NDC.x+uFrustumCorners[1],uFrustumCorners[2]*NDC.x+uFrustumCorners[3],NDC.yyyy).xyz;
-			position = viewDir*linearizeZBufferVal(revdepth)+uCameraPos;
-		}
+			const mat4x3 frustumCorners = pc.data.frustumCorners;
+			const vec2 NDC = vec2(outputLocation)*pc.data.rcpPixelSize+pc.data.rcpHalfPixelSize;
 
-		alive = revdepth>0.0;
+			viewDir = mix(frustumCorners[0]*NDC.x+frustumCorners[1],frustumCorners[2]*NDC.x+frustumCorners[3],NDC.yyy);
+			position = viewDir*linearizeZBufferVal(revdepth)+pc.data.cameraPosition;
+		}
+		
+		bool alive = false;
+		uint scramble,objectID,triangleID;
+		bool backfacing;
+		vec3 normal = vec3(0.0);
+		vec2 uvCoord;
+		if (revdepth>0.0)
+		{
+			scramble = texelFetch(scramblebuf,uv,0).r;
 
-		uint scramble = texelFetch(scramblebuf,uv,0).r;
+			alive = true;
+		}
+#ifdef USE_OPTIX_DENOISER
+		// TODO: translate normal into float16_t buff
+#endif
 
-		RadeonRays_ray newray;
-		newray.time = 0.0;
-		newray.mask = alive ? -1:0;
-#ifdef TODO
-		for (uint i=0u; i<uImageWidth_ImageArea_TotalImageSamples_Samples.w; i++)
+		for (uint i=0u; i<pc.data.samplesPerPixelPerDispatch; i++)
 		{
-			vec4 throughput = vec4(0.0,0.0,0.0,-1.0);
-			float error = GET_MAGNITUDE(1.0-revdepth)*0.1;
+			vec3 direction; // TODO: just use irr_glsl_LightSample?
+			float maxT;
+			vec4 throughput = vec4(0.0,0.0,0.0,-1.0); // -1 needs to be there to ensure no backface culling on rays
 
-			newray.maxT = FLT_MAX;
+			bool validRay = false;
 			if (alive)
-				throughput.rgb = light_sample(newray.direction,uSamplesComputed+i,scramble,newray.maxT,alive,position);
-
-			newray.origin = position+newray.direction*error/maxAbs3(newray.direction);
-			newray._active = alive ? 1:0;
-			newray.backfaceCulling = int(packHalf2x16(throughput.ab));
-			newray.useless_padding = int(packHalf2x16(throughput.gr));
-
-			// TODO: repack rays for coalescing
-			rays[outputID+i*uImageWidth_ImageArea_TotalImageSamples_Samples.y] = newray;
+			{
+				samplingData.sampleID = pc.data.samplesComputedPerPixel+i;
+				validRay = gen_sample_ray(maxT,direction,throughput.rgb,samplingData);
+			}
+			
+			// TODO: repack rays in smem for coalescing
+			const uint realOutputID = outputID+i;
+			if (validRay)
+			{
+				rays[realOutputID].origin = position;/*+newray.direction*err?; TODO */
+				rays[realOutputID].maxT = 0.0;
+				rays[realOutputID].direction = direction;
+				rays[realOutputID].mask = -1;
+				rays[realOutputID]._active = 1;
+				rays[realOutputID].backfaceCulling = int(packHalf2x16(throughput.ab));
+				rays[realOutputID].useless_padding = int(packHalf2x16(throughput.gr));
+			}
+			else
+			{
+				rays[realOutputID].maxT = 0.0;
+				rays[realOutputID].mask = 0;
+				rays[realOutputID]._active = 0;
+			}
 		}
-#endif
 	}
 }
diff --git a/examples_tests/22.RaytracedAO/resolve.comp b/examples_tests/22.RaytracedAO/resolve.comp
@@ -1,4 +1,5 @@
 #version 430 core
+
 #define WORK_GROUP_DIM 32u
 layout(local_size_x = WORK_GROUP_DIM, local_size_y = WORK_GROUP_DIM) in;
 #define WORK_GROUP_SIZE (WORK_GROUP_DIM*WORK_GROUP_DIM)
@@ -7,48 +8,16 @@ layout(local_size_x = WORK_GROUP_DIM, local_size_y = WORK_GROUP_DIM) in;
 #include "common.glsl"
 
 
-// TODO translate into push contants
-// uniforms
-layout(location = 0) uniform uvec2 uImageSize;
-layout(location = 1) uniform uvec4 uImageWidth_ImageArea_TotalImageSamples_Samples;
-layout(location = 2) uniform float uRcpFramesDone;
-layout(location = 3) uniform mat3 uNormalMatrix;
-
-// image views
-layout(set = 2, binding = 0) uniform usampler2D lightIndex;
-layout(set = 2, binding = 1) uniform sampler2D albedobuf;
-layout(set = 2, binding = 2) uniform sampler2D normalbuf;
-layout(set = 2, binding = 3, rgba32f) restrict uniform image2D framebuffer;
-
-// SSBOs
-layout(set = 2, binding = 4, std430) restrict readonly buffer Rays
-{
-	RadeonRays_ray rays[];
-};
-layout(set = 2, binding = 5, std430) restrict buffer Queries
+layout(push_constant, row_major) uniform PushConstants
 {
-	int hit[];
-};
+	ResolveShaderData_t data;
+} pc;
 
-layout(set = 2, binding = 6, std430, row_major) restrict readonly buffer LightRadiances
-{
-	vec3 lightRadiance[]; // Watts / steriadian / steradian
-};
 
-#ifdef USE_OPTIX_DENOISER
-layout(set = 2, binding = 7, std430) restrict writeonly buffer DenoiserColorInput
+layout(set = 2, binding = 0, std430) restrict buffer Queries
 {
-	float16_t colorOutput[];
-};
-layout(set = 2, binding = 8, std430) restrict writeonly buffer DenoiserAlbedoInput
-{
-	float16_t albedoOutput[];
-};
-layout(set = 2, binding = 9, std430) restrict writeonly buffer DenoiserNormalInput
-{
-	float16_t normalOutput[];
+	int hit[];
 };
-#endif
 
 
 
@@ -70,10 +39,6 @@ void main()
 	uint baseID = gl_GlobalInvocationID.x+uImageWidth_ImageArea_TotalImageSamples_Samples.x*gl_GlobalInvocationID.y;
 	bool alive = all(lessThan(gl_GlobalInvocationID.xy,uImageSize));
 
-	vec3 normal;
-	if (alive)
-		normal = irr_glsl_NormalDecode_signedSpherical(texelFetch(normalbuf,pixelCoord,0).rg);
-
 	vec4 acc = vec4(0.0);
 	if (uRcpFramesDone<1.0 && alive)
 		acc = imageLoad(framebuffer,pixelCoord);
@@ -107,8 +72,6 @@ void main()
 		for (uint j=localID; j<CACHE_SIZE; j+=CACHE_DIM*CACHE_DIM)
 		{
 			vec3 raydiance = vec4(unpackHalf2x16(rayScratch0[j]),unpackHalf2x16(rayScratch1[j])).gra;
-			// TODO: sophisticated BSDF eval
-			raydiance *= max(dot(vec3(rayScratch2[j],rayScratch3[j],rayScratch4[j]),normal),0.0)/kPI;
 			color += raydiance;
 		}
 
@@ -122,29 +85,11 @@ void main()
 
 	if (alive)
 	{
-		// TODO: sophisticated BSDF eval
-		vec3 albedo = texelFetch(albedobuf,pixelCoord,0).rgb;
-		color *= albedo;
-
-		// TODO: move  ray gen, for fractional sampling
+		// TODO: move to ray gen, for fractional sampling?
 		color *= 1.0/float(uImageWidth_ImageArea_TotalImageSamples_Samples.w);
 
-		uint lightID = texelFetch(lightIndex,pixelCoord,0)[0];
-		if (lightID!=0xdeadbeefu)
-			color += lightRadiance[lightID];
-
 		// TODO: optimize the color storage (RGB9E5/RGB19E7 anyone?)
 		acc.rgb += (color-acc.rgb)*uRcpFramesDone;
 		imageStore(framebuffer,pixelCoord,acc);
-#ifdef USE_OPTIX_DENOISER
-		for (uint i=0u; i<3u; i++)
-			colorOutput[baseID*3+i] = float16_t(acc[i]);
-			//colorOutput[baseID*3+i] = float16_t(clamp(acc[i],0.0001,10000.0));
-		for (uint i=0u; i<3u; i++)
-			albedoOutput[baseID*3+i] = float16_t(albedo[i]);
-		normal = uNormalMatrix*normal;
-		for (uint i=0u; i<3u; i++)
-			normalOutput[baseID*3+i] = float16_t(normal[i]);
-#endif
 	}
 }
diff --git a/include/irr/builtin/glsl/ext/RadeonRays/ray.glsl b/include/irr/builtin/glsl/ext/RadeonRays/ray.glsl
@@ -15,7 +15,7 @@ struct irr_glsl_ext_RadeonRays_ray
 
 irr_glsl_ext_RadeonRays_ray irr_glsl_ext_RadeonRays_constructDefaultRay(in vec3 origin, in vec3 direction, in float maxLen, in int userData)
 {
-	RadeonRays_ray retval;
+	irr_glsl_ext_RadeonRays_ray retval;
 	retval.origin = origin;
 	retval.maxT = maxLen;
 	retval.direction = direction;

Original file line number	Diff line number	Diff line change
`@@ -15,7 +15,7 @@ struct irr_glsl_ext_RadeonRays_ray`
`15`	`15`
`16`	`16`	`irr_glsl_ext_RadeonRays_ray irr_glsl_ext_RadeonRays_constructDefaultRay(in vec3 origin, in vec3 direction, in float maxLen, in int userData)`
`17`	`17`	`{`
`18`		`- RadeonRays_ray retval;`
	`18`	`+ irr_glsl_ext_RadeonRays_ray retval;`
`19`	`19`	`retval.origin = origin;`
`20`	`20`	`retval.maxT = maxLen;`
`21`	`21`	`retval.direction = direction;`