From 039caccfeaa2174adc3526ace200113df507c49d Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Wed, 25 Oct 2023 03:40:18 +0100 Subject: [PATCH] rendervulkan: Move away from push constants CTM data is too big to fit in push constants --- src/rendervulkan.cpp | 101 +++++++++++-------- src/rendervulkan.hpp | 17 +++- src/shaders/blit_push_data.h | 2 +- src/shaders/cs_composite_blit.comp | 1 + src/shaders/cs_composite_blur.comp | 1 + src/shaders/cs_composite_blur_cond.comp | 1 + src/shaders/cs_composite_rcas.comp | 3 +- src/shaders/cs_easu.comp | 3 +- src/shaders/cs_easu_fp16.comp | 3 +- src/shaders/cs_gaussian_blur_horizontal.comp | 3 +- src/shaders/cs_nis.comp | 4 +- src/shaders/cs_nis_fp16.comp | 4 +- src/shaders/cs_rgb_to_nv12.comp | 3 +- src/shaders/descriptor_set.h | 14 +-- 14 files changed, 103 insertions(+), 57 deletions(-) diff --git a/src/rendervulkan.cpp b/src/rendervulkan.cpp index e7c7625..01e123b 100644 --- a/src/rendervulkan.cpp +++ b/src/rendervulkan.cpp @@ -557,7 +557,9 @@ bool CVulkanDevice::createDevice() VkPhysicalDeviceVulkan12Features vulkan12Features = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, .pNext = std::exchange(features2.pNext, &vulkan12Features), + .uniformAndStorageBuffer8BitAccess = VK_TRUE, .shaderFloat16 = m_bSupportsFp16, + .scalarBlockLayout = VK_TRUE, .timelineSemaphore = VK_TRUE, }; @@ -684,10 +686,10 @@ bool CVulkanDevice::createLayouts() for (auto& sampler : ycbcrSamplers) sampler = m_ycbcrSampler; - std::array layoutBindings = { + std::array layoutBindings = { VkDescriptorSetLayoutBinding { .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, .descriptorCount = 1, .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, }, @@ -699,8 +701,8 @@ bool CVulkanDevice::createLayouts() }, VkDescriptorSetLayoutBinding { .binding = 2, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = VKR_SAMPLER_SLOTS, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, }, VkDescriptorSetLayoutBinding { @@ -708,16 +710,22 @@ bool CVulkanDevice::createLayouts() .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .descriptorCount = VKR_SAMPLER_SLOTS, .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = ycbcrSamplers.data(), }, VkDescriptorSetLayoutBinding { .binding = 4, .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = VKR_SAMPLER_SLOTS, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = ycbcrSamplers.data(), + }, + VkDescriptorSetLayoutBinding { + .binding = 5, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .descriptorCount = VKR_LUT3D_COUNT, .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, }, VkDescriptorSetLayoutBinding { - .binding = 5, + .binding = 6, .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .descriptorCount = VKR_LUT3D_COUNT, .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, @@ -738,18 +746,10 @@ bool CVulkanDevice::createLayouts() return false; } - VkPushConstantRange pushConstantRange = { - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .offset = 0, - .size = 256, - }; - VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .setLayoutCount = 1, .pSetLayouts = &m_descriptorSetLayout, - .pushConstantRangeCount = 1, - .pPushConstantRanges = &pushConstantRange, }; res = vk.CreatePipelineLayout(device(), &pipelineLayoutCreateInfo, nullptr, &m_pipelineLayout); @@ -790,7 +790,11 @@ bool CVulkanDevice::createPools() return false; } - VkDescriptorPoolSize poolSizes[2] { + VkDescriptorPoolSize poolSizes[3] { + { + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + uint32_t(m_descriptorSets.size()), + }, { VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, uint32_t(m_descriptorSets.size()) * 2, @@ -887,8 +891,8 @@ bool CVulkanDevice::createScratchResources() VkBufferCreateInfo bufferCreateInfo = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .size = 512 * 512 * 4, - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + .size = upload_buffer_size, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, }; res = vk.CreateBuffer( device(), &bufferCreateInfo, nullptr, &m_uploadBuffer ); @@ -1354,11 +1358,13 @@ void CVulkanCmdBuffer::clearState() } template -void CVulkanCmdBuffer::pushConstants(Args&&... args) +void CVulkanCmdBuffer::uploadConstants(Args&&... args) { - static_assert(sizeof(PushData) <= 256, "Only 256 bytes push constants."); PushData data(std::forward(args)...); - m_device->vk.CmdPushConstants(m_cmdBuffer, m_device->pipelineLayout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(data), &data); + + void *ptr = m_device->uploadBufferData(sizeof(data)); + m_renderBufferOffset = m_device->m_uploadBufferOffset - sizeof(data); + memcpy(ptr, &data, sizeof(data)); } void CVulkanCmdBuffer::bindPipeline(VkPipeline pipeline) @@ -1379,12 +1385,13 @@ void CVulkanCmdBuffer::dispatch(uint32_t x, uint32_t y, uint32_t z) VkDescriptorSet descriptorSet = m_device->descriptorSet(); - std::array writeDescriptorSets; + std::array writeDescriptorSets; std::array imageDescriptors = {}; std::array ycbcrImageDescriptors = {}; std::array targetDescriptors = {}; std::array shaperLutDescriptor = {}; std::array lut3DDescriptor = {}; + VkDescriptorBufferInfo scratchDescriptor = {}; writeDescriptorSets[0] = { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, @@ -1392,8 +1399,8 @@ void CVulkanCmdBuffer::dispatch(uint32_t x, uint32_t y, uint32_t z) .dstBinding = 0, .dstArrayElement = 0, .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .pImageInfo = &targetDescriptors[0], + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .pBufferInfo = &scratchDescriptor, }; writeDescriptorSets[1] = { @@ -1403,7 +1410,7 @@ void CVulkanCmdBuffer::dispatch(uint32_t x, uint32_t y, uint32_t z) .dstArrayElement = 0, .descriptorCount = 1, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .pImageInfo = &targetDescriptors[1], + .pImageInfo = &targetDescriptors[0], }; writeDescriptorSets[2] = { @@ -1411,9 +1418,9 @@ void CVulkanCmdBuffer::dispatch(uint32_t x, uint32_t y, uint32_t z) .dstSet = descriptorSet, .dstBinding = 2, .dstArrayElement = 0, - .descriptorCount = imageDescriptors.size(), - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .pImageInfo = imageDescriptors.data(), + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = &targetDescriptors[1], }; writeDescriptorSets[3] = { @@ -1421,9 +1428,9 @@ void CVulkanCmdBuffer::dispatch(uint32_t x, uint32_t y, uint32_t z) .dstSet = descriptorSet, .dstBinding = 3, .dstArrayElement = 0, - .descriptorCount = ycbcrImageDescriptors.size(), + .descriptorCount = imageDescriptors.size(), .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .pImageInfo = ycbcrImageDescriptors.data(), + .pImageInfo = imageDescriptors.data(), }; writeDescriptorSets[4] = { @@ -1431,9 +1438,9 @@ void CVulkanCmdBuffer::dispatch(uint32_t x, uint32_t y, uint32_t z) .dstSet = descriptorSet, .dstBinding = 4, .dstArrayElement = 0, - .descriptorCount = shaperLutDescriptor.size(), + .descriptorCount = ycbcrImageDescriptors.size(), .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .pImageInfo = shaperLutDescriptor.data(), + .pImageInfo = ycbcrImageDescriptors.data(), }; writeDescriptorSets[5] = { @@ -1441,11 +1448,25 @@ void CVulkanCmdBuffer::dispatch(uint32_t x, uint32_t y, uint32_t z) .dstSet = descriptorSet, .dstBinding = 5, .dstArrayElement = 0, + .descriptorCount = shaperLutDescriptor.size(), + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = shaperLutDescriptor.data(), + }; + + writeDescriptorSets[6] = { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = descriptorSet, + .dstBinding = 6, + .dstArrayElement = 0, .descriptorCount = lut3DDescriptor.size(), .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .pImageInfo = lut3DDescriptor.data(), }; + scratchDescriptor.buffer = m_device->m_uploadBuffer; + scratchDescriptor.offset = m_renderBufferOffset; + scratchDescriptor.range = VK_WHOLE_SIZE; + for (uint32_t i = 0; i < VKR_SAMPLER_SLOTS; i++) { imageDescriptors[i].sampler = m_device->sampler(m_samplerState[i]); @@ -3450,7 +3471,7 @@ bool vulkan_screenshot( const struct FrameInfo_t *frameInfo, std::shared_ptrbindPipeline( g_device.pipeline(SHADER_TYPE_BLIT, frameInfo->layerCount, frameInfo->ycbcrMask(), 0u, frameInfo->colorspaceMask(), EOTF_Gamma22 )); bind_all_layers(cmdBuffer.get(), frameInfo); cmdBuffer->bindTarget(pScreenshotTexture); - cmdBuffer->pushConstants(frameInfo); + cmdBuffer->uploadConstants(frameInfo); const int pixelsPerGroup = 8; @@ -3529,7 +3550,7 @@ bool vulkan_composite( struct FrameInfo_t *frameInfo, std::shared_ptrsetTextureSrgb(0, true); cmdBuffer->setSamplerUnnormalized(0, false); cmdBuffer->setSamplerNearest(0, false); - cmdBuffer->pushConstants(inputX, inputY, tempX, tempY); + cmdBuffer->uploadConstants(inputX, inputY, tempX, tempY); int pixelsPerGroup = 16; @@ -3542,7 +3563,7 @@ bool vulkan_composite( struct FrameInfo_t *frameInfo, std::shared_ptrsetSamplerUnnormalized(0, false); cmdBuffer->setSamplerNearest(0, false); cmdBuffer->bindTarget(compositeImage); - cmdBuffer->pushConstants(frameInfo, g_upscaleFilterSharpness / 10.0f); + cmdBuffer->uploadConstants(frameInfo, g_upscaleFilterSharpness / 10.0f); cmdBuffer->dispatch(div_roundup(currentOutputWidth, pixelsPerGroup), div_roundup(currentOutputHeight, pixelsPerGroup)); } @@ -3570,7 +3591,7 @@ bool vulkan_composite( struct FrameInfo_t *frameInfo, std::shared_ptrbindTexture(VKR_NIS_COEF_USM_SLOT, g_output.nisUsmImage); cmdBuffer->setSamplerUnnormalized(VKR_NIS_COEF_USM_SLOT, false); cmdBuffer->setSamplerNearest(VKR_NIS_COEF_USM_SLOT, false); - cmdBuffer->pushConstants(inputX, inputY, tempX, tempY, nisSharpness); + cmdBuffer->uploadConstants(inputX, inputY, tempX, tempY, nisSharpness); int pixelsPerGroupX = 32; int pixelsPerGroupY = 24; @@ -3585,7 +3606,7 @@ bool vulkan_composite( struct FrameInfo_t *frameInfo, std::shared_ptrbindPipeline( g_device.pipeline(SHADER_TYPE_BLIT, nisFrameInfo.layerCount, nisFrameInfo.ycbcrMask())); bind_all_layers(cmdBuffer.get(), &nisFrameInfo); cmdBuffer->bindTarget(compositeImage); - cmdBuffer->pushConstants(&nisFrameInfo); + cmdBuffer->uploadConstants(&nisFrameInfo); int pixelsPerGroup = 8; @@ -3611,7 +3632,7 @@ bool vulkan_composite( struct FrameInfo_t *frameInfo, std::shared_ptrsetSamplerUnnormalized(i, true); cmdBuffer->setSamplerNearest(i, false); } - cmdBuffer->pushConstants(frameInfo); + cmdBuffer->uploadConstants(frameInfo); int pixelsPerGroup = 8; @@ -3635,7 +3656,7 @@ bool vulkan_composite( struct FrameInfo_t *frameInfo, std::shared_ptrbindPipeline( g_device.pipeline(SHADER_TYPE_BLIT, frameInfo->layerCount, frameInfo->ycbcrMask(), 0u, frameInfo->colorspaceMask(), g_ColorMgmt.current.outputEncodingEOTF )); bind_all_layers(cmdBuffer.get(), frameInfo); cmdBuffer->bindTarget(compositeImage); - cmdBuffer->pushConstants(frameInfo); + cmdBuffer->uploadConstants(frameInfo); const int pixelsPerGroup = 8; @@ -3657,12 +3678,12 @@ bool vulkan_composite( struct FrameInfo_t *frameInfo, std::shared_ptrstreamColorspace() ) ); constants.halfExtent[0] = pPipewireTexture->width() / 2.0f; constants.halfExtent[1] = pPipewireTexture->height() / 2.0f; - cmdBuffer->pushConstants(constants); + cmdBuffer->uploadConstants(constants); } else { BlitPushData_t constants( scale ); - cmdBuffer->pushConstants(constants); + cmdBuffer->uploadConstants(constants); } for (uint32_t i = 0; i < EOTF_Count; i++) diff --git a/src/rendervulkan.hpp b/src/rendervulkan.hpp index 19d8f4d..405016a 100644 --- a/src/rendervulkan.hpp +++ b/src/rendervulkan.hpp @@ -686,6 +686,11 @@ static inline uint32_t div_roundup(uint32_t x, uint32_t y) VK_FUNC(WaitSemaphores) \ VK_FUNC(SetHdrMetadataEXT) +template +constexpr T align(T what, U to) { +return (what + to - 1) & ~(to - 1); +} + class CVulkanDevice { public: @@ -730,8 +735,12 @@ public: { assert(size < upload_buffer_size); + m_uploadBufferOffset = align(m_uploadBufferOffset, 16); if (m_uploadBufferOffset + size > upload_buffer_size) + { + fprintf(stderr, "Exceeded uploadBufferData\n"); waitIdle(false); + } uint8_t *ptr = ((uint8_t*)m_uploadBufferData) + m_uploadBufferOffset; m_uploadBufferOffset += size; @@ -748,7 +757,9 @@ public: void resetCmdBuffers(uint64_t sequence); -private: +protected: + friend class CVulkanCmdBuffer; + bool selectPhysDev(VkSurfaceKHR surface); bool createDevice(); bool createLayouts(); @@ -848,7 +859,7 @@ public: void bindTarget(std::shared_ptr target); void clearState(); template - void pushConstants(Args&&... args); + void uploadConstants(Args&&... args); void bindPipeline(VkPipeline pipeline); void dispatch(uint32_t x, uint32_t y = 1, uint32_t z = 1); void copyImage(std::shared_ptr src, std::shared_ptr dst); @@ -883,6 +894,8 @@ private: std::array m_shaperLut; std::array m_lut3D; + + uint32_t m_renderBufferOffset = 0; }; uint32_t VulkanFormatToDRM( VkFormat vkFormat ); diff --git a/src/shaders/blit_push_data.h b/src/shaders/blit_push_data.h index 5b845a7..002a924 100644 --- a/src/shaders/blit_push_data.h +++ b/src/shaders/blit_push_data.h @@ -1,4 +1,4 @@ -layout(push_constant) +layout(binding = 0, std430) uniform layers_t { vec2 u_scale[VKR_MAX_LAYERS]; vec2 u_offset[VKR_MAX_LAYERS]; diff --git a/src/shaders/cs_composite_blit.comp b/src/shaders/cs_composite_blit.comp index 6048940..52bd9d5 100644 --- a/src/shaders/cs_composite_blit.comp +++ b/src/shaders/cs_composite_blit.comp @@ -2,6 +2,7 @@ #extension GL_GOOGLE_include_directive : require #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require +#extension GL_EXT_scalar_block_layout : require #include "descriptor_set.h" diff --git a/src/shaders/cs_composite_blur.comp b/src/shaders/cs_composite_blur.comp index cccc28b..52062bd 100644 --- a/src/shaders/cs_composite_blur.comp +++ b/src/shaders/cs_composite_blur.comp @@ -2,6 +2,7 @@ #extension GL_GOOGLE_include_directive : require #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require +#extension GL_EXT_scalar_block_layout : require #include "descriptor_set.h" diff --git a/src/shaders/cs_composite_blur_cond.comp b/src/shaders/cs_composite_blur_cond.comp index 0752311..ffed4d5 100644 --- a/src/shaders/cs_composite_blur_cond.comp +++ b/src/shaders/cs_composite_blur_cond.comp @@ -2,6 +2,7 @@ #extension GL_GOOGLE_include_directive : require #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require +#extension GL_EXT_scalar_block_layout : require #include "descriptor_set.h" diff --git a/src/shaders/cs_composite_rcas.comp b/src/shaders/cs_composite_rcas.comp index 8eb7178..a578951 100644 --- a/src/shaders/cs_composite_rcas.comp +++ b/src/shaders/cs_composite_rcas.comp @@ -2,6 +2,7 @@ #extension GL_GOOGLE_include_directive : require #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require +#extension GL_EXT_scalar_block_layout : require #include "descriptor_set.h" @@ -10,7 +11,7 @@ layout( local_size_y = 1, local_size_z = 1) in; -layout(push_constant) +layout(binding = 0, std430) uniform layers_t { uvec2 u_layer0Offset; vec2 u_scale[VKR_MAX_LAYERS - 1]; diff --git a/src/shaders/cs_easu.comp b/src/shaders/cs_easu.comp index 123f0c8..8bfc389 100644 --- a/src/shaders/cs_easu.comp +++ b/src/shaders/cs_easu.comp @@ -2,6 +2,7 @@ #extension GL_GOOGLE_include_directive : require #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require +#extension GL_EXT_scalar_block_layout : require #include "descriptor_set.h" @@ -10,7 +11,7 @@ layout( local_size_y = 1, local_size_z = 1) in; -layout(push_constant) +layout(binding = 0, std430) uniform layers_t { uvec4 c1, c2, c3, c4; }; diff --git a/src/shaders/cs_easu_fp16.comp b/src/shaders/cs_easu_fp16.comp index a3b7a8e..9d95f9f 100644 --- a/src/shaders/cs_easu_fp16.comp +++ b/src/shaders/cs_easu_fp16.comp @@ -3,6 +3,7 @@ #extension GL_GOOGLE_include_directive : require #extension GL_EXT_shader_explicit_arithmetic_types_float16 : require #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require +#extension GL_EXT_scalar_block_layout : require #include "descriptor_set.h" @@ -11,7 +12,7 @@ layout( local_size_y = 1, local_size_z = 1) in; -layout(push_constant) +layout(binding = 0, std430) uniform layers_t { uvec4 c1, c2, c3, c4; }; diff --git a/src/shaders/cs_gaussian_blur_horizontal.comp b/src/shaders/cs_gaussian_blur_horizontal.comp index 351fd76..e39d5ef 100644 --- a/src/shaders/cs_gaussian_blur_horizontal.comp +++ b/src/shaders/cs_gaussian_blur_horizontal.comp @@ -2,6 +2,7 @@ #extension GL_GOOGLE_include_directive : require #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require +#extension GL_EXT_scalar_block_layout : require #include "descriptor_set.h" @@ -10,7 +11,7 @@ layout( local_size_y = 8, local_size_z = 1) in; -layout(push_constant) +layout(binding = 0, std430) uniform layers_t { vec2 u_scale[VKR_MAX_LAYERS]; vec2 u_offset[VKR_MAX_LAYERS]; diff --git a/src/shaders/cs_nis.comp b/src/shaders/cs_nis.comp index d6a565e..b2aaca6 100644 --- a/src/shaders/cs_nis.comp +++ b/src/shaders/cs_nis.comp @@ -2,13 +2,15 @@ #extension GL_GOOGLE_include_directive : enable #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require +#extension GL_EXT_scalar_block_layout : require #define NIS_GLSL 1 #define NIS_SCALER 1 #include "descriptor_set.h" -layout(push_constant) uniform const_buffer +layout(binding = 0, std430) +uniform const_buffer { float kDetectRatio; float kDetectThres; diff --git a/src/shaders/cs_nis_fp16.comp b/src/shaders/cs_nis_fp16.comp index 75a5a41..42f1f18 100644 --- a/src/shaders/cs_nis_fp16.comp +++ b/src/shaders/cs_nis_fp16.comp @@ -3,6 +3,7 @@ #extension GL_GOOGLE_include_directive : enable #extension GL_EXT_shader_explicit_arithmetic_types_float16 : require #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require +#extension GL_EXT_scalar_block_layout : require #define NIS_GLSL 1 #define NIS_USE_HALF_PRECISION 1 @@ -10,7 +11,8 @@ #include "descriptor_set.h" -layout(push_constant) uniform const_buffer +layout(binding = 0, std430) +uniform const_buffer { float kDetectRatio; float kDetectThres; diff --git a/src/shaders/cs_rgb_to_nv12.comp b/src/shaders/cs_rgb_to_nv12.comp index 6e408ee..efee547 100644 --- a/src/shaders/cs_rgb_to_nv12.comp +++ b/src/shaders/cs_rgb_to_nv12.comp @@ -2,6 +2,7 @@ #extension GL_GOOGLE_include_directive : require #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require +#extension GL_EXT_scalar_block_layout : require #include "descriptor_set.h" @@ -22,7 +23,7 @@ const float u_nitsToLinear = 1.0f / 100.0f; const float u_itmSdrNits = 100.f; const float u_itmTargetNits = 1000.f; -layout(push_constant) +layout(binding = 0, std430) uniform layers_t { vec2 u_scale[1]; vec2 u_offset[1]; diff --git a/src/shaders/descriptor_set.h b/src/shaders/descriptor_set.h index 57ff3f3..981e7ee 100644 --- a/src/shaders/descriptor_set.h +++ b/src/shaders/descriptor_set.h @@ -44,13 +44,13 @@ uint get_layer_colorspace(uint layerIdx) { return bitfieldExtract(c_colorspaceMask, int(layerIdx) * colorspace_max_bits, colorspace_max_bits); } -layout(binding = 0, rgba8) writeonly uniform image2D dst; +layout(binding = 1, rgba8) writeonly uniform image2D dst; // alias -layout(binding = 0, rgba8) writeonly uniform image2D dst_luma; -layout(binding = 1, rgba8) writeonly uniform image2D dst_chroma; +layout(binding = 1, rgba8) writeonly uniform image2D dst_luma; +layout(binding = 2, rgba8) writeonly uniform image2D dst_chroma; -layout(binding = 2) uniform sampler2D s_samplers[VKR_SAMPLER_SLOTS]; -layout(binding = 3) uniform sampler2D s_ycbcr_samplers[VKR_SAMPLER_SLOTS]; +layout(binding = 3) uniform sampler2D s_samplers[VKR_SAMPLER_SLOTS]; +layout(binding = 4) uniform sampler2D s_ycbcr_samplers[VKR_SAMPLER_SLOTS]; -layout(binding = 4) uniform sampler1D s_shaperLut[VKR_LUT3D_COUNT]; -layout(binding = 5) uniform sampler3D s_lut3D[VKR_LUT3D_COUNT]; +layout(binding = 5) uniform sampler1D s_shaperLut[VKR_LUT3D_COUNT]; +layout(binding = 6) uniform sampler3D s_lut3D[VKR_LUT3D_COUNT];