From 0a0a000a8963c74d434615610fbac1b7a5f46090 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Sat, 21 May 2022 12:51:40 +0200 Subject: [PATCH] Use vulkan_create_texture_from_bits for NIS images. --- src/rendervulkan.cpp | 302 ++++++++----------------------------------- 1 file changed, 53 insertions(+), 249 deletions(-) diff --git a/src/rendervulkan.cpp b/src/rendervulkan.cpp index 971e891..f2d2b5d 100644 --- a/src/rendervulkan.cpp +++ b/src/rendervulkan.cpp @@ -63,12 +63,8 @@ struct VulkanOutput_t std::shared_ptr tmpOutput; // NIS - VkSampler nisSampler; - VkImage nisScalerImage; - VkImageView nisScalerView; - VkImage nisUsmImage; - VkImageView nisUsmView; - VkDeviceMemory nisMemory; + std::shared_ptr nisScalerImage; + std::shared_ptr nisUsmImage; }; @@ -109,9 +105,6 @@ struct VulkanSamplerCacheKey_t } }; -// Needed for use in init_nis_data -VkSampler vulkan_make_sampler( VulkanSamplerCacheKey_t key ); - namespace std { template <> @@ -205,16 +198,23 @@ struct wsi_memory_allocate_info { bool implicit_sync; }; +// DRM doesn't have 32bit floating point formats, so add our own +#define DRM_FORMAT_ABGR32323232F fourcc_code('A', 'B', '8', 'F') + struct { uint32_t DRMFormat; VkFormat vkFormat; VkFormat vkFormatSrgb; + uint32_t bpp; bool bHasAlpha; + bool internal; } s_DRMVKFormatTable[] = { - { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB, true }, - { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB, false }, - { DRM_FORMAT_NV12, VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, false }, - { DRM_FORMAT_INVALID, VK_FORMAT_UNDEFINED, VK_FORMAT_UNDEFINED, false }, + { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB, 4, true, false }, + { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB, 4, false, false }, + { DRM_FORMAT_NV12, VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, 0, false, false }, + { DRM_FORMAT_ABGR16161616F, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, 8, true, true }, + { DRM_FORMAT_ABGR32323232F, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT, 16,true, true }, + { DRM_FORMAT_INVALID, VK_FORMAT_UNDEFINED, VK_FORMAT_UNDEFINED, false, true }, }; static inline uint32_t VulkanFormatToDRM( VkFormat vkFormat ) @@ -256,6 +256,19 @@ static inline bool DRMFormatHasAlpha( uint32_t nDRMFormat ) return false; } +static inline uint32_t DRMFormatGetBPP( uint32_t nDRMFormat ) +{ + for ( int i = 0; s_DRMVKFormatTable[i].vkFormat != VK_FORMAT_UNDEFINED; i++ ) + { + if ( s_DRMVKFormatTable[i].DRMFormat == nDRMFormat ) + { + return s_DRMVKFormatTable[i].bpp; + } + } + + return false; +} + struct scratchCmdBuffer_t { VkCommandBuffer cmdBuf; @@ -2024,6 +2037,9 @@ bool vulkan_init_formats() { for ( size_t i = 0; s_DRMVKFormatTable[i].DRMFormat != DRM_FORMAT_INVALID; i++ ) { + if (s_DRMVKFormatTable[i].internal) + continue; + VkFormat format = s_DRMVKFormatTable[i].vkFormat; VkFormat srgbFormat = s_DRMVKFormatTable[i].vkFormatSrgb; uint32_t drmFormat = s_DRMVKFormatTable[i].DRMFormat; @@ -2344,246 +2360,41 @@ static void update_tmp_images( uint32_t width, uint32_t height ) } -static bool allocate_nis_memory() -{ - VkMemoryRequirements memRequirements = {}; - - vkGetImageMemoryRequirements(g_device.device(), g_output.nisScalerImage, &memRequirements); - uint32_t supportedMemoryTypes = memRequirements.memoryTypeBits; - VkDeviceSize memorySize = memRequirements.size; - - vkGetImageMemoryRequirements(g_device.device(), g_output.nisUsmImage, &memRequirements); - supportedMemoryTypes &= memRequirements.memoryTypeBits; - - VkDeviceSize usmOffset = memorySize + (memRequirements.alignment - (memorySize % memRequirements.alignment)); - memorySize += memRequirements.size + usmOffset; - - int32_t memTypeIndex = g_device.findMemoryType(VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, supportedMemoryTypes); - if ( memTypeIndex == -1 ) - { - vk_log.errorf( "findMemoryType failed" ); - return false; - } - - VkMemoryAllocateInfo allocInfo = {}; - allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - allocInfo.allocationSize = memorySize; - allocInfo.memoryTypeIndex = memTypeIndex; - - vkAllocateMemory(g_device.device(), &allocInfo, nullptr, &g_output.nisMemory ); - - vkBindImageMemory(g_device.device(), g_output.nisScalerImage, g_output.nisMemory, 0); - vkBindImageMemory(g_device.device(), g_output.nisUsmImage, g_output.nisMemory, usmOffset); - - return true; -} - static bool init_nis_data() { - // Create NIS Sampler + // Create the NIS images + // Select between the FP16 or FP32 coefficients + + void* coefScaleData = g_device.supportsFp16() ? (void*) coef_scale_fp16 : (void*) coef_scale; + + void* coefUsmData = g_device.supportsFp16() ? (void*) coef_usm_fp16 : (void*) coef_usm; + + uint32_t nisFormat = g_device.supportsFp16() ? DRM_FORMAT_ABGR16161616F : DRM_FORMAT_ABGR32323232F; + + uint32_t width = kFilterSize / 4; + uint32_t height = kPhaseCount; + + g_output.nisScalerImage = vulkan_create_texture_from_bits( width, height, width, height, nisFormat, {}, coefScaleData ); + vkDeviceWaitIdle(g_device.device()); + g_output.nisUsmImage = vulkan_create_texture_from_bits( width, height, width, height, nisFormat, {}, coefUsmData ); + vkDeviceWaitIdle(g_device.device()); + + + // Write all the static NIS descriptor set bindings VulkanSamplerCacheKey_t samplerKey; samplerKey.bNearest = false; samplerKey.bUnnormalized = false; - g_output.nisSampler = g_device.sampler(samplerKey); - assert ( g_output.nisSampler != VK_NULL_HANDLE ); - - // Create the NIS images - - VkImageCreateInfo imageInfo = {}; - imageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - imageInfo.imageType = VK_IMAGE_TYPE_2D; - imageInfo.format = g_device.supportsFp16() ? VK_FORMAT_R16G16B16A16_SFLOAT : VK_FORMAT_R32G32B32A32_SFLOAT; - imageInfo.extent.width = kFilterSize / 4; - imageInfo.extent.height = kPhaseCount; - imageInfo.extent.depth = 1; - imageInfo.mipLevels = 1; - imageInfo.arrayLayers = 1; - imageInfo.samples = VK_SAMPLE_COUNT_1_BIT; - imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL; - imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; - imageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - - VkResult result = vkCreateImage(g_device.device(), &imageInfo, NULL, &g_output.nisScalerImage); - if(result != VK_SUCCESS) - { - vk_log.errorf( "vkCreateImage for scaler coef image failed" ); - return false; - } - - result = vkCreateImage(g_device.device(), &imageInfo, NULL, &g_output.nisUsmImage); - if(result != VK_SUCCESS) - { - vk_log.errorf( "vkCreateImage for usm coef image failed" ); - return false; - } - - if (!allocate_nis_memory()) - return false; - - VkImageViewCreateInfo viewInfo = {}; - viewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - viewInfo.image = g_output.nisScalerImage; - viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D; - viewInfo.format = g_device.supportsFp16() ? VK_FORMAT_R16G16B16A16_SFLOAT : VK_FORMAT_R32G32B32A32_SFLOAT;; - viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - viewInfo.subresourceRange.baseMipLevel = 0; - viewInfo.subresourceRange.levelCount = 1; - viewInfo.subresourceRange.baseArrayLayer = 0; - viewInfo.subresourceRange.layerCount = 1; - - result = vkCreateImageView(g_device.device(), &viewInfo, NULL, &g_output.nisScalerView); - if(result != VK_SUCCESS) - { - vk_log.errorf( "vkCreateImageView for scaler coef view failed" ); - return false; - } - - viewInfo.image = g_output.nisUsmImage; - - result = vkCreateImageView(g_device.device(), &viewInfo, NULL, &g_output.nisUsmView); - if(result != VK_SUCCESS) - { - vk_log.errorf( "vkCreateImageView for scaler coef view failed" ); - return false; - } - - // Select between the FP16 or FP32 coefficients - - const void* coefScaleData = g_device.supportsFp16() ? - static_cast(coef_scale_fp16) : static_cast(coef_scale); - uint32_t coefScaleSize = g_device.supportsFp16() ? sizeof(coef_scale_fp16) : sizeof(coef_scale); - - const void* coefUsmData = g_device.supportsFp16() ? - static_cast(coef_usm_fp16) : static_cast(coef_usm); - uint32_t coefUsmSize = g_device.supportsFp16() ? sizeof(coef_usm_fp16) : sizeof(coef_usm); - - // Create a staging buffer for uploading the data to the GPU - - VkBufferCreateInfo bufferInfo = {}; - bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - bufferInfo.size = coefScaleSize + coefUsmSize; - bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; - bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - - VkBuffer stagingBuffer = VK_NULL_HANDLE; - result = vkCreateBuffer( g_device.device(), &bufferInfo, NULL, &stagingBuffer ); - if(result != VK_SUCCESS) - { - vk_log.errorf( "Failed to create nis staging buffer" ); - return false; - } - - VkMemoryRequirements memRequirements = {}; - vkGetBufferMemoryRequirements(g_device.device(), stagingBuffer, &memRequirements); - - int32_t memTypeIndex = g_device.findMemoryType(VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, memRequirements.memoryTypeBits); - if ( memTypeIndex == -1 ) - { - vk_log.errorf( "findMemoryType failed for nis staging buffer" ); - return false; - } - - VkMemoryAllocateInfo allocInfo = {}; - allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - allocInfo.allocationSize = memRequirements.size; - allocInfo.memoryTypeIndex = memTypeIndex; - - VkDeviceMemory stagingMem = VK_NULL_HANDLE; - vkAllocateMemory( g_device.device(), &allocInfo, nullptr, &stagingMem ); - vkBindBufferMemory( g_device.device(), stagingBuffer, stagingMem, 0 ); - - void* stagingData = nullptr; - vkMapMemory(g_device.device(), stagingMem, 0, VK_WHOLE_SIZE, 0, &stagingData); - - // Copy the data into the staging buffer and upload it to GPU memory - - memcpy(stagingData, coefScaleData, coefScaleSize); - memcpy(static_cast(stagingData) + coefScaleSize, coefUsmData, coefUsmSize); - - VkCommandBuffer commandBuffer = VK_NULL_HANDLE; - uint32_t handle = g_device.commandBuffer( commandBuffer ); - - std::array barriers = {}; - - barriers[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barriers[0].oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - barriers[0].newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barriers[0].image = g_output.nisScalerImage; - barriers[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - barriers[0].subresourceRange.baseMipLevel = 0; - barriers[0].subresourceRange.levelCount = 1; - barriers[0].subresourceRange.baseArrayLayer = 0; - barriers[0].subresourceRange.layerCount = 1; - barriers[0].srcAccessMask = 0; - barriers[0].dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - - barriers[1] = barriers[0]; - barriers[1].image = g_output.nisUsmImage; - - vkCmdPipelineBarrier( - commandBuffer, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, - 0, NULL, - 0, NULL, - barriers.size(), barriers.data()); - - VkBufferImageCopy imageCopy = {}; - - imageCopy.bufferOffset = 0; - imageCopy.imageExtent.width = kFilterSize / 4; - imageCopy.imageExtent.height = kPhaseCount; - imageCopy.imageExtent.depth = 1; - imageCopy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - imageCopy.imageSubresource.mipLevel = 0; - imageCopy.imageSubresource.baseArrayLayer = 0; - imageCopy.imageSubresource.layerCount = 1; - - vkCmdCopyBufferToImage(commandBuffer, stagingBuffer, g_output.nisScalerImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &imageCopy); - - imageCopy.bufferOffset = coefScaleSize; - - vkCmdCopyBufferToImage(commandBuffer, stagingBuffer, g_output.nisUsmImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &imageCopy); - - barriers[0].oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - barriers[0].newLayout = VK_IMAGE_LAYOUT_GENERAL; - barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barriers[0].dstAccessMask = 0; - - barriers[1].oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - barriers[1].newLayout = VK_IMAGE_LAYOUT_GENERAL; - barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barriers[1].dstAccessMask = 0; - - vkCmdPipelineBarrier( - commandBuffer, - VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, - 0, - 0, NULL, - 0, NULL, - barriers.size(), barriers.data()); - - std::vector> refs; - g_device.submitCommandBuffer( handle, refs ); - - // Write all the static NIS descriptor set bindings - std::array< VkDescriptorImageInfo, 2 > nisImageDescriptors = {{ { - .sampler = g_output.nisSampler, - .imageView = g_output.nisScalerView, + .sampler = g_device.sampler(samplerKey), + .imageView = g_output.nisScalerImage->linearView(), .imageLayout = VK_IMAGE_LAYOUT_GENERAL, }, { - .sampler = g_output.nisSampler, - .imageView = g_output.nisUsmView, + .sampler = g_device.sampler(samplerKey), + .imageView = g_output.nisUsmImage->linearView(), .imageLayout = VK_IMAGE_LAYOUT_GENERAL, } }}; @@ -2628,13 +2439,6 @@ static bool init_nis_data() currentSet = g_device.descriptorSet(); } while (currentSet != firstSet); - // Wait for the upload to finish and then cleanup - - vkQueueWaitIdle(g_device.queue()); - - vkDestroyBuffer(g_device.device(), stagingBuffer, nullptr); - vkFreeMemory(g_device.device(), stagingMem, nullptr); - return true; } @@ -2661,7 +2465,7 @@ std::shared_ptr vulkan_create_texture_from_bits( uint32_t width, if ( pTex->BInit( width, height, drmFormat, texCreateFlags, nullptr, contentWidth, contentHeight) == false ) return nullptr; - memcpy( g_device.uploadBufferData(), bits, width * height * 4 ); + memcpy( g_device.uploadBufferData(), bits, width * height * DRMFormatGetBPP(drmFormat) ); VkCommandBuffer commandBuffer; uint32_t handle = g_device.commandBuffer( commandBuffer );