Use vulkan_create_texture_from_bits for NIS images.
This commit is contained in:
parent
b406bba47e
commit
0a0a000a89
1 changed files with 53 additions and 249 deletions
|
@ -63,12 +63,8 @@ struct VulkanOutput_t
|
|||
std::shared_ptr<CVulkanTexture> tmpOutput;
|
||||
|
||||
// NIS
|
||||
VkSampler nisSampler;
|
||||
VkImage nisScalerImage;
|
||||
VkImageView nisScalerView;
|
||||
VkImage nisUsmImage;
|
||||
VkImageView nisUsmView;
|
||||
VkDeviceMemory nisMemory;
|
||||
std::shared_ptr<CVulkanTexture> nisScalerImage;
|
||||
std::shared_ptr<CVulkanTexture> nisUsmImage;
|
||||
};
|
||||
|
||||
|
||||
|
@ -109,9 +105,6 @@ struct VulkanSamplerCacheKey_t
|
|||
}
|
||||
};
|
||||
|
||||
// Needed for use in init_nis_data
|
||||
VkSampler vulkan_make_sampler( VulkanSamplerCacheKey_t key );
|
||||
|
||||
namespace std
|
||||
{
|
||||
template <>
|
||||
|
@ -205,16 +198,23 @@ struct wsi_memory_allocate_info {
|
|||
bool implicit_sync;
|
||||
};
|
||||
|
||||
// DRM doesn't have 32bit floating point formats, so add our own
|
||||
#define DRM_FORMAT_ABGR32323232F fourcc_code('A', 'B', '8', 'F')
|
||||
|
||||
struct {
|
||||
uint32_t DRMFormat;
|
||||
VkFormat vkFormat;
|
||||
VkFormat vkFormatSrgb;
|
||||
uint32_t bpp;
|
||||
bool bHasAlpha;
|
||||
bool internal;
|
||||
} s_DRMVKFormatTable[] = {
|
||||
{ DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB, true },
|
||||
{ DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB, false },
|
||||
{ DRM_FORMAT_NV12, VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, false },
|
||||
{ DRM_FORMAT_INVALID, VK_FORMAT_UNDEFINED, VK_FORMAT_UNDEFINED, false },
|
||||
{ DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB, 4, true, false },
|
||||
{ DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB, 4, false, false },
|
||||
{ DRM_FORMAT_NV12, VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, 0, false, false },
|
||||
{ DRM_FORMAT_ABGR16161616F, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, 8, true, true },
|
||||
{ DRM_FORMAT_ABGR32323232F, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT, 16,true, true },
|
||||
{ DRM_FORMAT_INVALID, VK_FORMAT_UNDEFINED, VK_FORMAT_UNDEFINED, false, true },
|
||||
};
|
||||
|
||||
static inline uint32_t VulkanFormatToDRM( VkFormat vkFormat )
|
||||
|
@ -256,6 +256,19 @@ static inline bool DRMFormatHasAlpha( uint32_t nDRMFormat )
|
|||
return false;
|
||||
}
|
||||
|
||||
static inline uint32_t DRMFormatGetBPP( uint32_t nDRMFormat )
|
||||
{
|
||||
for ( int i = 0; s_DRMVKFormatTable[i].vkFormat != VK_FORMAT_UNDEFINED; i++ )
|
||||
{
|
||||
if ( s_DRMVKFormatTable[i].DRMFormat == nDRMFormat )
|
||||
{
|
||||
return s_DRMVKFormatTable[i].bpp;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
struct scratchCmdBuffer_t
|
||||
{
|
||||
VkCommandBuffer cmdBuf;
|
||||
|
@ -2024,6 +2037,9 @@ bool vulkan_init_formats()
|
|||
{
|
||||
for ( size_t i = 0; s_DRMVKFormatTable[i].DRMFormat != DRM_FORMAT_INVALID; i++ )
|
||||
{
|
||||
if (s_DRMVKFormatTable[i].internal)
|
||||
continue;
|
||||
|
||||
VkFormat format = s_DRMVKFormatTable[i].vkFormat;
|
||||
VkFormat srgbFormat = s_DRMVKFormatTable[i].vkFormatSrgb;
|
||||
uint32_t drmFormat = s_DRMVKFormatTable[i].DRMFormat;
|
||||
|
@ -2344,246 +2360,41 @@ static void update_tmp_images( uint32_t width, uint32_t height )
|
|||
}
|
||||
|
||||
|
||||
static bool allocate_nis_memory()
|
||||
{
|
||||
VkMemoryRequirements memRequirements = {};
|
||||
|
||||
vkGetImageMemoryRequirements(g_device.device(), g_output.nisScalerImage, &memRequirements);
|
||||
uint32_t supportedMemoryTypes = memRequirements.memoryTypeBits;
|
||||
VkDeviceSize memorySize = memRequirements.size;
|
||||
|
||||
vkGetImageMemoryRequirements(g_device.device(), g_output.nisUsmImage, &memRequirements);
|
||||
supportedMemoryTypes &= memRequirements.memoryTypeBits;
|
||||
|
||||
VkDeviceSize usmOffset = memorySize + (memRequirements.alignment - (memorySize % memRequirements.alignment));
|
||||
memorySize += memRequirements.size + usmOffset;
|
||||
|
||||
int32_t memTypeIndex = g_device.findMemoryType(VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, supportedMemoryTypes);
|
||||
if ( memTypeIndex == -1 )
|
||||
{
|
||||
vk_log.errorf( "findMemoryType failed" );
|
||||
return false;
|
||||
}
|
||||
|
||||
VkMemoryAllocateInfo allocInfo = {};
|
||||
allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
|
||||
allocInfo.allocationSize = memorySize;
|
||||
allocInfo.memoryTypeIndex = memTypeIndex;
|
||||
|
||||
vkAllocateMemory(g_device.device(), &allocInfo, nullptr, &g_output.nisMemory );
|
||||
|
||||
vkBindImageMemory(g_device.device(), g_output.nisScalerImage, g_output.nisMemory, 0);
|
||||
vkBindImageMemory(g_device.device(), g_output.nisUsmImage, g_output.nisMemory, usmOffset);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool init_nis_data()
|
||||
{
|
||||
// Create NIS Sampler
|
||||
// Create the NIS images
|
||||
// Select between the FP16 or FP32 coefficients
|
||||
|
||||
void* coefScaleData = g_device.supportsFp16() ? (void*) coef_scale_fp16 : (void*) coef_scale;
|
||||
|
||||
void* coefUsmData = g_device.supportsFp16() ? (void*) coef_usm_fp16 : (void*) coef_usm;
|
||||
|
||||
uint32_t nisFormat = g_device.supportsFp16() ? DRM_FORMAT_ABGR16161616F : DRM_FORMAT_ABGR32323232F;
|
||||
|
||||
uint32_t width = kFilterSize / 4;
|
||||
uint32_t height = kPhaseCount;
|
||||
|
||||
g_output.nisScalerImage = vulkan_create_texture_from_bits( width, height, width, height, nisFormat, {}, coefScaleData );
|
||||
vkDeviceWaitIdle(g_device.device());
|
||||
g_output.nisUsmImage = vulkan_create_texture_from_bits( width, height, width, height, nisFormat, {}, coefUsmData );
|
||||
vkDeviceWaitIdle(g_device.device());
|
||||
|
||||
|
||||
// Write all the static NIS descriptor set bindings
|
||||
|
||||
VulkanSamplerCacheKey_t samplerKey;
|
||||
samplerKey.bNearest = false;
|
||||
samplerKey.bUnnormalized = false;
|
||||
|
||||
g_output.nisSampler = g_device.sampler(samplerKey);
|
||||
assert ( g_output.nisSampler != VK_NULL_HANDLE );
|
||||
|
||||
// Create the NIS images
|
||||
|
||||
VkImageCreateInfo imageInfo = {};
|
||||
imageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
|
||||
imageInfo.imageType = VK_IMAGE_TYPE_2D;
|
||||
imageInfo.format = g_device.supportsFp16() ? VK_FORMAT_R16G16B16A16_SFLOAT : VK_FORMAT_R32G32B32A32_SFLOAT;
|
||||
imageInfo.extent.width = kFilterSize / 4;
|
||||
imageInfo.extent.height = kPhaseCount;
|
||||
imageInfo.extent.depth = 1;
|
||||
imageInfo.mipLevels = 1;
|
||||
imageInfo.arrayLayers = 1;
|
||||
imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
|
||||
imageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
||||
imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
|
||||
VkResult result = vkCreateImage(g_device.device(), &imageInfo, NULL, &g_output.nisScalerImage);
|
||||
if(result != VK_SUCCESS)
|
||||
{
|
||||
vk_log.errorf( "vkCreateImage for scaler coef image failed" );
|
||||
return false;
|
||||
}
|
||||
|
||||
result = vkCreateImage(g_device.device(), &imageInfo, NULL, &g_output.nisUsmImage);
|
||||
if(result != VK_SUCCESS)
|
||||
{
|
||||
vk_log.errorf( "vkCreateImage for usm coef image failed" );
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!allocate_nis_memory())
|
||||
return false;
|
||||
|
||||
VkImageViewCreateInfo viewInfo = {};
|
||||
viewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
|
||||
viewInfo.image = g_output.nisScalerImage;
|
||||
viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
|
||||
viewInfo.format = g_device.supportsFp16() ? VK_FORMAT_R16G16B16A16_SFLOAT : VK_FORMAT_R32G32B32A32_SFLOAT;;
|
||||
viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
viewInfo.subresourceRange.baseMipLevel = 0;
|
||||
viewInfo.subresourceRange.levelCount = 1;
|
||||
viewInfo.subresourceRange.baseArrayLayer = 0;
|
||||
viewInfo.subresourceRange.layerCount = 1;
|
||||
|
||||
result = vkCreateImageView(g_device.device(), &viewInfo, NULL, &g_output.nisScalerView);
|
||||
if(result != VK_SUCCESS)
|
||||
{
|
||||
vk_log.errorf( "vkCreateImageView for scaler coef view failed" );
|
||||
return false;
|
||||
}
|
||||
|
||||
viewInfo.image = g_output.nisUsmImage;
|
||||
|
||||
result = vkCreateImageView(g_device.device(), &viewInfo, NULL, &g_output.nisUsmView);
|
||||
if(result != VK_SUCCESS)
|
||||
{
|
||||
vk_log.errorf( "vkCreateImageView for scaler coef view failed" );
|
||||
return false;
|
||||
}
|
||||
|
||||
// Select between the FP16 or FP32 coefficients
|
||||
|
||||
const void* coefScaleData = g_device.supportsFp16() ?
|
||||
static_cast<const void*>(coef_scale_fp16) : static_cast<const void*>(coef_scale);
|
||||
uint32_t coefScaleSize = g_device.supportsFp16() ? sizeof(coef_scale_fp16) : sizeof(coef_scale);
|
||||
|
||||
const void* coefUsmData = g_device.supportsFp16() ?
|
||||
static_cast<const void*>(coef_usm_fp16) : static_cast<const void*>(coef_usm);
|
||||
uint32_t coefUsmSize = g_device.supportsFp16() ? sizeof(coef_usm_fp16) : sizeof(coef_usm);
|
||||
|
||||
// Create a staging buffer for uploading the data to the GPU
|
||||
|
||||
VkBufferCreateInfo bufferInfo = {};
|
||||
bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
||||
bufferInfo.size = coefScaleSize + coefUsmSize;
|
||||
bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
|
||||
bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
||||
|
||||
VkBuffer stagingBuffer = VK_NULL_HANDLE;
|
||||
result = vkCreateBuffer( g_device.device(), &bufferInfo, NULL, &stagingBuffer );
|
||||
if(result != VK_SUCCESS)
|
||||
{
|
||||
vk_log.errorf( "Failed to create nis staging buffer" );
|
||||
return false;
|
||||
}
|
||||
|
||||
VkMemoryRequirements memRequirements = {};
|
||||
vkGetBufferMemoryRequirements(g_device.device(), stagingBuffer, &memRequirements);
|
||||
|
||||
int32_t memTypeIndex = g_device.findMemoryType(VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, memRequirements.memoryTypeBits);
|
||||
if ( memTypeIndex == -1 )
|
||||
{
|
||||
vk_log.errorf( "findMemoryType failed for nis staging buffer" );
|
||||
return false;
|
||||
}
|
||||
|
||||
VkMemoryAllocateInfo allocInfo = {};
|
||||
allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
|
||||
allocInfo.allocationSize = memRequirements.size;
|
||||
allocInfo.memoryTypeIndex = memTypeIndex;
|
||||
|
||||
VkDeviceMemory stagingMem = VK_NULL_HANDLE;
|
||||
vkAllocateMemory( g_device.device(), &allocInfo, nullptr, &stagingMem );
|
||||
vkBindBufferMemory( g_device.device(), stagingBuffer, stagingMem, 0 );
|
||||
|
||||
void* stagingData = nullptr;
|
||||
vkMapMemory(g_device.device(), stagingMem, 0, VK_WHOLE_SIZE, 0, &stagingData);
|
||||
|
||||
// Copy the data into the staging buffer and upload it to GPU memory
|
||||
|
||||
memcpy(stagingData, coefScaleData, coefScaleSize);
|
||||
memcpy(static_cast<char*>(stagingData) + coefScaleSize, coefUsmData, coefUsmSize);
|
||||
|
||||
VkCommandBuffer commandBuffer = VK_NULL_HANDLE;
|
||||
uint32_t handle = g_device.commandBuffer( commandBuffer );
|
||||
|
||||
std::array<VkImageMemoryBarrier, 2> barriers = {};
|
||||
|
||||
barriers[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
|
||||
barriers[0].oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
barriers[0].newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
|
||||
barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barriers[0].image = g_output.nisScalerImage;
|
||||
barriers[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
barriers[0].subresourceRange.baseMipLevel = 0;
|
||||
barriers[0].subresourceRange.levelCount = 1;
|
||||
barriers[0].subresourceRange.baseArrayLayer = 0;
|
||||
barriers[0].subresourceRange.layerCount = 1;
|
||||
barriers[0].srcAccessMask = 0;
|
||||
barriers[0].dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
||||
|
||||
barriers[1] = barriers[0];
|
||||
barriers[1].image = g_output.nisUsmImage;
|
||||
|
||||
vkCmdPipelineBarrier(
|
||||
commandBuffer,
|
||||
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
0,
|
||||
0, NULL,
|
||||
0, NULL,
|
||||
barriers.size(), barriers.data());
|
||||
|
||||
VkBufferImageCopy imageCopy = {};
|
||||
|
||||
imageCopy.bufferOffset = 0;
|
||||
imageCopy.imageExtent.width = kFilterSize / 4;
|
||||
imageCopy.imageExtent.height = kPhaseCount;
|
||||
imageCopy.imageExtent.depth = 1;
|
||||
imageCopy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
imageCopy.imageSubresource.mipLevel = 0;
|
||||
imageCopy.imageSubresource.baseArrayLayer = 0;
|
||||
imageCopy.imageSubresource.layerCount = 1;
|
||||
|
||||
vkCmdCopyBufferToImage(commandBuffer, stagingBuffer, g_output.nisScalerImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &imageCopy);
|
||||
|
||||
imageCopy.bufferOffset = coefScaleSize;
|
||||
|
||||
vkCmdCopyBufferToImage(commandBuffer, stagingBuffer, g_output.nisUsmImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &imageCopy);
|
||||
|
||||
barriers[0].oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
|
||||
barriers[0].newLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
||||
barriers[0].dstAccessMask = 0;
|
||||
|
||||
barriers[1].oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
|
||||
barriers[1].newLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
||||
barriers[1].dstAccessMask = 0;
|
||||
|
||||
vkCmdPipelineBarrier(
|
||||
commandBuffer,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
|
||||
0,
|
||||
0, NULL,
|
||||
0, NULL,
|
||||
barriers.size(), barriers.data());
|
||||
|
||||
std::vector<std::shared_ptr<CVulkanTexture>> refs;
|
||||
g_device.submitCommandBuffer( handle, refs );
|
||||
|
||||
// Write all the static NIS descriptor set bindings
|
||||
|
||||
std::array< VkDescriptorImageInfo, 2 > nisImageDescriptors = {{
|
||||
{
|
||||
.sampler = g_output.nisSampler,
|
||||
.imageView = g_output.nisScalerView,
|
||||
.sampler = g_device.sampler(samplerKey),
|
||||
.imageView = g_output.nisScalerImage->linearView(),
|
||||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
},
|
||||
{
|
||||
.sampler = g_output.nisSampler,
|
||||
.imageView = g_output.nisUsmView,
|
||||
.sampler = g_device.sampler(samplerKey),
|
||||
.imageView = g_output.nisUsmImage->linearView(),
|
||||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
}
|
||||
}};
|
||||
|
@ -2628,13 +2439,6 @@ static bool init_nis_data()
|
|||
currentSet = g_device.descriptorSet();
|
||||
} while (currentSet != firstSet);
|
||||
|
||||
// Wait for the upload to finish and then cleanup
|
||||
|
||||
vkQueueWaitIdle(g_device.queue());
|
||||
|
||||
vkDestroyBuffer(g_device.device(), stagingBuffer, nullptr);
|
||||
vkFreeMemory(g_device.device(), stagingMem, nullptr);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -2661,7 +2465,7 @@ std::shared_ptr<CVulkanTexture> vulkan_create_texture_from_bits( uint32_t width,
|
|||
if ( pTex->BInit( width, height, drmFormat, texCreateFlags, nullptr, contentWidth, contentHeight) == false )
|
||||
return nullptr;
|
||||
|
||||
memcpy( g_device.uploadBufferData(), bits, width * height * 4 );
|
||||
memcpy( g_device.uploadBufferData(), bits, width * height * DRMFormatGetBPP(drmFormat) );
|
||||
|
||||
VkCommandBuffer commandBuffer;
|
||||
uint32_t handle = g_device.commandBuffer( commandBuffer );
|
||||
|
|
Loading…
Reference in a new issue