Improve performance by using deko3d to do the framebuffer conversion on the GPU

This commit is contained in:
fincs 2020-11-11 17:00:25 +01:00
parent 244d058f1b
commit fcbc56acc4
No known key found for this signature in database
GPG Key ID: 62C7609ADA219C60
4 changed files with 156 additions and 8 deletions

View File

@ -63,7 +63,7 @@ CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions
ASFLAGS := -g $(ARCH)
LDFLAGS = -specs=$(DEVKITPRO)/libnx/switch.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map)
LIBS := -lminizip `freetype-config --libs` -lconfig -lturbojpeg -lpng
LIBS := -ldeko3d -lminizip `freetype-config --libs` -lconfig -lturbojpeg -lpng
#---------------------------------------------------------------------------------
# list of directories containing libraries, this must be the top level containing

View File

@ -3,14 +3,13 @@
#include <stdio.h>
#include "../common/common.h"
#include "nx_graphics.h"
#include "nx_touch.h"
// Define the desired framebuffer resolution (here we set it to 720p).
#define FB_WIDTH 1280
#define FB_HEIGHT 720
Framebuffer g_framebufObj;
uint8_t* g_framebuf;
u32 g_framebuf_width;
@ -120,8 +119,7 @@ int main(int argc, char **argv)
if (errormsg[0]) error_screen = 1;
if (!error_screen) {
framebufferCreate(&g_framebufObj, nwindowGetDefault(), FB_WIDTH, FB_HEIGHT, PIXEL_FORMAT_RGBA_8888, 2);
framebufferMakeLinear(&g_framebufObj);
graphicsInit(FB_WIDTH, FB_HEIGHT);
}
else {
consoleInit(NULL);
@ -138,7 +136,7 @@ int main(int argc, char **argv)
if (!error_screen) {
if (!uiUpdate()) break;
g_framebuf = framebufferBegin(&g_framebufObj, &g_framebuf_width);
g_framebuf = graphicsFrameBegin(&g_framebuf_width);
#ifdef PERF_LOG
start_tick = armGetSystemTick();
#endif
@ -150,7 +148,7 @@ int main(int argc, char **argv)
}
if (!error_screen) {
framebufferEnd(&g_framebufObj);
graphicsFrameEnd();
#ifdef PERF_LOG
g_tickdiff_frame = armGetSystemTick() - start_tick;
@ -162,7 +160,7 @@ int main(int argc, char **argv)
}
if (!error_screen) {
framebufferClose(&g_framebufObj);
graphicsExit();
}
else {
consoleExit(NULL);

141
nx_main/nx_graphics.c Normal file
View File

@ -0,0 +1,141 @@
#include <switch.h>
#include <deko3d.h>
#include "nx_graphics.h"
#define FB_NUM 2
#define CMDMEMSIZE 0x1000
static u32 s_fbWidth, s_fbHeight;
static DkDevice s_device;
static DkMemBlock s_fbMemBlock, s_workMemBlock, s_cmdMemBlock;
static DkSwapchain s_swapchain;
static DkCmdBuf s_cmdBuf;
static DkCmdList s_cmdLists[FB_NUM];
static DkFence s_fence;
static DkQueue s_queue;
void graphicsInit(u32 width, u32 height)
{
DkImageLayoutMaker imgLayoutMaker;
DkMemBlockMaker memBlockMaker;
// Create the device, which is the root object
DkDeviceMaker deviceMaker;
dkDeviceMakerDefaults(&deviceMaker);
s_device = dkDeviceCreate(&deviceMaker);
// Calculate layout for the framebuffers
DkImageLayout fbLayout;
dkImageLayoutMakerDefaults(&imgLayoutMaker, s_device);
imgLayoutMaker.flags = DkImageFlags_UsagePresent;
imgLayoutMaker.format = DkImageFormat_RGBA8_Unorm;
imgLayoutMaker.dimensions[0] = s_fbWidth = width;
imgLayoutMaker.dimensions[1] = s_fbHeight = height;
dkImageLayoutInitialize(&fbLayout, &imgLayoutMaker);
// Retrieve necessary size and alignment for the framebuffers
uint32_t fbSize = dkImageLayoutGetSize(&fbLayout);
uint32_t fbAlign = dkImageLayoutGetAlignment(&fbLayout);
fbSize = (fbSize + fbAlign - 1) &~ (fbAlign - 1);
// Create a memory block that will host the framebuffers
dkMemBlockMakerDefaults(&memBlockMaker, s_device, FB_NUM*fbSize);
memBlockMaker.flags = DkMemBlockFlags_GpuCached | DkMemBlockFlags_Image;
s_fbMemBlock = dkMemBlockCreate(&memBlockMaker);
// Initialize the framebuffers with the layout and backing memory we've just created
DkImage fbImages[FB_NUM];
DkImage const* swapchainImages[FB_NUM];
for (unsigned i = 0; i < FB_NUM; i ++)
{
swapchainImages[i] = &fbImages[i];
dkImageInitialize(&fbImages[i], &fbLayout, s_fbMemBlock, i*fbSize);
}
// Create a swapchain out of the framebuffers we've just initialized
DkSwapchainMaker swapchainMaker;
dkSwapchainMakerDefaults(&swapchainMaker, s_device, nwindowGetDefault(), swapchainImages, FB_NUM);
s_swapchain = dkSwapchainCreate(&swapchainMaker);
// Create a memory block for the linear framebuffer
dkMemBlockMakerDefaults(&memBlockMaker, s_device, width*height*4);
memBlockMaker.flags = DkMemBlockFlags_CpuCached | DkMemBlockFlags_GpuUncached;
s_workMemBlock = dkMemBlockCreate(&memBlockMaker);
// Create a memory block for the command lists
dkMemBlockMakerDefaults(&memBlockMaker, s_device, CMDMEMSIZE);
memBlockMaker.flags = DkMemBlockFlags_CpuUncached | DkMemBlockFlags_GpuCached;
s_cmdMemBlock = dkMemBlockCreate(&memBlockMaker);
// Create a command buffer
DkCmdBufMaker cmdBufMaker;
dkCmdBufMakerDefaults(&cmdBufMaker, s_device);
s_cmdBuf = dkCmdBufCreate(&cmdBufMaker);
dkCmdBufAddMemory(s_cmdBuf, s_cmdMemBlock, 0, CMDMEMSIZE);
// Define source for linear framebuffer copies
const DkCopyBuf linearSrc = {
.addr = dkMemBlockGetGpuAddr(s_workMemBlock),
.rowLength = 0,
.imageHeight = 0,
};
// Define rectangle for the copies
const DkImageRect copyRect = {
.x = 0, .y = 0, .z = 0,
.width = width, .height = height, .depth = 1,
};
// Record command lists for the copies
for (unsigned i = 0; i < FB_NUM; i ++) {
DkImageView tiledDst;
dkImageViewDefaults(&tiledDst, &fbImages[i]);
dkCmdBufCopyBufferToImage(s_cmdBuf, &linearSrc, &tiledDst, &copyRect, 0);
dkCmdBufSignalFence(s_cmdBuf, &s_fence, false);
s_cmdLists[i] = dkCmdBufFinishList(s_cmdBuf);
}
// Create a queue, to which we will submit our command lists
DkQueueMaker queueMaker;
dkQueueMakerDefaults(&queueMaker, s_device);
queueMaker.flags = 0; // we will only use this queue for transferring
s_queue = dkQueueCreate(&queueMaker);
}
void graphicsExit(void)
{
// Make sure the queue is idle before destroying anything
dkQueueWaitIdle(s_queue);
// Destroy all the resources we've created
dkQueueDestroy(s_queue);
dkCmdBufDestroy(s_cmdBuf);
dkMemBlockDestroy(s_cmdMemBlock);
dkMemBlockDestroy(s_workMemBlock);
dkSwapchainDestroy(s_swapchain);
dkMemBlockDestroy(s_fbMemBlock);
dkDeviceDestroy(s_device);
}
void* graphicsFrameBegin(u32* out_stride)
{
// Ensure the GPU is not reading from the framebuffer
dkFenceWait(&s_fence, -1);
// Return information
if (out_stride) *out_stride = s_fbWidth*4;
return dkMemBlockGetCpuAddr(s_workMemBlock);
}
void graphicsFrameEnd(void)
{
// Flush the linear framebuffer
dkMemBlockFlushCpuCache(s_workMemBlock, 0, s_fbWidth*s_fbHeight*4);
// Present a frame
int slot = dkQueueAcquireImage(s_queue, s_swapchain);
dkQueueSubmitCommands(s_queue, s_cmdLists[slot]);
dkQueuePresentImage(s_queue, s_swapchain, slot);
}

9
nx_main/nx_graphics.h Normal file
View File

@ -0,0 +1,9 @@
#pragma once
#include <switch.h>
void graphicsInit(u32 width, u32 height);
void graphicsExit(void);
void* graphicsFrameBegin(u32* out_stride);
void graphicsFrameEnd(void);