From f37518d8484dea844d2384ae67a56cdd3f45622d Mon Sep 17 00:00:00 2001 From: fincs Date: Thu, 13 Dec 2018 23:56:41 +0100 Subject: [PATCH] Add framebufferMakeLinear - linear shadow buffer support --- nx/include/switch/display/framebuffer.h | 14 ++++- nx/source/display/framebuffer.c | 69 +++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 2 deletions(-) diff --git a/nx/include/switch/display/framebuffer.h b/nx/include/switch/display/framebuffer.h index 8d766314..84255d9b 100644 --- a/nx/include/switch/display/framebuffer.h +++ b/nx/include/switch/display/framebuffer.h @@ -13,6 +13,7 @@ typedef struct Framebuffer { NWindow *win; NvMap map; void* buf; + void* buf_linear; u32 stride; u32 width_aligned; u32 height_aligned; @@ -29,6 +30,8 @@ typedef struct Framebuffer { * @param[in] height Desired height of the framebuffer (usually 720). * @param[in] format Desired pixel format (see PIXEL_FORMAT_* enum). * @param[in] num_fbs Number of buffers to create. Pass 1 for single-buffering, 2 for double-buffering or 3 for triple-buffering. + * @note Framebuffer images are stored in Tegra block linear format with 16Bx2 sector ordering; see TRM chapter 20.1 for more details. + * In order to use regular linear layout, consider calling \ref framebufferMakeLinear after the \ref Framebuffer object is created. * @note Presently, only the following pixel formats are supported: * \ref PIXEL_FORMAT_RGBA_8888 * \ref PIXEL_FORMAT_RGBX_8888 @@ -38,24 +41,31 @@ typedef struct Framebuffer { */ Result framebufferCreate(Framebuffer* fb, NWindow *win, u32 width, u32 height, u32 format, u32 num_fbs); +/// Enables linear framebuffer mode in a \ref Framebuffer, allocating a shadow buffer in the process. +Result framebufferMakeLinear(Framebuffer* fb); + /// Closes a \ref Framebuffer object, freeing all resources associated with it. void framebufferClose(Framebuffer* fb); /** - * @brief Begins rendering a frame in a Framebuffer. + * @brief Begins rendering a frame in a \ref Framebuffer. 
* @param[in] fb Pointer to \ref Framebuffer structure. * @param[out] out_stride Output variable containing the distance in bytes between rows of pixels in memory. * @return Pointer to buffer to which new graphics data should be written to. * @note When this function is called, a buffer will be dequeued from the corresponding \ref NWindow. * @note This function will return pointers to different buffers, depending on the number of buffers it was initialized with. + * @note If \ref framebufferMakeLinear was used, this function will instead return a pointer to the shadow linear buffer. + * In this case, the offset of a pixel is \p y * \p out_stride + \p x * \p bytes_per_pixel. * @note Each call to \ref framebufferBegin must be paired with a \ref framebufferEnd call. */ void* framebufferBegin(Framebuffer* fb, u32* out_stride); /** - * @brief Finishes rendering a frame in a Framebuffer. + * @brief Finishes rendering a frame in a \ref Framebuffer. * @param[in] fb Pointer to \ref Framebuffer structure. * @note When this function is called, the written image data will be flushed and queued (presented) in the corresponding \ref NWindow. + * @note If \ref framebufferMakeLinear was used, this function will copy the image from the shadow linear buffer to the actual framebuffer, + * converting it in the process to the layout expected by the compositor. * @note Each call to \ref framebufferBegin must be paired with a \ref framebufferEnd call. 
*/ void framebufferEnd(Framebuffer* fb); diff --git a/nx/source/display/framebuffer.c b/nx/source/display/framebuffer.c index 84be30c6..d6a3dc23 100644 --- a/nx/source/display/framebuffer.c +++ b/nx/source/display/framebuffer.c @@ -111,11 +111,29 @@ Result framebufferCreate(Framebuffer* fb, NWindow *win, u32 width, u32 height, u return rc; } +Result framebufferMakeLinear(Framebuffer* fb) /* Enables linear mode on fb by allocating a zero-filled shadow buffer; returns 0 on success, a libnx error Result otherwise. */ +{ + if (!fb || !fb->has_init) /* NULL or uninitialized framebuffer */ + return MAKERESULT(Module_Libnx, LibnxError_NotInitialized); + if (fb->buf_linear) /* a shadow buffer was already allocated by an earlier call */ + return MAKERESULT(Module_Libnx, LibnxError_AlreadyInitialized); + + u32 height = (fb->win->height + 7) &~ 7; // GOBs are 8 rows tall + fb->buf_linear = calloc(1, fb->stride*height); /* fb->stride bytes for each of the padded rows */ + if (!fb->buf_linear) + return MAKERESULT(Module_Libnx, LibnxError_OutOfMemory); + + return 0; +} + void framebufferClose(Framebuffer* fb) { if (!fb || !fb->has_init) return; + if (fb->buf_linear) /* release the linear shadow buffer, if one was allocated */ + free(fb->buf_linear); + if (fb->buf) { nwindowReleaseBuffers(fb->win); nvMapClose(&fb->map); @@ -141,15 +159,66 @@ void* framebufferBegin(Framebuffer* fb, u32* out_stride) if (out_stride) *out_stride = fb->stride; + if (fb->buf_linear) /* linear mode: hand out the shadow buffer instead of the per-slot buffer */ + return fb->buf_linear; + return (u8*)fb->buf + slot*fb->fb_size; } +static void _convertGobTo16Bx2(u8* outgob, const u8* ingob, u32 stride) /* Copies one GOB from the linear image at ingob (rows are stride bytes apart) into outgob in 16Bx2 sector order. */ +{ + // GOB byte offsets can be expressed with 9 bits: + // yyyxxxxxx where 'x' is the horizontal position and 'y' is the vertical position + // 16Bx2 sector ordering basically applies swizzling to the upper 5 bits: + // iiiiioooo where 'o' doesn't change and 'i' gets swizzled + // This swizzling of the 'i' field can be expressed the following way: + // 43210 -> 14302 to go from unswizzled to swizzled offset + // 32041 <- 43210 to go from swizzled to unswizzled offset + // Here, we iterate through each of the 32 sequential swizzled positions and + // calculate the actual X and Y positions in the unswizzled source image. + // Since the 'o' bits aren't swizzled, we can copy the whole thing as a single 128-bit unit. 
+ + for (u32 i = 0; i < 32; i ++) { /* i indexes the 16-byte units of the swizzled output GOB, in output order */ + const u32 y = ((i>>1)&0x06) | ( i &0x01); /* source row within the GOB, 0..7 */ + const u32 x = ((i<<3)&0x10) | ((i<<1)&0x20); /* source byte offset within that row: 0, 16, 32 or 48 */ + *(u128*)outgob = *(u128*)(ingob + y*stride + x); /* NOTE(review): u128 access assumes both pointers are suitably aligned - confirm */ + outgob += sizeof(u128); + } +} + +static void _convertToBlocklinear(void* outbuf, const void* inbuf, u32 stride, u32 height, u32 block_height_log2) /* Converts the linear image at inbuf (stride bytes per row, height rows) into block linear layout at outbuf, using blocks of 2^block_height_log2 GOBs stacked vertically. */ +{ + const u32 block_height_gobs = 1U << block_height_log2; /* GOBs per block, vertically */ + const u32 block_height_px = 8U << block_height_log2; /* rows per block, at 8 rows per GOB */ + + const u32 width_blocks = stride >> 6; /* each block is one GOB (64 bytes) wide */ + const u32 height_blocks = (height + block_height_px - 1) >> (3 + block_height_log2); /* height / block_height_px, rounded up */ + u8* outgob = (u8*)outbuf; + + for (u32 block_y = 0; block_y < height_blocks; block_y ++) { + for (u32 block_x = 0; block_x < width_blocks; block_x ++) { + for (u32 gob_y = 0; gob_y < block_height_gobs; gob_y ++) { + const u32 x = block_x*64; + const u32 y = block_y*block_height_px + gob_y*8; + if (y < height) { /* GOBs starting at or past the last row are skipped; their output bytes are left untouched */ + const u8* ingob = (u8*)inbuf + y*stride + x; + _convertGobTo16Bx2(outgob, ingob, stride); + } + outgob += 512; /* one GOB is 32 units of 16 bytes; advance even when the GOB was skipped */ + } + } + } +} + void framebufferEnd(Framebuffer* fb) { if (!fb->has_init) return; void* buf = (u8*)fb->buf + fb->win->cur_slot*fb->fb_size; + if (fb->buf_linear) /* linear mode: convert the shadow buffer into the current slot before it is flushed and queued */ + _convertToBlocklinear(buf, fb->buf_linear, fb->stride, fb->win->height, 4); /* 4 is block_height_log2, i.e. 16 GOBs per block */ + armDCacheFlush(buf, fb->fb_size); Result rc = nwindowQueueBuffer(fb->win, fb->win->cur_slot, NULL);