Add framebufferMakeLinear - linear shadow buffer support

2025-08-06 16:19:25 +02:00 · 2018-12-13 23:56:41 +01:00 · 2018-12-13 23:56:41 +01:00 · f37518d848
commit f37518d848
parent dbc960a24f
2 changed files with 81 additions and 2 deletions
--- a/nx/include/switch/display/framebuffer.h
+++ b/nx/include/switch/display/framebuffer.h
@@ -13,6 +13,7 @@ typedef struct Framebuffer {
    NWindow *win;
    NvMap map;
    void* buf;
+    void* buf_linear;
    u32 stride;
    u32 width_aligned;
    u32 height_aligned;
@@ -29,6 +30,8 @@ typedef struct Framebuffer {
 * @param[in] height Desired height of the framebuffer (usually 720).
 * @param[in] format Desired pixel format (see PIXEL_FORMAT_* enum).
 * @param[in] num_fbs Number of buffers to create. Pass 1 for single-buffering, 2 for double-buffering or 3 for triple-buffering.
+ * @note Framebuffer images are stored in Tegra block linear format with 16Bx2 sector ordering, read TRM chapter 20.1 for more details.
+ *       In order to use regular linear layout, consider calling \ref framebufferMakeLinear after the \ref Framebuffer object is created.
 * @note Presently, only the following pixel formats are supported:
 *       \ref PIXEL_FORMAT_RGBA_8888
 *       \ref PIXEL_FORMAT_RGBX_8888
@@ -38,24 +41,31 @@ typedef struct Framebuffer {
 */
 Result framebufferCreate(Framebuffer* fb, NWindow *win, u32 width, u32 height, u32 format, u32 num_fbs);

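+/**
+ * @brief Enables linear framebuffer mode in a \ref Framebuffer, allocating a shadow buffer in the process.
+ * @param[in] fb Pointer to an initialized \ref Framebuffer structure.
+ * @return Result code. Fails if \p fb is NULL or not initialized, if linear mode was already enabled, or if the shadow buffer cannot be allocated.
+ */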
+Result framebufferMakeLinear(Framebuffer* fb);
+
 /// Closes a \ref Framebuffer object, freeing all resources associated with it.
 void framebufferClose(Framebuffer* fb);

 /**
- * @brief Begins rendering a frame in a Framebuffer.
+ * @brief Begins rendering a frame in a \ref Framebuffer.
 * @param[in] fb Pointer to \ref Framebuffer structure.
 * @param[out] out_stride Output variable containing the distance in bytes between rows of pixels in memory.
 * @return Pointer to the buffer to which new graphics data should be written.
 * @note When this function is called, a buffer will be dequeued from the corresponding \ref NWindow.
 * @note This function will return pointers to different buffers, depending on the number of buffers it was initialized with.
+ * @note If \ref framebufferMakeLinear was used, this function will instead return a pointer to the shadow linear buffer.
+ *       In this case, the byte offset of the pixel at (x, y) is y * stride + x * bytes_per_pixel, where stride is the value stored in \p out_stride.
 * @note Each call to \ref framebufferBegin must be paired with a \ref framebufferEnd call.
 */
 void* framebufferBegin(Framebuffer* fb, u32* out_stride);

 /**
- * @brief Finishes rendering a frame in a Framebuffer.
+ * @brief Finishes rendering a frame in a \ref Framebuffer.
 * @param[in] fb Pointer to \ref Framebuffer structure.
 * @note When this function is called, the written image data will be flushed and queued (presented) in the corresponding \ref NWindow.
+ * @note If \ref framebufferMakeLinear was used, this function will copy the image from the shadow linear buffer to the actual framebuffer,
+ *       converting it in the process to the layout expected by the compositor.
 * @note Each call to \ref framebufferBegin must be paired with a \ref framebufferEnd call.
 */
 void framebufferEnd(Framebuffer* fb);
--- a/nx/source/display/framebuffer.c
+++ b/nx/source/display/framebuffer.c
@@ -111,11 +111,29 @@ Result framebufferCreate(Framebuffer* fb, NWindow *win, u32 width, u32 height, u
    return rc;
 }

+// Enables linear mode on an initialized Framebuffer by allocating a zero-filled shadow buffer.
+// Returns LibnxError_NotInitialized when fb is NULL or has not been initialized,
+// LibnxError_AlreadyInitialized when a shadow buffer already exists,
+// LibnxError_OutOfMemory when the allocation fails, and 0 on success.
+Result framebufferMakeLinear(Framebuffer* fb)
+{
+    if (!fb || !fb->has_init)
+        return MAKERESULT(Module_Libnx, LibnxError_NotInitialized);
+    if (fb->buf_linear)
+        return MAKERESULT(Module_Libnx, LibnxError_AlreadyInitialized);
+
+    // GOBs are 8 rows tall, so round the row count up to a whole number of GOBs.
+    const u32 height = (fb->win->height + 7) &~ 7;
+
+    // Hand the row count and the stride to calloc separately: the product is then
+    // computed and overflow-checked by calloc instead of silently wrapping in u32.
+    fb->buf_linear = calloc(height, fb->stride);
+    if (!fb->buf_linear)
+        return MAKERESULT(Module_Libnx, LibnxError_OutOfMemory);
+
+    return 0;
+}
+
 void framebufferClose(Framebuffer* fb)
 {
    if (!fb || !fb->has_init)
        return;

+    if (fb->buf_linear)
+        free(fb->buf_linear);
+
    if (fb->buf) {
        nwindowReleaseBuffers(fb->win);
        nvMapClose(&fb->map);
@@ -141,15 +159,66 @@ void* framebufferBegin(Framebuffer* fb, u32* out_stride)
    if (out_stride)
        *out_stride = fb->stride;

+    if (fb->buf_linear)
+        return fb->buf_linear;
+
    return (u8*)fb->buf + slot*fb->fb_size;
 }

+static void _convertGobTo16Bx2(u8* outgob, const u8* ingob, u32 stride)
+{
+    // A byte offset inside a 512-byte GOB fits in 9 bits:
+    //   yyyxxxxxx  ('y' = row within the GOB, 'x' = byte column within the row)
+    // 16Bx2 sector ordering leaves the low 4 bits alone and permutes the upper 5:
+    //   iiiiioooo  ('o' = untouched, 'i' = permuted)
+    // Bit permutation applied to the 'i' field:
+    //   43210 -> 14302  (unswizzled offset to swizzled offset)
+    //   32041 <- 43210  (swizzled offset to unswizzled offset)
+    // The loop visits the 32 swizzled sectors in output order and derives, for each one,
+    // the row and byte column it is fetched from in the unswizzled source image.
+    // Because the 'o' bits are untouched, each sector moves as a single 128-bit unit.
+
+    u128* dst = (u128*)outgob;
+    for (u32 sector = 0; sector < 32; sector ++) {
+        const u32 row = ((sector & 0x0c) >> 1) | (sector & 0x01);
+        const u32 col = ((sector & 0x02) << 3) | ((sector & 0x10) << 1);
+        dst[sector] = *(const u128*)(ingob + row*stride + col);
+    }
+}
+
+static void _convertToBlocklinear(void* outbuf, const void* inbuf, u32 stride, u32 height, u32 block_height_log2)
+{
+    // Walks the destination in storage order (rows of blocks, then blocks within a row,
+    // then the GOBs stacked inside each block) and fills every 512-byte GOB from the
+    // matching 64-byte wide, 8-row tall region of the linear source image.
+    // GOBs that start at or below the bottom edge of the image are not written,
+    // but their slot in the output is still stepped over.
+    const u32 gobs_per_block = 1U << block_height_log2;
+    const u32 rows_per_block = 8U << block_height_log2;
+
+    const u32 blocks_across = stride >> 6;
+    const u32 blocks_down = (height + rows_per_block - 1) >> (3 + block_height_log2);
+
+    const u8* src = (const u8*)inbuf;
+    u8* dst = (u8*)outbuf;
+
+    for (u32 by = 0; by < blocks_down; by ++) {
+        const u32 block_top = by*rows_per_block;
+        for (u32 bx = 0; bx < blocks_across; bx ++) {
+            const u32 col = bx*64;
+            for (u32 gob = 0; gob < gobs_per_block; gob ++, dst += 512) {
+                const u32 row = block_top + gob*8;
+                if (row >= height)
+                    continue;
+                _convertGobTo16Bx2(dst, src + row*stride + col, stride);
+            }
+        }
+    }
+}
+
 void framebufferEnd(Framebuffer* fb)
 {
    if (!fb->has_init)
        return;

    void* buf = (u8*)fb->buf + fb->win->cur_slot*fb->fb_size;
+    if (fb->buf_linear)
+        _convertToBlocklinear(buf, fb->buf_linear, fb->stride, fb->win->height, 4);
+
    armDCacheFlush(buf, fb->fb_size);

    Result rc = nwindowQueueBuffer(fb->win, fb->win->cur_slot, NULL);