NvGpfifo: Redesign API to allow for submitting multiple entries at once

2025-08-06 00:19:22 +02:00 · 2018-09-16 16:35:57 +02:00 · 2018-09-16 16:35:57 +02:00 · a50942509f
commit a50942509f
parent 91fb381461
2 changed files with 51 additions and 20 deletions
--- a/nx/include/switch/nvidia/gpu/gpfifo.h
+++ b/nx/include/switch/nvidia/gpu/gpfifo.h
@ -1,13 +1,19 @@
 #pragma once

+#define GPFIFO_QUEUE_SIZE        0x800
+#define GPFIFO_ENTRY_NOT_MAIN    BIT(9)
+#define GPFIFO_ENTRY_NO_PREFETCH BIT(31)
+
 typedef struct {
    NvChannel* parent;
+    u32 syncpt_id;
+    u32 num_entries;
+    nvioctl_gpfifo_entry entries[GPFIFO_QUEUE_SIZE];
 } NvGpfifo;

 Result nvGpfifoCreate(NvGpfifo* f, NvChannel* parent);
 void   nvGpfifoClose(NvGpfifo* f);

-#define NV_MAKE_GPFIFO_ENTRY(iova, size) \
-    ((iova) | (((u64)(size)) << 42))
-
-Result nvGpfifoSubmitCmdList(NvGpfifo* f, NvCmdList* cmd_list, u32 fence_incr, NvFence* fence_out);
+Result nvGpfifoAppendEntry(NvGpfifo* f, iova_t start, size_t num_cmds, u32 flags);
+Result nvGpfifoAppendCmdList(NvGpfifo* f, NvCmdList* cmd_list, u32 flags);
+Result nvGpfifoFlush(NvGpfifo* f, u32 fence_incr, NvFence* fence_out);
--- a/nx/source/nvidia/gpu/gpfifo.c
+++ b/nx/source/nvidia/gpu/gpfifo.c
@ -4,6 +4,7 @@
 #include "arm/atomics.h"
 #include "kernel/svc.h"
 #include "kernel/event.h"
+#include "kernel/detect.h"
 #include "services/nv.h"
 #include "nvidia/ioctl.h"
 #include "nvidia/buffer.h"
@ -18,17 +19,14 @@
 #include "nvidia/gpu/error_notifier.h"
 #include "nvidia/gpu/gpu.h"

-#define DEFAULT_FIFO_ENTRIES 0x800

 Result nvGpfifoCreate(NvGpfifo* f, NvChannel* parent)
 {
    f->parent = parent;

    NvFence fence;
-    Result res = nvioctlChannel_AllocGpfifoEx2(parent->fd, DEFAULT_FIFO_ENTRIES, 1, 0, 0, 0, 0, &fence);
-    //__builtin_printf("nvGpfifoCreate initial fence: %d %u\n", (int)fence.id, fence.value);
-    //if (R_SUCCEEDED(res) && (s32)fence.id >= 0)
-    //    nvFenceWait(&fence, -1);
+    Result res = nvioctlChannel_AllocGpfifoEx2(parent->fd, GPFIFO_QUEUE_SIZE, 1, 0, 0, 0, 0, &fence);
+    f->syncpt_id = fence.id;
    return res;
 }

@ -36,14 +34,39 @@ void nvGpfifoClose(NvGpfifo* f) {
    /**/
 }

-Result nvGpfifoSubmitCmdList(NvGpfifo* f, NvCmdList* cmd_list, u32 fence_incr, NvFence* fence_out)
+Result nvGpfifoAppendEntry(NvGpfifo* f, iova_t start, size_t num_cmds, u32 flags)
+{
+    if (f->num_entries >= GPFIFO_QUEUE_SIZE)
+        return MAKERESULT(Module_Libnx, LibnxError_OutOfMemory);
+
+    nvioctl_gpfifo_entry* entry = &f->entries[f->num_entries++];
+    entry->desc = start;
+    entry->desc32[1] |= flags | (num_cmds << 10);
+    return 0;
+}
+
+Result nvGpfifoAppendCmdList(NvGpfifo* f, NvCmdList* cmd_list, u32 flags)
+{
+    Result rc = nvGpfifoAppendEntry(f,
+        nvCmdListGetGpuAddr(cmd_list) + 4*cmd_list->offset,
+        cmd_list->num_cmds,
+        flags);
+
+    if (R_SUCCEEDED(rc)) {
+        cmd_list->offset += cmd_list->num_cmds;
+        cmd_list->num_cmds = 0;
+    }
+
+    return rc;
+}
+
+Result nvGpfifoFlush(NvGpfifo* f, u32 fence_incr, NvFence* fence_out)
 {
    Result rc;
-    nvioctl_gpfifo_entry ent;
    NvFence fence;

-    ent.desc = nvCmdListGetGpuAddr(cmd_list) + 4*cmd_list->offset;
-    ent.desc32[1] |= (2 << 8) | (nvCmdListGetListSize(cmd_list) << 10);
+    if (!f->num_entries)
+        return MAKERESULT(Module_Libnx, LibnxError_NotFound);

    fence.id = 0;
    fence.value = fence_incr;
@ -52,14 +75,16 @@ Result nvGpfifoSubmitCmdList(NvGpfifo* f, NvCmdList* cmd_list, u32 fence_incr, N
    if (fence_incr)
        flags |= BIT(8);

-    rc = nvioctlChannel_SubmitGpfifo(
-        f->parent->fd, &ent, 1, flags, &fence);
+    if (kernelAbove400())
+        rc = nvioctlChannel_KickoffPb(f->parent->fd, f->entries, f->num_entries, flags, &fence);
+    else
+        rc = nvioctlChannel_SubmitGpfifo(f->parent->fd, f->entries, f->num_entries, flags, &fence);

-    if (R_SUCCEEDED(rc) && fence_out)
-        *fence_out = fence;
-
-    cmd_list->offset += cmd_list->num_cmds;
-    cmd_list->num_cmds = 0;
+    if (R_SUCCEEDED(rc)) {
+        f->num_entries = 0;
+        if (fence_out)
+            *fence_out = fence;
+    }

    return rc;
 }