diff --git a/nx/include/switch/nvidia/gpu/gpfifo.h b/nx/include/switch/nvidia/gpu/gpfifo.h index 661071ec..1869737a 100644 --- a/nx/include/switch/nvidia/gpu/gpfifo.h +++ b/nx/include/switch/nvidia/gpu/gpfifo.h @@ -1,13 +1,19 @@ #pragma once +#define GPFIFO_QUEUE_SIZE 0x800 +#define GPFIFO_ENTRY_NOT_MAIN BIT(9) +#define GPFIFO_ENTRY_NO_PREFETCH BIT(31) + typedef struct { NvChannel* parent; + u32 syncpt_id; + u32 num_entries; + nvioctl_gpfifo_entry entries[GPFIFO_QUEUE_SIZE]; } NvGpfifo; Result nvGpfifoCreate(NvGpfifo* f, NvChannel* parent); void nvGpfifoClose(NvGpfifo* f); -#define NV_MAKE_GPFIFO_ENTRY(iova, size) \ - ((iova) | (((u64)(size)) << 42)) - -Result nvGpfifoSubmitCmdList(NvGpfifo* f, NvCmdList* cmd_list, u32 fence_incr, NvFence* fence_out); +Result nvGpfifoAppendEntry(NvGpfifo* f, iova_t start, size_t num_cmds, u32 flags); +Result nvGpfifoAppendCmdList(NvGpfifo* f, NvCmdList* cmd_list, u32 flags); +Result nvGpfifoFlush(NvGpfifo* f, u32 fence_incr, NvFence* fence_out); diff --git a/nx/source/nvidia/gpu/gpfifo.c b/nx/source/nvidia/gpu/gpfifo.c index e69bc516..86324449 100644 --- a/nx/source/nvidia/gpu/gpfifo.c +++ b/nx/source/nvidia/gpu/gpfifo.c @@ -4,6 +4,7 @@ #include "arm/atomics.h" #include "kernel/svc.h" #include "kernel/event.h" +#include "kernel/detect.h" #include "services/nv.h" #include "nvidia/ioctl.h" #include "nvidia/buffer.h" @@ -18,17 +19,14 @@ #include "nvidia/gpu/error_notifier.h" #include "nvidia/gpu/gpu.h" -#define DEFAULT_FIFO_ENTRIES 0x800 Result nvGpfifoCreate(NvGpfifo* f, NvChannel* parent) { f->parent = parent; NvFence fence; - Result res = nvioctlChannel_AllocGpfifoEx2(parent->fd, DEFAULT_FIFO_ENTRIES, 1, 0, 0, 0, 0, &fence); - //__builtin_printf("nvGpfifoCreate initial fence: %d %u\n", (int)fence.id, fence.value); - //if (R_SUCCEEDED(res) && (s32)fence.id >= 0) - // nvFenceWait(&fence, -1); + Result res = nvioctlChannel_AllocGpfifoEx2(parent->fd, GPFIFO_QUEUE_SIZE, 1, 0, 0, 0, 0, &fence); + f->syncpt_id = fence.id; return res; } @@ -36,14 +34,39 @@ void nvGpfifoClose(NvGpfifo* f) { /**/ } -Result nvGpfifoSubmitCmdList(NvGpfifo* f, NvCmdList* cmd_list, u32 fence_incr, NvFence* fence_out) +Result nvGpfifoAppendEntry(NvGpfifo* f, iova_t start, size_t num_cmds, u32 flags) +{ + if (f->num_entries >= GPFIFO_QUEUE_SIZE) + return MAKERESULT(Module_Libnx, LibnxError_OutOfMemory); + + nvioctl_gpfifo_entry* entry = &f->entries[f->num_entries++]; + entry->desc = start; + entry->desc32[1] |= flags | (num_cmds << 10); + return 0; +} + +Result nvGpfifoAppendCmdList(NvGpfifo* f, NvCmdList* cmd_list, u32 flags) +{ + Result rc = nvGpfifoAppendEntry(f, + nvCmdListGetGpuAddr(cmd_list) + 4*cmd_list->offset, + cmd_list->num_cmds, + flags); + + if (R_SUCCEEDED(rc)) { + cmd_list->offset += cmd_list->num_cmds; + cmd_list->num_cmds = 0; + } + + return rc; +} + +Result nvGpfifoFlush(NvGpfifo* f, u32 fence_incr, NvFence* fence_out) { Result rc; - nvioctl_gpfifo_entry ent; NvFence fence; - ent.desc = nvCmdListGetGpuAddr(cmd_list) + 4*cmd_list->offset; - ent.desc32[1] |= (2 << 8) | (nvCmdListGetListSize(cmd_list) << 10); + if (!f->num_entries) + return MAKERESULT(Module_Libnx, LibnxError_NotFound); fence.id = 0; fence.value = fence_incr; @@ -52,14 +75,16 @@ Result nvGpfifoSubmitCmdList(NvGpfifo* f, NvCmdList* cmd_list, u32 fence_incr, N if (fence_incr) flags |= BIT(8); - rc = nvioctlChannel_SubmitGpfifo( - f->parent->fd, &ent, 1, flags, &fence); + if (kernelAbove400()) + rc = nvioctlChannel_KickoffPb(f->parent->fd, f->entries, f->num_entries, flags, &fence); + else + rc = nvioctlChannel_SubmitGpfifo(f->parent->fd, f->entries, f->num_entries, flags, &fence); - if (R_SUCCEEDED(rc) && fence_out) - *fence_out = fence; - - cmd_list->offset += cmd_list->num_cmds; - cmd_list->num_cmds = 0; + if (R_SUCCEEDED(rc)) { + f->num_entries = 0; + if (fence_out) + *fence_out = fence; + } return rc; }