diff --git a/nx/include/switch/nvidia/gpu/cmd_list.h b/nx/include/switch/nvidia/gpu/cmd_list.h index 0a7e054b..8418d316 100644 --- a/nx/include/switch/nvidia/gpu/cmd_list.h +++ b/nx/include/switch/nvidia/gpu/cmd_list.h @@ -4,6 +4,7 @@ typedef struct NvGpu NvGpu; typedef struct { NvBuffer buffer; + size_t offset; size_t num_cmds; size_t max_cmds; NvGpu* parent; @@ -15,4 +16,5 @@ void nvCmdListClose(NvCmdList* c); iova_t nvCmdListGetGpuAddr(NvCmdList* c); u64 nvCmdListGetListSize(NvCmdList* c); +void nvCmdListReset(NvCmdList* c); u32* nvCmdListInsert(NvCmdList* c, size_t num_cmds); diff --git a/nx/include/switch/nvidia/gpu/gpfifo.h b/nx/include/switch/nvidia/gpu/gpfifo.h index ee1852b9..661071ec 100644 --- a/nx/include/switch/nvidia/gpu/gpfifo.h +++ b/nx/include/switch/nvidia/gpu/gpfifo.h @@ -10,4 +10,4 @@ void nvGpfifoClose(NvGpfifo* f); #define NV_MAKE_GPFIFO_ENTRY(iova, size) \ ((iova) | (((u64)(size)) << 42)) -Result nvGpfifoSubmit(NvGpfifo* f, NvCmdList* cmd_list, NvFence* fence_out); +Result nvGpfifoSubmitCmdList(NvGpfifo* f, NvCmdList* cmd_list, u32 fence_incr, NvFence* fence_out); diff --git a/nx/source/nvidia/gpu/cmd_list.c b/nx/source/nvidia/gpu/cmd_list.c index 811c55b1..e54f6df8 100644 --- a/nx/source/nvidia/gpu/cmd_list.c +++ b/nx/source/nvidia/gpu/cmd_list.c @@ -28,6 +28,7 @@ Result nvCmdListCreate(NvCmdList* c, NvGpu* parent, size_t max_cmds) if (R_SUCCEEDED(rc)) { nvBufferMakeCpuUncached(&c->buffer); + c->offset = 0; c->num_cmds = 0; c->max_cmds = max_cmds; c->parent = parent; @@ -48,14 +49,19 @@ u64 nvCmdListGetListSize(NvCmdList* c) { return c->num_cmds; } +void nvCmdListReset(NvCmdList* c) { + c->offset = 0; + c->num_cmds = 0; +} + u32* nvCmdListInsert(NvCmdList* c, size_t num_cmds) { // Has enough space? - if ((c->num_cmds + num_cmds) > c->max_cmds) + if ((c->offset + c->num_cmds + num_cmds) > c->max_cmds) return NULL; c->num_cmds += num_cmds; u32* list = (u32*) nvBufferGetCpuAddr(&c->buffer); - return &list[c->num_cmds - num_cmds]; + return &list[c->offset + c->num_cmds - num_cmds]; } diff --git a/nx/source/nvidia/gpu/gpfifo.c b/nx/source/nvidia/gpu/gpfifo.c index 04a0d35b..e69bc516 100644 --- a/nx/source/nvidia/gpu/gpfifo.c +++ b/nx/source/nvidia/gpu/gpfifo.c @@ -26,8 +26,9 @@ Result nvGpfifoCreate(NvGpfifo* f, NvChannel* parent) NvFence fence; Result res = nvioctlChannel_AllocGpfifoEx2(parent->fd, DEFAULT_FIFO_ENTRIES, 1, 0, 0, 0, 0, &fence); - if (R_SUCCEEDED(res) && (s32)fence.id >= 0) - nvFenceWait(&fence, -1); + //__builtin_printf("nvGpfifoCreate initial fence: %d %u\n", (int)fence.id, fence.value); + //if (R_SUCCEEDED(res) && (s32)fence.id >= 0) + // nvFenceWait(&fence, -1); return res; } @@ -35,23 +36,29 @@ void nvGpfifoClose(NvGpfifo* f) { /**/ } -Result nvGpfifoSubmit(NvGpfifo* f, NvCmdList* cmd_list, NvFence* fence_out) +Result nvGpfifoSubmitCmdList(NvGpfifo* f, NvCmdList* cmd_list, u32 fence_incr, NvFence* fence_out) { Result rc; nvioctl_gpfifo_entry ent; + NvFence fence; - u64 a = - nvCmdListGetGpuAddr(cmd_list) | (nvCmdListGetListSize(cmd_list) << 42); + ent.desc = nvCmdListGetGpuAddr(cmd_list) + 4*cmd_list->offset; + ent.desc32[1] |= (2 << 8) | (nvCmdListGetListSize(cmd_list) << 10); - ent.desc32[0] = a; - ent.desc32[1] = a >> 32; + fence.id = 0; + fence.value = fence_incr; - fence_out->id = -1; - fence_out->value = 0; + u32 flags = BIT(2); + if (fence_incr) + flags |= BIT(8); rc = nvioctlChannel_SubmitGpfifo( - f->parent->fd, &ent, 1, /*0x104*/0x104/*flags*/, fence_out); + f->parent->fd, &ent, 1, flags, &fence); + if (R_SUCCEEDED(rc) && fence_out) + *fence_out = fence; + + cmd_list->offset += cmd_list->num_cmds; cmd_list->num_cmds = 0; return rc;