From e7ae7ea8460bbe31eb2c4a668ea6d0fb6261997e Mon Sep 17 00:00:00 2001
From: fincs
Date: Fri, 14 Sep 2018 18:09:58 +0200
Subject: [PATCH] nv wrappers: Handle cacheability settings properly, more details:

- nvAddressSpaceMapBuffer now accepts a flags parameter instead of
  hardcoding NvMapBufferFlags_IsCacheable.
- NvBufferFlags was incorrect and was thus removed.
- nvBufferCreate/nvBufferCreateRw replaced with nvBufferCreate, with an
  extra 'is_cacheable' bool parameter. There's no such thing as a
  "read-only/read-write" buffer.
- nvBufferMakeCpuUncached/nvBufferMakeCpuCached were removed.
---
Review note: the nvCmdListCreate and nvZcullContextCreate call sites pass
is_cacheable before kind, in the order the nvBufferCreate prototype declares.

 nx/include/switch/nvidia/address_space.h |  2 +-
 nx/include/switch/nvidia/buffer.h        | 12 +-----
 nx/include/switch/nvidia/ioctl.h         |  2 +-
 nx/source/nvidia/address_space.c         |  4 +-
 nx/source/nvidia/buffer.c                | 51 ++++++++++--------------
 nx/source/nvidia/gpu/cmd_list.c          |  4 +-
 nx/source/nvidia/gpu/zcull_ctx.c         |  8 +---
 7 files changed, 31 insertions(+), 52 deletions(-)

diff --git a/nx/include/switch/nvidia/address_space.h b/nx/include/switch/nvidia/address_space.h
index e16e5077..be920b13 100644
--- a/nx/include/switch/nvidia/address_space.h
+++ b/nx/include/switch/nvidia/address_space.h
@@ -18,7 +18,7 @@ Result nvAddressSpaceReserveAlign(NvAddressSpace* a, NvPageSize align, u32 pages
 Result nvAddressSpaceReserveAtFixedAddr(NvAddressSpace* a, iova_t addr, u32 pages, NvPageSize page_sz);
 Result nvAddressSpaceReserveFull(NvAddressSpace* a);
 
-Result nvAddressSpaceMapBuffer(NvAddressSpace* a, u32 fd, NvKind kind, iova_t* iova_out);
+Result nvAddressSpaceMapBuffer(NvAddressSpace* a, u32 fd, u32 flags, NvKind kind, iova_t* iova_out);
 Result nvAddressSpaceUnmapBuffer(NvAddressSpace* a, iova_t iova);
 
 struct NvChannel;
diff --git a/nx/include/switch/nvidia/buffer.h b/nx/include/switch/nvidia/buffer.h
index ec5f4cd3..6a85b2dc 100644
--- a/nx/include/switch/nvidia/buffer.h
+++ b/nx/include/switch/nvidia/buffer.h
@@ -2,10 +2,6 @@
 #include "types.h"
 #include "address_space.h"
 
-typedef enum {
-    NvBufferFlags_Writable=1,
-} NvBufferFlags;
-
 typedef struct NvAddressSpace NvAddressSpace;
 
 typedef struct NvBuffer {
@@ -17,14 +13,14 @@ typedef struct NvBuffer {
     NvAddressSpace* addr_space;
     NvKind kind;
     bool has_init;
+    bool is_cacheable;
 } NvBuffer;
 
 Result nvBufferInit(void);
 u32 nvBufferGetNvmapFd(void);
 void nvBufferExit(void);
 
-Result nvBufferCreate(NvBuffer* m, size_t size, u32 align, NvKind kind, NvAddressSpace* as);
-Result nvBufferCreateRw(NvBuffer* m, size_t size, u32 align, NvKind kind, NvAddressSpace* as);
+Result nvBufferCreate(NvBuffer* m, size_t size, u32 align, bool is_cacheable, NvKind kind, NvAddressSpace* as);
 void nvBufferFree(NvBuffer* m);
 
 void* nvBufferGetCpuAddr(NvBuffer* m);
@@ -32,7 +28,3 @@ iova_t nvBufferGetGpuAddr(NvBuffer* m);
 
 Result nvBufferMapAsTexture(NvBuffer* m, NvKind kind);
 iova_t nvBufferGetGpuAddrTexture(NvBuffer* m);
-
-Result nvBufferMakeCpuUncached(NvBuffer* m);
-Result nvBufferMakeCpuCached(NvBuffer* m);
-
diff --git a/nx/include/switch/nvidia/ioctl.h b/nx/include/switch/nvidia/ioctl.h
index a9d8fbb9..f4fe9186 100644
--- a/nx/include/switch/nvidia/ioctl.h
+++ b/nx/include/switch/nvidia/ioctl.h
@@ -134,7 +134,7 @@ typedef enum {
 // Used with nvioctlNvhostAsGpu_MapBufferEx().
 typedef enum {
     NvMapBufferFlags_FixedOffset = 1,
-    NvMapBufferFlags_IsCachable = 4,
+    NvMapBufferFlags_IsCacheable = 4,
 } NvMapBufferFlags;
 
 typedef enum {
diff --git a/nx/source/nvidia/address_space.c b/nx/source/nvidia/address_space.c
index 772a1e03..0d29eaaf 100644
--- a/nx/source/nvidia/address_space.c
+++ b/nx/source/nvidia/address_space.c
@@ -56,10 +56,10 @@ Result nvAddressSpaceReserveFull(NvAddressSpace* a) {
 }
 
 Result nvAddressSpaceMapBuffer(
-    NvAddressSpace* a, u32 fd, NvKind kind,
+    NvAddressSpace* a, u32 fd, u32 flags, NvKind kind,
     iova_t* iova_out) {
     return nvioctlNvhostAsGpu_MapBufferEx(
-        a->fd, NvMapBufferFlags_IsCachable, kind, fd, 0x10000, 0, 0, 0, iova_out);
+        a->fd, flags, kind, fd, 0x10000, 0, 0, 0, iova_out);
 }
 
 Result nvAddressSpaceUnmapBuffer(NvAddressSpace* a, iova_t iova) {
diff --git a/nx/source/nvidia/buffer.c b/nx/source/nvidia/buffer.c
index e1425737..af38dfb7 100644
--- a/nx/source/nvidia/buffer.c
+++ b/nx/source/nvidia/buffer.c
@@ -2,6 +2,7 @@
 #include "types.h"
 #include "result.h"
 #include "arm/atomics.h"
+#include "arm/cache.h"
 #include "kernel/svc.h"
 #include "services/nv.h"
 #include "nvidia/ioctl.h"
@@ -18,12 +19,12 @@ Result nvBufferInit(void)
     if (atomicIncrement64(&g_refCnt) > 0)
         return 0;
 
-	rc = nvOpen(&g_nvmap_fd, "/dev/nvmap");
+    rc = nvOpen(&g_nvmap_fd, "/dev/nvmap");
 
-	if (R_FAILED(rc))
-		atomicDecrement64(&g_refCnt);
+    if (R_FAILED(rc))
+        atomicDecrement64(&g_refCnt);
 
-	return rc;
+    return rc;
 }
 
 void nvBufferExit(void)
@@ -41,8 +42,8 @@ u32 nvBufferGetNvmapFd(void) {
     return g_nvmap_fd;
 }
 
-static Result _nvBufferCreate(
-    NvBuffer* m, size_t size, u32 flags, u32 align, NvKind kind,
+Result nvBufferCreate(
+    NvBuffer* m, size_t size, u32 align, bool is_cacheable, NvKind kind,
     NvAddressSpace* as)
 {
     Result rc;
@@ -50,6 +51,7 @@ static Result _nvBufferCreate(
     size = (size + align - 1) & ~(align - 1);
 
     m->has_init = true;
+    m->is_cacheable = is_cacheable;
     m->size = size;
     m->fd = -1;
     m->cpu_addr = memalign(align, size);
@@ -64,11 +66,17 @@ static Result _nvBufferCreate(
     rc = nvioctlNvmap_Create(g_nvmap_fd, size, &m->fd);
 
     if (R_SUCCEEDED(rc))
-        rc = nvioctlNvmap_Alloc(
-            g_nvmap_fd, m->fd, 0, flags, align, kind, m->cpu_addr);
+        rc = nvioctlNvmap_Alloc(g_nvmap_fd, m->fd,
+            0, is_cacheable ? 1 : 0, align, kind, m->cpu_addr);
+
+    if (R_SUCCEEDED(rc) && !is_cacheable) {
+        armDCacheFlush(m->cpu_addr, m->size);
+        svcSetMemoryAttribute(m->cpu_addr, m->size, 8, 8);
+    }
 
     if (R_SUCCEEDED(rc))
-        rc = nvAddressSpaceMapBuffer(as, m->fd, 0, &m->gpu_addr);
+        rc = nvAddressSpaceMapBuffer(as, m->fd,
+            is_cacheable ? NvMapBufferFlags_IsCacheable : 0, NvKind_Pitch, &m->gpu_addr);
 
     if (R_FAILED(rc))
         nvBufferFree(m);
@@ -76,24 +84,6 @@ static Result _nvBufferCreate(
     return rc;
 }
 
-Result nvBufferCreate(
-    NvBuffer* m, size_t size, u32 align, NvKind kind, NvAddressSpace* as) {
-    return _nvBufferCreate(m, size, 0, align, kind, as);
-}
-
-Result nvBufferCreateRw(
-    NvBuffer* m, size_t size, u32 align, NvKind kind, NvAddressSpace* as) {
-    return _nvBufferCreate(m, size, NvBufferFlags_Writable, align, kind, as);
-}
-
-Result nvBufferMakeCpuUncached(NvBuffer* m) {
-    return svcSetMemoryAttribute(m->cpu_addr, m->size, 8, 8);
-}
-
-Result nvBufferMakeCpuCached(NvBuffer* m) {
-    return svcSetMemoryAttribute(m->cpu_addr, m->size, 8, 0);
-}
-
 void nvBufferFree(NvBuffer* m)
 {
     if (!m->has_init)
@@ -115,7 +105,9 @@ void nvBufferFree(NvBuffer* m)
     }
 
     if (m->cpu_addr) {
-        nvBufferMakeCpuCached(m);
+        if (!m->is_cacheable)
+            svcSetMemoryAttribute(m->cpu_addr, m->size, 8, 0);
+
         free(m->cpu_addr);
         m->cpu_addr = NULL;
     }
@@ -132,7 +124,8 @@ iova_t nvBufferGetGpuAddr(NvBuffer* m) {
 }
 
 Result nvBufferMapAsTexture(NvBuffer* m, NvKind kind) {
-    return nvAddressSpaceMapBuffer(m->addr_space, m->fd, kind, &m->gpu_addr_texture);
+    return nvAddressSpaceMapBuffer(m->addr_space, m->fd,
+        m->is_cacheable ? NvMapBufferFlags_IsCacheable : 0, kind, &m->gpu_addr_texture);
 }
 
 iova_t nvBufferGetGpuAddrTexture(NvBuffer* m) {
diff --git a/nx/source/nvidia/gpu/cmd_list.c b/nx/source/nvidia/gpu/cmd_list.c
index e54f6df8..ff6fc50a 100644
--- a/nx/source/nvidia/gpu/cmd_list.c
+++ b/nx/source/nvidia/gpu/cmd_list.c
@@ -22,12 +22,10 @@ Result nvCmdListCreate(NvCmdList* c, NvGpu* parent, size_t max_cmds)
     Result rc;
 
     rc = nvBufferCreate(
-        &c->buffer, max_cmds * 4, 0x1000, NvKind_Pitch,
+        &c->buffer, max_cmds * 4, 0x1000, false, NvKind_Pitch,
         &parent->addr_space);
 
     if (R_SUCCEEDED(rc)) {
-        nvBufferMakeCpuUncached(&c->buffer);
-
         c->offset = 0;
         c->num_cmds = 0;
         c->max_cmds = max_cmds;
diff --git a/nx/source/nvidia/gpu/zcull_ctx.c b/nx/source/nvidia/gpu/zcull_ctx.c
index cbe9e603..7c3110df 100644
--- a/nx/source/nvidia/gpu/zcull_ctx.c
+++ b/nx/source/nvidia/gpu/zcull_ctx.c
@@ -24,13 +24,10 @@ Result nvZcullContextCreate(NvZcullContext* z, NvGpu* parent)
 
     z->parent = parent;
 
-    rc = nvBufferCreateRw(
-        &z->ctx_buf, nvInfoGetZcullCtxSize(), 0x20000, NvKind_Pitch,
+    rc = nvBufferCreate(
+        &z->ctx_buf, nvInfoGetZcullCtxSize(), 0x20000, true, NvKind_Pitch,
         &parent->addr_space);
 
-    if (R_SUCCEEDED(rc))
-        rc = nvBufferMapAsTexture(&z->ctx_buf, NvKind_Generic_16BX2);
-
     if (R_SUCCEEDED(rc))
         rc = nvioctlChannel_ZCullBind(
             parent->gpu_channel.fd, nvBufferGetGpuAddr(&z->ctx_buf),
@@ -40,6 +37,5 @@ Result nvZcullContextCreate(NvZcullContext* z, NvGpu* parent)
 }
 
 void nvZcullContextClose(NvZcullContext* z) {
-    // TODO: Unmap z->ctx_buf from parent->addr_space?
     nvBufferFree(&z->ctx_buf);
 }