diff --git a/nx/include/switch/nvidia/address_space.h b/nx/include/switch/nvidia/address_space.h
index e16e5077..be920b13 100644
--- a/nx/include/switch/nvidia/address_space.h
+++ b/nx/include/switch/nvidia/address_space.h
@@ -18,7 +18,7 @@ Result nvAddressSpaceReserveAlign(NvAddressSpace* a, NvPageSize align, u32 pages
 Result nvAddressSpaceReserveAtFixedAddr(NvAddressSpace* a, iova_t addr, u32 pages, NvPageSize page_sz);
 Result nvAddressSpaceReserveFull(NvAddressSpace* a);
 
-Result nvAddressSpaceMapBuffer(NvAddressSpace* a, u32 fd, NvKind kind, iova_t* iova_out);
+Result nvAddressSpaceMapBuffer(NvAddressSpace* a, u32 fd, u32 flags, NvKind kind, iova_t* iova_out);
 Result nvAddressSpaceUnmapBuffer(NvAddressSpace* a, iova_t iova);
 
 struct NvChannel;
diff --git a/nx/include/switch/nvidia/buffer.h b/nx/include/switch/nvidia/buffer.h
index ec5f4cd3..6a85b2dc 100644
--- a/nx/include/switch/nvidia/buffer.h
+++ b/nx/include/switch/nvidia/buffer.h
@@ -2,10 +2,6 @@
 #include "types.h"
 #include "address_space.h"
 
-typedef enum {
-    NvBufferFlags_Writable=1,
-} NvBufferFlags;
-
 typedef struct NvAddressSpace NvAddressSpace;
 
 typedef struct NvBuffer {
@@ -17,14 +13,14 @@ typedef struct NvBuffer {
     NvAddressSpace* addr_space;
     NvKind kind;
     bool   has_init;
+    bool   is_cacheable;
 } NvBuffer;
 
 Result nvBufferInit(void);
 u32    nvBufferGetNvmapFd(void);
 void   nvBufferExit(void);
 
-Result nvBufferCreate(NvBuffer* m, size_t size, u32 align, NvKind kind, NvAddressSpace* as);
-Result nvBufferCreateRw(NvBuffer* m, size_t size, u32 align, NvKind kind, NvAddressSpace* as);
+Result nvBufferCreate(NvBuffer* m, size_t size, u32 align, bool is_cacheable, NvKind kind, NvAddressSpace* as);
 void   nvBufferFree(NvBuffer* m);
 
 void*  nvBufferGetCpuAddr(NvBuffer* m);
@@ -32,7 +28,3 @@ iova_t nvBufferGetGpuAddr(NvBuffer* m);
 
 Result nvBufferMapAsTexture(NvBuffer* m, NvKind kind);
 iova_t nvBufferGetGpuAddrTexture(NvBuffer* m);
-
-Result nvBufferMakeCpuUncached(NvBuffer* m);
-Result nvBufferMakeCpuCached(NvBuffer* m);
-
diff --git a/nx/include/switch/nvidia/ioctl.h b/nx/include/switch/nvidia/ioctl.h
index a9d8fbb9..f4fe9186 100644
--- a/nx/include/switch/nvidia/ioctl.h
+++ b/nx/include/switch/nvidia/ioctl.h
@@ -134,7 +134,7 @@ typedef enum {
 // Used with nvioctlNvhostAsGpu_MapBufferEx().
 typedef enum {
     NvMapBufferFlags_FixedOffset = 1,
-    NvMapBufferFlags_IsCachable = 4,
+    NvMapBufferFlags_IsCacheable = 4,
 } NvMapBufferFlags;
 
 typedef enum {
diff --git a/nx/source/nvidia/address_space.c b/nx/source/nvidia/address_space.c
index 772a1e03..0d29eaaf 100644
--- a/nx/source/nvidia/address_space.c
+++ b/nx/source/nvidia/address_space.c
@@ -56,10 +56,10 @@ Result nvAddressSpaceReserveFull(NvAddressSpace* a) {
 }
 
 Result nvAddressSpaceMapBuffer(
-        NvAddressSpace* a, u32 fd, NvKind kind,
+        NvAddressSpace* a, u32 fd, u32 flags, NvKind kind,
         iova_t* iova_out) {
     return nvioctlNvhostAsGpu_MapBufferEx(
-        a->fd, NvMapBufferFlags_IsCachable, kind, fd, 0x10000, 0, 0, 0, iova_out);
+        a->fd, flags, kind, fd, 0x10000, 0, 0, 0, iova_out);
 }
 
 Result nvAddressSpaceUnmapBuffer(NvAddressSpace* a, iova_t iova) {
diff --git a/nx/source/nvidia/buffer.c b/nx/source/nvidia/buffer.c
index e1425737..af38dfb7 100644
--- a/nx/source/nvidia/buffer.c
+++ b/nx/source/nvidia/buffer.c
@@ -2,6 +2,7 @@
 #include "types.h"
 #include "result.h"
 #include "arm/atomics.h"
+#include "arm/cache.h"
 #include "kernel/svc.h"
 #include "services/nv.h"
 #include "nvidia/ioctl.h"
@@ -18,12 +19,12 @@ Result nvBufferInit(void)
     if (atomicIncrement64(&g_refCnt) > 0)
         return 0;
 
-     rc = nvOpen(&g_nvmap_fd, "/dev/nvmap");
+    rc = nvOpen(&g_nvmap_fd, "/dev/nvmap");
 
-     if (R_FAILED(rc))
-         atomicDecrement64(&g_refCnt);
+    if (R_FAILED(rc))
+        atomicDecrement64(&g_refCnt);
 
-     return rc;
+    return rc;
 }
 
 void nvBufferExit(void)
@@ -41,8 +42,8 @@ u32 nvBufferGetNvmapFd(void) {
     return g_nvmap_fd;
 }
 
-static Result _nvBufferCreate(
-    NvBuffer* m, size_t size, u32 flags, u32 align, NvKind kind,
+Result nvBufferCreate(
+    NvBuffer* m, size_t size, u32 align, bool is_cacheable, NvKind kind,
     NvAddressSpace* as)
 {
     Result rc;
@@ -50,6 +51,7 @@ static Result _nvBufferCreate(
     size = (size + align - 1) & ~(align - 1);
 
     m->has_init = true;
+    m->is_cacheable = is_cacheable;
     m->size = size;
     m->fd = -1;
     m->cpu_addr = memalign(align, size);
@@ -64,11 +66,17 @@ static Result _nvBufferCreate(
     rc = nvioctlNvmap_Create(g_nvmap_fd, size, &m->fd);
 
     if (R_SUCCEEDED(rc))
-        rc = nvioctlNvmap_Alloc(
-            g_nvmap_fd, m->fd, 0, flags, align, kind, m->cpu_addr);
+        rc = nvioctlNvmap_Alloc(g_nvmap_fd, m->fd,
+            0, is_cacheable ? 1 : 0, align, kind, m->cpu_addr);
+
+    if (R_SUCCEEDED(rc) && !is_cacheable) {
+        armDCacheFlush(m->cpu_addr, m->size); // flush the CPU data cache over the buffer before changing its attribute
+        rc = svcSetMemoryAttribute(m->cpu_addr, m->size, 8, 8); // mask 8 / value 8 = CPU-uncached; a failure now aborts creation
+    }
 
     if (R_SUCCEEDED(rc))
-        rc = nvAddressSpaceMapBuffer(as, m->fd, 0, &m->gpu_addr);
+        rc = nvAddressSpaceMapBuffer(as, m->fd,
+            is_cacheable ? NvMapBufferFlags_IsCacheable : 0, NvKind_Pitch, &m->gpu_addr);
 
     if (R_FAILED(rc))
         nvBufferFree(m);
@@ -76,24 +84,6 @@ static Result _nvBufferCreate(
     return rc;
 }
 
-Result nvBufferCreate(
-        NvBuffer* m, size_t size, u32 align, NvKind kind, NvAddressSpace* as) {
-    return _nvBufferCreate(m, size, 0, align, kind, as);
-}
-
-Result nvBufferCreateRw(
-        NvBuffer* m, size_t size, u32 align, NvKind kind, NvAddressSpace* as) {
-    return _nvBufferCreate(m, size, NvBufferFlags_Writable, align, kind, as);
-}
-
-Result nvBufferMakeCpuUncached(NvBuffer* m) {
-    return svcSetMemoryAttribute(m->cpu_addr, m->size, 8, 8);
-}
-
-Result nvBufferMakeCpuCached(NvBuffer* m) {
-    return svcSetMemoryAttribute(m->cpu_addr, m->size, 8, 0);
-}
-
 void nvBufferFree(NvBuffer* m)
 {
     if (!m->has_init)
@@ -115,7 +105,9 @@ void nvBufferFree(NvBuffer* m)
     }
 
     if (m->cpu_addr) {
-        nvBufferMakeCpuCached(m);
+        if (!m->is_cacheable)
+            svcSetMemoryAttribute(m->cpu_addr, m->size, 8, 0);
+
         free(m->cpu_addr);
         m->cpu_addr = NULL;
     }
@@ -132,7 +124,8 @@ iova_t nvBufferGetGpuAddr(NvBuffer* m) {
 }
 
 Result nvBufferMapAsTexture(NvBuffer* m, NvKind kind) {
-    return nvAddressSpaceMapBuffer(m->addr_space, m->fd, kind, &m->gpu_addr_texture);
+    return nvAddressSpaceMapBuffer(m->addr_space, m->fd,
+        m->is_cacheable ? NvMapBufferFlags_IsCacheable : 0, kind, &m->gpu_addr_texture);
 }
 
 iova_t nvBufferGetGpuAddrTexture(NvBuffer* m) {
diff --git a/nx/source/nvidia/gpu/cmd_list.c b/nx/source/nvidia/gpu/cmd_list.c
index e54f6df8..ff6fc50a 100644
--- a/nx/source/nvidia/gpu/cmd_list.c
+++ b/nx/source/nvidia/gpu/cmd_list.c
@@ -22,12 +22,10 @@ Result nvCmdListCreate(NvCmdList* c, NvGpu* parent, size_t max_cmds)
     Result rc;
 
     rc = nvBufferCreate(
-        &c->buffer, max_cmds * 4, 0x1000, NvKind_Pitch,
+        &c->buffer, max_cmds * 4, 0x1000, false, NvKind_Pitch, // is_cacheable=false (nvBufferCreate makes it CPU-uncached), kind=NvKind_Pitch
         &parent->addr_space);
 
     if (R_SUCCEEDED(rc)) {
-        nvBufferMakeCpuUncached(&c->buffer);
-
         c->offset = 0;
         c->num_cmds = 0;
         c->max_cmds = max_cmds;
diff --git a/nx/source/nvidia/gpu/zcull_ctx.c b/nx/source/nvidia/gpu/zcull_ctx.c
index cbe9e603..7c3110df 100644
--- a/nx/source/nvidia/gpu/zcull_ctx.c
+++ b/nx/source/nvidia/gpu/zcull_ctx.c
@@ -24,13 +24,10 @@ Result nvZcullContextCreate(NvZcullContext* z, NvGpu* parent)
 
     z->parent = parent;
 
-    rc = nvBufferCreateRw(
-        &z->ctx_buf, nvInfoGetZcullCtxSize(), 0x20000, NvKind_Pitch,
+    rc = nvBufferCreate(
+        &z->ctx_buf, nvInfoGetZcullCtxSize(), 0x20000, true, NvKind_Pitch, // is_cacheable=true, kind=NvKind_Pitch
         &parent->addr_space);
 
-    if (R_SUCCEEDED(rc))
-        rc = nvBufferMapAsTexture(&z->ctx_buf, NvKind_Generic_16BX2);
-
     if (R_SUCCEEDED(rc))
         rc = nvioctlChannel_ZCullBind(
             parent->gpu_channel.fd, nvBufferGetGpuAddr(&z->ctx_buf),
@@ -40,6 +37,5 @@ Result nvZcullContextCreate(NvZcullContext* z, NvGpu* parent)
 }
 
 void nvZcullContextClose(NvZcullContext* z) {
-    // TODO: Unmap z->ctx_buf from parent->addr_space?
     nvBufferFree(&z->ctx_buf);
 }