From 5ea56defc4c1b233a11642572e2cb8b7a5b20952 Mon Sep 17 00:00:00 2001 From: plutooo Date: Sun, 1 Apr 2018 16:48:52 +0200 Subject: [PATCH] Getting closer to GPU initialization --- nx/include/switch.h | 1 + nx/include/switch/nvidia/cmds/3d.h | 71 +++++++++++-- nx/include/switch/nvidia/gpu/cmd_list.h | 5 +- nx/source/nvidia/cmds/3d_clear.c | 14 +-- nx/source/nvidia/cmds/3d_init.c | 129 +++++++++++++++++++----- nx/source/nvidia/cmds/common.c | 7 +- 6 files changed, 186 insertions(+), 41 deletions(-) diff --git a/nx/include/switch.h b/nx/include/switch.h index 70a8db12..2181d9ee 100644 --- a/nx/include/switch.h +++ b/nx/include/switch.h @@ -68,6 +68,7 @@ extern "C" { #include "switch/nvidia/gpu/3d_ctx.h" #include "switch/nvidia/gpu/error_notifier.h" #include "switch/nvidia/gpu/gpu.h" +#include "switch/nvidia/cmds/vn.h" #include "switch/nvidia/cmds/common.h" #include "switch/nvidia/cmds/3d.h" #include "switch/nvidia/cmds/3d_init.h" diff --git a/nx/include/switch/nvidia/cmds/3d.h b/nx/include/switch/nvidia/cmds/3d.h index 29e0788d..798a23da 100644 --- a/nx/include/switch/nvidia/cmds/3d.h +++ b/nx/include/switch/nvidia/cmds/3d.h @@ -1,14 +1,29 @@ enum { + NvReg3D_LineWidthSeparate = 0x83, NvReg3D_LocalBase = 0x1df, NvReg3D_RenderTargetNAddr = 0x200, NvReg3D_RenderTargetNHorizontal = 0x202, NvReg3D_RenderTargetNVertical = 0x203, - NvReg3D_LineWidthSeparate = 0x203, NvReg3D_RenderTargetNFormat = 0x204, NvReg3D_RenderTargetNTileMode = 0x205, NvReg3D_RenderTargetNArrayMode = 0x206, NvReg3D_RenderTargetNLayerStride = 0x207, NvReg3D_RenderTargetNBaseLayer = 0x208, + + NvReg3D_ViewportNScaleX = 0x280, + NvReg3D_ViewportNScaleY = 0x281, + NvReg3D_ViewportNScaleZ = 0x282, + NvReg3D_ViewportNTranslateX = 0x283, + NvReg3D_ViewportNTranslateY = 0x284, + NvReg3D_ViewportNTranslateZ = 0x285, + NvReg3D_ViewportNSwizzles = 0x286, + NvReg3D_ViewportNSubpixelPrecisionBias = 0x287, + + NvReg3D_ViewportNHorizontal= 0x300, + NvReg3D_ViewportNVertical= 0x301, + NvReg3D_ViewportNDepthRangeNear= 
0x302, + NvReg3D_ViewportNDepthRangeFar= 0x303, + NvReg3D_ClipRectNHorizontal = 0x340, NvReg3D_ClipRectNVertical = 0x341, NvReg3D_CallLimitLog = 0x359, @@ -23,6 +38,8 @@ enum { NvReg3D_ClearFlags = 0x43e, NvReg3D_RenderTargetControl = 0x487, NvReg3D_LinkedTsc = 0x48d, + NvReg3D_BlendIndependent = 0x4b9, + NvReg3D_ScreenHorizontalControl = 0x4be, NvReg3D_ZcullStatCtrsEnable = 0x547, NvReg3D_MultisampleEnable = 0x54d, NvReg3D_MultisampleControl = 0x54f, @@ -31,22 +48,64 @@ enum { NvReg3D_MultisampleCsaaEnable = 0x56d, NvReg3D_Layer = 0x573, NvReg3D_MultisampleMode = 0x574, + NvReg3D_EdgeFlag = 0x579, NvReg3D_VertexIdGenMode = 0x593, NvReg3D_PointRasterRules = 0x597, NvReg3D_ProvokingVertexLast = 0x5a1, NvReg3D_VertexStreamNEnableDivisor = 0x620, NvReg3D_ZcullTestMask = 0x65b, NvReg3D_ClearBufferTrigger = 0x674, - NvReg3D_TextureConstBufferIndex = 0x982, + NvReg3D_ViewportTransformEnable = 0x64b, + NvReg3D_ViewportControl = 0x64f, + + NvReg3D_ConstantBufferSize = 0x8e0, + NvReg3D_ConstantBufferAddr = 0x8e1, + NvReg3D_ConstantBufferLoadOffset = 0x8e3, + NvReg3D_ConstantBufferBufferN = 0x8e3, + NvReg3D_TextureConstantBufferIndex = 0x982, + NvReg3D_ConstantBufferBindN = 0x904, + NvReg3D_MmeShadowScratchN = 0xd00, }; +#define NvReg3D_ViewportScaleX(n) \ + (NvReg3D_ViewportNScaleX + 8*(n)) +#define NvReg3D_ViewportScaleY(n) \ + (NvReg3D_ViewportNScaleY + 8*(n)) +#define NvReg3D_ViewportScaleZ(n) \ + (NvReg3D_ViewportNScaleZ + 8*(n)) +#define NvReg3D_ViewportTranslateX(n) \ + (NvReg3D_ViewportNTranslateX + 8*(n)) +#define NvReg3D_ViewportTranslateY(n) \ + (NvReg3D_ViewportNTranslateY + 8*(n)) +#define NvReg3D_ViewportTranslateZ(n) \ + (NvReg3D_ViewportNTranslateZ + 8*(n)) +#define NvReg3D_ViewportSwizzles(n) \ + (NvReg3D_ViewportNSwizzles + 8*(n)) +#define NvReg3D_ViewportSubpixelPrecisionBias(n) \ + (NvReg3D_ViewportNSubpixelPrecisionBias + 8*(n)) + +#define NvReg3D_ViewportHorizontal(n) \ + (NvReg3D_ViewportNHorizontal + 4*(n)) +#define NvReg3D_ViewportVertical(n) \ + 
(NvReg3D_ViewportNVertical + 4*(n)) +#define NvReg3D_ViewportDepthRangeNear(n) \ + (NvReg3D_ViewportNDepthRangeNear + 4*(n)) +#define NvReg3D_ViewportDepthRangeFar(n) \ + (NvReg3D_ViewportNDepthRangeFar + 4*(n)) + #define NvReg3D_ClipRectHorizontal(n) \ - ((NvReg3D_ClipRectNHorizontal) + 2*(n)) + (NvReg3D_ClipRectNHorizontal + 2*(n)) #define NvReg3D_ClipRectVertical(n) \ - ((NvReg3D_ClipRectNVertical) + 2*(n)) + (NvReg3D_ClipRectNVertical + 2*(n)) #define NvReg3D_ScissorEnable(n) \ - ((NvReg3D_ScissorNEnable) + 4*(n)) + (NvReg3D_ScissorNEnable + 4*(n)) #define NvReg3D_VertexStreamEnableDivisor(n) \ - ((NvReg3D_VertexStreamNEnableDivisor) + (n)) + (NvReg3D_VertexStreamNEnableDivisor + (n)) + +#define NvReg3D_ConstantBufferBind(n) \ + (NvReg3D_ConstantBufferBindN + 8*(n)) + +#define NvReg3D_MmeShadowScratch(n) \ + (NvReg3D_MmeShadowScratchN + (n)) diff --git a/nx/include/switch/nvidia/gpu/cmd_list.h b/nx/include/switch/nvidia/gpu/cmd_list.h index 24b7bb72..4ec83e1b 100644 --- a/nx/include/switch/nvidia/gpu/cmd_list.h +++ b/nx/include/switch/nvidia/gpu/cmd_list.h @@ -13,12 +13,13 @@ iova_t nvCmdListGetGpuAddr(NvCmdList* c); u64 nvCmdListGetListSize(NvCmdList* c); u32* nvCmdListInsert(NvCmdList* c, size_t num_cmds); -#define NvCmd(cmd_list, ...) do { \ + +#define NvCmd(cmd_list, ...) do { \ u32 _[] = { __VA_ARGS__ }; \ memcpy(nvCmdListInsert(cmd_list, sizeof(_)/4), _, sizeof(_)); \ } while (0) -#define NvImm(subc, reg, val) \ +#define NvImm(subc, reg, val) \ (0x80000000 | (reg) | ((subc) << 13) | ((val) << 16)) #define NvRep(subc, reg, ...) 
\ diff --git a/nx/source/nvidia/cmds/3d_clear.c b/nx/source/nvidia/cmds/3d_clear.c index a2e0994c..5545f679 100644 --- a/nx/source/nvidia/cmds/3d_clear.c +++ b/nx/source/nvidia/cmds/3d_clear.c @@ -1,17 +1,17 @@ #include #include -void nvCmdsClearBuffer( - NvCmdList* cmds, NvBuffer* buf, u32 width, u32 height, float colors[4]) +void nvVnClearBuffer( + Vn* vn, NvBuffer* buf, u32 width, u32 height, float colors[4]) { - NvCmd(cmds, NvIncr(0, NvReg3D_ClearColor, + VnCmd(vn, NvIncr(0, NvReg3D_ClearColor, f2i(colors[0]), f2i(colors[1]), f2i(colors[2]), f2i(colors[3]))); - NvCmd(cmds, NvIncr(0, NvReg3D_ScreenScissorHorizontal, + VnCmd(vn, NvIncr(0, NvReg3D_ScreenScissorHorizontal, 0 | (0x100 << 16), 0 | (0x100 << 16))); - NvCmd(cmds, NvImm(0, NvReg3D_RenderTargetControl, 1)); // bit0 probably enables RT #0 + VnCmd(vn, NvImm(0, NvReg3D_RenderTargetControl, 1)); // bit0 probably enables RT #0 iova_t gpu_addr = nvBufferGetGpuAddr(buf); - NvCmd(cmds, + VnCmd(vn, NvIncr(NvReg3D_RenderTargetNAddr + 0x10*0, gpu_addr >> 32, gpu_addr, width, height, @@ -23,7 +23,7 @@ void nvCmdsClearBuffer( )); int z; for (z=0; z<32; z++) - NvCmd(cmds, NvImm(0, NvReg3D_ClearBufferTrigger, 0x3c | (z << 10))); + VnCmd(vn, NvImm(0, NvReg3D_ClearBufferTrigger, 0x3c | (z << 10))); /* TODO: IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0); diff --git a/nx/source/nvidia/cmds/3d_init.c b/nx/source/nvidia/cmds/3d_init.c index fe488c02..a92392f4 100644 --- a/nx/source/nvidia/cmds/3d_init.c +++ b/nx/source/nvidia/cmds/3d_init.c @@ -1,12 +1,13 @@ #include #include -void nvCmdsInit3D(NvCmdList* cmds) { - NvCmd( - cmds, +Result vnInit3D(Vn* vn) { + Result rc; + + VnCmd(vn, // ??? 
- NvIncr(0, 0xd1a, 0, 0xffffffff), - NvImm(0, 0xd19, 0), + NvIncr(0, NvReg3D_MmeShadowScratch(0x1A), 0, 0xffffffff), + NvImm(0, NvReg3D_MmeShadowScratch(0x19), 0), // Reset multisampling NvImm(0, NvReg3D_MultisampleEnable, 0), NvImm(0, NvReg3D_MultisampleCsaaEnable, 0), @@ -24,10 +25,11 @@ void nvCmdsInit3D(NvCmdList* cmds) { NvImm(0, NvReg3D_ClearFlags, 0x101)); size_t i; - for (i=0; i<16; i++) - NvCmd(cmds, NvImm(0, NvReg3D_ScissorEnable(i), 1)); + for (i=0; i<16; i++) { + VnCmd(vn, NvImm(0, NvReg3D_ScissorEnable(i), 1)); + } - NvCmd(cmds, NvImm(0, NvReg3D_PrimRestartWithDrawArrays, 1), + VnCmd(vn, NvImm(0, NvReg3D_PrimRestartWithDrawArrays, 1), NvImm(0, NvReg3D_PointRasterRules, 0), NvImm(0, NvReg3D_LinkedTsc, 0), NvImm(0, NvReg3D_ProvokingVertexLast, 1), @@ -46,11 +48,11 @@ void nvCmdsInit3D(NvCmdList* cmds) { NvImm(0, 0x584, 0xe)); for (i=0; i<16; i++) { - NvCmd(cmds, NvImm(0, NvReg3D_VertexStreamEnableDivisor(i), 0)); + VnCmd(vn, NvImm(0, NvReg3D_VertexStreamEnableDivisor(i), 0)); } - NvCmd( - cmds, + VnCmd( + vn, NvImm(0, NvReg3D_VertexIdGenMode, 0), NvImm(0, NvReg3D_ZcullStatCtrsEnable, 1), NvImm(0, NvReg3D_LineWidthSeparate, 1), @@ -61,7 +63,7 @@ void nvCmdsInit3D(NvCmdList* cmds) { NvImm(0, 0x670, 1), NvImm(0, 0x3e3, 0), NvImm(0, NvReg3D_StencilTwoSideEnable, 1), - NvImm(0, NvReg3D_TextureConstBufferIndex, 2), + NvImm(0, NvReg3D_TextureConstantBufferIndex, 2), NvImm(0, 0xc4, 0x503), NvIncr(0, NvReg3D_LocalBase, 0x01000000), NvImm(0, 0x44c, 0x13), @@ -73,29 +75,108 @@ void nvCmdsInit3D(NvCmdList* cmds) { NvImm(0, 0xa4, 0), NvImm(0, 0x221, 0x3f)); - // TODO: Call some macro shit (0xe16). + // TODO: Call macro_14f(0x00418800, 1, 1). + // TODO: Call macro_14f(0x00419a08, 0, 0x10). + // TODO: Call macro_14f(0x00419f78, 0, 8). + // TODO: Call macro_14f(0x00404468, 0x07ffffff, 0x3fffffff). + // TODO: Call macro_14f(0x00419a04, 1, 1). + // TODO: Call macro_14f(0x00419a04, 2, 2). - NvCmd( - cmds, + VnCmd( + vn, // Reset Zcull. 
- NvImm(0, NvReg3D_ZcullTestMask, 0), NvImm(0, 0x65a, 0x11), + NvImm(0, NvReg3D_ZcullTestMask, 0), NvImm(0, NvReg3D_ZcullRegion, 0), NvIncr(0, 0x054, 0x49000000, 0x49000001), - NvIncr(0, 0xd18, 0x05000500), + NvIncr(0, NvReg3D_MmeShadowScratch(0x18), 0x05000500) ); - // TODO: Call some macro shit (0xe34) + // TODO: Call macro_21d(5, 0x00050000, 0x67); - // TODO: Fill in NvReg3D_VertexRunoutAddr with a valid addr. + // TODO: Of what size is this buffer actually supposed to be? + rc = nvBufferCreateRw( + &vn->vertex_runout, 0x10000/*???*/, 0x1000, 0, &vn->parent->addr_space); - // TODO: Call some macro shit (0xe2a) + if (R_FAILED(rc)) + return rc; - // TODO: CB_DATA stuff + iova_t gpu_addr = nvBufferGetGpuAddr(&vn->vertex_runout); + VnCmd(vn, NvIncr(0, NvReg3D_VertexRunoutAddr, gpu_addr >> 32, gpu_addr)); - // TODO: Call some macro shit (0xe32) + // TODO: Call macro_206(0x194); - // TODO: CB_DATA stuff + // TODO: Write an addr(?) to low->0x8e4,high->0x8e5 + // TODO: Write 0 to 0x8e6, 0x8e7. - // TODO: CB_BIND stuff + // TODO: Call macro_226(5 /* addr_hi */, 0x00056900 /* addr_low */, 0x100) + // TODO: Call macro_226(5 /* addr_hi */, 0x00056A00 /* addr_low */, 0x800) + + // Bind all const buffers index 0 to same buffer (of size 0x5f00). + rc = nvBufferCreateRw( + &vn->const_buffer0, 0x5f00 + 5*0x200, 0x1000, 0, &vn->parent->addr_space); + + if (R_FAILED(rc)) + return rc; + + gpu_addr = nvBufferGetGpuAddr(&vn->const_buffer0); + + VnCmd(vn, + NvIncr( + 0, NvReg3D_ConstantBufferSize, + 0x5f00, + gpu_addr >> 32, gpu_addr + ) + ); + gpu_addr += 0x5f00; + + for (i=0; i<5; i++) { + VnCmd(vn, NvImm(0, NvReg3D_ConstantBufferBind(i), 1)); + } + + // Bind const buffer index 2 to different buffers (each of size 0x200). 
+ for (i=0; i<5; i++) { + VnCmd(vn, + NvIncr(0, + NvReg3D_ConstantBufferSize, + 0x5f00, /* Size */ + gpu_addr >> 32, gpu_addr /* Addr */ + ), + NvImm(0, NvReg3D_ConstantBufferBind(i), 0x21), + NvIncrOnce( + 0, NvReg3D_ConstantBufferLoadOffset, 0, + 0,1,2,3,4,5,6,7 + ) + ); + gpu_addr += 0x200; + } + + VnCmd(vn, + NvImm(0, NvReg3D_BlendIndependent, 1), + NvImm(0, NvReg3D_EdgeFlag, 1), + NvImm(0, NvReg3D_ViewportTransformEnable, 1), + NvIncr(0, NvReg3D_ViewportControl, 0x181d) // ??? + ); + + // Reset all the viewports. + for (i=0; i<16; i++) { + VnCmd(vn, + NvIncr(0, NvReg3D_ViewportScaleX(i), + f2i(0.5), /* ScaleX */ + f2i(0.5), /* ScaleY */ + f2i(0.5), /* ScaleZ */ + f2i(0.5), /* TranslateX */ + f2i(0.5), /* TranslateY */ + f2i(0.5) /* TranslateZ */ + ), + NvIncr(0, NvReg3D_ViewportHorizontal(i), + 0 | (1<<16), /* Horizontal */ + 0 | (1<<16) /* Vertical */ + ) + ); + } + + VnCmd(vn, NvImm(0, NvReg3D_ScreenHorizontalControl, 0x10)); + + return 0; } diff --git a/nx/source/nvidia/cmds/common.c b/nx/source/nvidia/cmds/common.c index 2578a0e9..ad6cc6e3 100644 --- a/nx/source/nvidia/cmds/common.c +++ b/nx/source/nvidia/cmds/common.c @@ -1,8 +1,11 @@ #include #include -void nvCmdsInit(NvCmdList* cmds) { - NvCmd(cmds, +void vnCmdsInit(Vn* vn, NvGpu* parent) +{ + vn->parent = parent; + + VnCmd(vn, NvIncr(0, NvCmdCommon_BindObject, NvClassNumber_3D), NvIncr(1, NvCmdCommon_BindObject, NvClassNumber_Compute), NvIncr(2, NvCmdCommon_BindObject, NvClassNumber_Kepler),