Getting closer to GPU initialization

This commit is contained in:
plutooo 2018-04-01 16:48:52 +02:00
parent e420e53bde
commit 5ea56defc4
6 changed files with 186 additions and 41 deletions

View File

@ -68,6 +68,7 @@ extern "C" {
#include "switch/nvidia/gpu/3d_ctx.h"
#include "switch/nvidia/gpu/error_notifier.h"
#include "switch/nvidia/gpu/gpu.h"
#include "switch/nvidia/cmds/vn.h"
#include "switch/nvidia/cmds/common.h"
#include "switch/nvidia/cmds/3d.h"
#include "switch/nvidia/cmds/3d_init.h"

View File

@ -1,14 +1,29 @@
enum {
NvReg3D_LineWidthSeparate = 0x83,
NvReg3D_LocalBase = 0x1df,
NvReg3D_RenderTargetNAddr = 0x200,
NvReg3D_RenderTargetNHorizontal = 0x202,
NvReg3D_RenderTargetNVertical = 0x203,
NvReg3D_LineWidthSeparate = 0x203,
NvReg3D_RenderTargetNFormat = 0x204,
NvReg3D_RenderTargetNTileMode = 0x205,
NvReg3D_RenderTargetNArrayMode = 0x206,
NvReg3D_RenderTargetNLayerStride = 0x207,
NvReg3D_RenderTargetNBaseLayer = 0x208,
NvReg3D_ViewportNScaleX = 0x280,
NvReg3D_ViewportNScaleY = 0x281,
NvReg3D_ViewportNScaleZ = 0x282,
NvReg3D_ViewportNTranslateX = 0x283,
NvReg3D_ViewportNTranslateY = 0x284,
NvReg3D_ViewportNTranslateZ = 0x285,
NvReg3D_ViewportNSwizzles = 0x286,
NvReg3D_ViewportNSubpixelPrecisionBias = 0x287,
NvReg3D_ViewportNHorizontal= 0x300,
NvReg3D_ViewportNVertical= 0x301,
NvReg3D_ViewportNDepthRangeNear= 0x302,
NvReg3D_ViewportNDepthRangeFar= 0x303,
NvReg3D_ClipRectNHorizontal = 0x340,
NvReg3D_ClipRectNVertical = 0x341,
NvReg3D_CallLimitLog = 0x359,
@ -23,6 +38,8 @@ enum {
NvReg3D_ClearFlags = 0x43e,
NvReg3D_RenderTargetControl = 0x487,
NvReg3D_LinkedTsc = 0x48d,
NvReg3D_BlendIndependent = 0x4b9,
NvReg3D_ScreenHorizontalControl = 0x4be,
NvReg3D_ZcullStatCtrsEnable = 0x547,
NvReg3D_MultisampleEnable = 0x54d,
NvReg3D_MultisampleControl = 0x54f,
@ -31,22 +48,64 @@ enum {
NvReg3D_MultisampleCsaaEnable = 0x56d,
NvReg3D_Layer = 0x573,
NvReg3D_MultisampleMode = 0x574,
NvReg3D_EdgeFlag = 0x579,
NvReg3D_VertexIdGenMode = 0x593,
NvReg3D_PointRasterRules = 0x597,
NvReg3D_ProvokingVertexLast = 0x5a1,
NvReg3D_VertexStreamNEnableDivisor = 0x620,
NvReg3D_ZcullTestMask = 0x65b,
NvReg3D_ClearBufferTrigger = 0x674,
NvReg3D_TextureConstBufferIndex = 0x982,
NvReg3D_ViewportTransformEnable = 0x64b,
NvReg3D_ViewportControl = 0x64f,
NvReg3D_ConstantBufferSize = 0x8e0,
NvReg3D_ConstantBufferAddr = 0x8e1,
NvReg3D_ConstantBufferLoadOffset = 0x8e3,
NvReg3D_ConstantBufferBufferN = 0x8e3,
NvReg3D_TextureConstantBufferIndex = 0x982,
NvReg3D_ConstantBufferBindN = 0x904,
NvReg3D_MmeShadowScratchN = 0xd00,
};
#define NvReg3D_ViewportScaleX(n) \
(NvReg3D_ViewportNScaleX + 8*(n))
#define NvReg3D_ViewportScaleY(n) \
(NvReg3D_ViewportNScaleY + 8*(n))
#define NvReg3D_ViewportScaleZ(n) \
(NvReg3D_ViewportNScaleZ + 8*(n))
#define NvReg3D_ViewportTranslateX(n) \
(NvReg3D_ViewportNTranslateX + 8*(n))
#define NvReg3D_ViewportTranslateY(n) \
(NvReg3D_ViewportNTranslateY + 8*(n))
#define NvReg3D_ViewportTranslateZ(n) \
(NvReg3D_ViewportNTranslateZ + 8*(n))
#define NvReg3D_ViewportSwizzles(n) \
(NvReg3D_ViewportNSwizzles + 8*(n))
#define NvReg3D_ViewportSubpixelPrecisionBias(n) \
(NvReg3D_ViewportNSubpixelPrecisionBias + 8*(n))
#define NvReg3D_ViewportHorizontal(n) \
(NvReg3D_ViewportNHorizontal + 4*(n))
#define NvReg3D_ViewportVertical(n) \
(NvReg3D_ViewportNVertical + 4*(n))
#define NvReg3D_ViewportDepthRangeNear(n) \
(NvReg3D_ViewportNDepthRangeNear + 4*(n))
#define NvReg3D_ViewportDepthRangeFar(n) \
(NvReg3D_ViewportNDepthRangeFar + 4*(n))
#define NvReg3D_ClipRectHorizontal(n) \
((NvReg3D_ClipRectNHorizontal) + 2*(n))
(NvReg3D_ClipRectNHorizontal + 2*(n))
#define NvReg3D_ClipRectVertical(n) \
((NvReg3D_ClipRectNVertical) + 2*(n))
(NvReg3D_ClipRectNVertical + 2*(n))
#define NvReg3D_ScissorEnable(n) \
((NvReg3D_ScissorNEnable) + 4*(n))
(NvReg3D_ScissorNEnable + 4*(n))
#define NvReg3D_VertexStreamEnableDivisor(n) \
((NvReg3D_VertexStreamNEnableDivisor) + (n))
(NvReg3D_VertexStreamNEnableDivisor + (n))
#define NvReg3D_ConstantBufferBind(n) \
(NvReg3D_ConstantBufferBindN + 8*(n))
#define NvReg3D_MmeShadowScratch(n) \
(NvReg3D_MmeShadowScratchN + (n))

View File

@ -13,6 +13,7 @@ iova_t nvCmdListGetGpuAddr(NvCmdList* c);
u64 nvCmdListGetListSize(NvCmdList* c);
u32* nvCmdListInsert(NvCmdList* c, size_t num_cmds);
#define NvCmd(cmd_list, ...) do { \
u32 _[] = { __VA_ARGS__ }; \
memcpy(nvCmdListInsert(cmd_list, sizeof(_)/4), _, sizeof(_)); \

View File

@ -1,17 +1,17 @@
#include <switch.h>
#include <string.h>
void nvCmdsClearBuffer(
NvCmdList* cmds, NvBuffer* buf, u32 width, u32 height, float colors[4])
void nvVnClearBuffer(
Vn* vn, NvBuffer* buf, u32 width, u32 height, float colors[4])
{
NvCmd(cmds, NvIncr(0, NvReg3D_ClearColor,
VnCmd(vn, NvIncr(0, NvReg3D_ClearColor,
f2i(colors[0]), f2i(colors[1]), f2i(colors[2]), f2i(colors[3])));
NvCmd(cmds, NvIncr(0, NvReg3D_ScreenScissorHorizontal,
VnCmd(vn, NvIncr(0, NvReg3D_ScreenScissorHorizontal,
0 | (0x100 << 16), 0 | (0x100 << 16)));
NvCmd(cmds, NvImm(0, NvReg3D_RenderTargetControl, 1)); // bit0 probably enables RT #0
VnCmd(vn, NvImm(0, NvReg3D_RenderTargetControl, 1)); // bit0 probably enables RT #0
iova_t gpu_addr = nvBufferGetGpuAddr(buf);
NvCmd(cmds,
VnCmd(vn,
NvIncr(NvReg3D_RenderTargetNAddr + 0x10*0,
gpu_addr >> 32, gpu_addr,
width, height,
@ -23,7 +23,7 @@ void nvCmdsClearBuffer(
));
int z;
for (z=0; z<32; z++)
NvCmd(cmds, NvImm(0, NvReg3D_ClearBufferTrigger, 0x3c | (z << 10)));
VnCmd(vn, NvImm(0, NvReg3D_ClearBufferTrigger, 0x3c | (z << 10)));
/*
TODO:
IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0);

View File

@ -1,12 +1,13 @@
#include <switch.h>
#include <string.h>
void nvCmdsInit3D(NvCmdList* cmds) {
NvCmd(
cmds,
Result vnInit3D(Vn* vn) {
Result rc;
VnCmd(vn,
// ???
NvIncr(0, 0xd1a, 0, 0xffffffff),
NvImm(0, 0xd19, 0),
NvIncr(0, NvReg3D_MmeShadowScratch(0x1A), 0, 0xffffffff),
NvImm(0, NvReg3D_MmeShadowScratch(0x19), 0),
// Reset multisampling
NvImm(0, NvReg3D_MultisampleEnable, 0),
NvImm(0, NvReg3D_MultisampleCsaaEnable, 0),
@ -24,10 +25,11 @@ void nvCmdsInit3D(NvCmdList* cmds) {
NvImm(0, NvReg3D_ClearFlags, 0x101));
size_t i;
for (i=0; i<16; i++)
NvCmd(cmds, NvImm(0, NvReg3D_ScissorEnable(i), 1));
for (i=0; i<16; i++) {
VnCmd(vn, NvImm(0, NvReg3D_ScissorEnable(i), 1));
}
NvCmd(cmds, NvImm(0, NvReg3D_PrimRestartWithDrawArrays, 1),
VnCmd(vn, NvImm(0, NvReg3D_PrimRestartWithDrawArrays, 1),
NvImm(0, NvReg3D_PointRasterRules, 0),
NvImm(0, NvReg3D_LinkedTsc, 0),
NvImm(0, NvReg3D_ProvokingVertexLast, 1),
@ -46,11 +48,11 @@ void nvCmdsInit3D(NvCmdList* cmds) {
NvImm(0, 0x584, 0xe));
for (i=0; i<16; i++) {
NvCmd(cmds, NvImm(0, NvReg3D_VertexStreamEnableDivisor(i), 0));
VnCmd(vn, NvImm(0, NvReg3D_VertexStreamEnableDivisor(i), 0));
}
NvCmd(
cmds,
VnCmd(
vn,
NvImm(0, NvReg3D_VertexIdGenMode, 0),
NvImm(0, NvReg3D_ZcullStatCtrsEnable, 1),
NvImm(0, NvReg3D_LineWidthSeparate, 1),
@ -61,7 +63,7 @@ void nvCmdsInit3D(NvCmdList* cmds) {
NvImm(0, 0x670, 1),
NvImm(0, 0x3e3, 0),
NvImm(0, NvReg3D_StencilTwoSideEnable, 1),
NvImm(0, NvReg3D_TextureConstBufferIndex, 2),
NvImm(0, NvReg3D_TextureConstantBufferIndex, 2),
NvImm(0, 0xc4, 0x503),
NvIncr(0, NvReg3D_LocalBase, 0x01000000),
NvImm(0, 0x44c, 0x13),
@ -73,29 +75,108 @@ void nvCmdsInit3D(NvCmdList* cmds) {
NvImm(0, 0xa4, 0),
NvImm(0, 0x221, 0x3f));
// TODO: Call some macro shit (0xe16).
// TODO: Call macro_14f(0x00418800, 1, 1).
// TODO: Call macro_14f(0x00419a08, 0, 0x10).
// TODO: Call macro_14f(0x00419f78, 0, 8).
// TODO: Call macro_14f(0x00404468, 0x07ffffff, 0x3fffffff).
// TODO: Call macro_14f(0x00419a04, 1, 1).
// TODO: Call macro_14f(0x00419a04, 2, 2).
NvCmd(
cmds,
VnCmd(
vn,
// Reset Zcull.
NvImm(0, NvReg3D_ZcullTestMask, 0),
NvImm(0, 0x65a, 0x11),
NvImm(0, NvReg3D_ZcullTestMask, 0),
NvImm(0, NvReg3D_ZcullRegion, 0),
NvIncr(0, 0x054, 0x49000000, 0x49000001),
NvIncr(0, 0xd18, 0x05000500),
NvIncr(0, NvReg3D_MmeShadowScratch(0x18), 0x05000500)
);
// TODO: Call some macro shit (0xe34)
// TODO: Call macro_21d(5, 0x00050000, 0x67);
// TODO: Fill in NvReg3D_VertexRunoutAddr with a valid addr.
// TODO: Of what size is this buffer actually supposed to be?
rc = nvBufferCreateRw(
&vn->vertex_runout, 0x10000/*???*/, 0x1000, 0, &vn->parent->addr_space);
// TODO: Call some macro shit (0xe2a)
if (R_FAILED(rc))
return rc;
// TODO: CB_DATA stuff
iova_t gpu_addr = nvBufferGetGpuAddr(&vn->vertex_runout);
VnCmd(vn, NvIncr(0, NvReg3D_VertexRunoutAddr, gpu_addr >> 32, gpu_addr));
// TODO: Call some macro shit (0xe32)
// TODO: Call macro_206(0x194);
// TODO: CB_DATA stuff
// TODO: Write an addr(?) to low->0x8e4,high->0x8e5
// TODO: Write 0 to 0x8e6, 0x8e7.
// TODO: CB_BIND stuff
// TODO: Call macro_226(5 /* addr_hi */, 0x00056900 /* addr_low */, 0x100)
// TODO: Call macro_226(5 /* addr_hi */, 0x00056A00 /* addr_low */, 0x800)
// Bind all const buffers index 0 to same buffer (of size 0x5f00).
rc = nvBufferCreateRw(
&vn->const_buffer0, 0x5f00 + 5*0x200, 0x1000, 0, &vn->parent->addr_space);
if (R_FAILED(rc))
return rc;
gpu_addr = nvBufferGetGpuAddr(&vn->const_buffer0);
VnCmd(vn,
NvIncr(
0, NvReg3D_ConstantBufferSize,
0x5f00,
gpu_addr >> 32, gpu_addr
)
);
gpu_addr += 0x5f00;
for (i=0; i<5; i++) {
VnCmd(vn, NvImm(0, NvReg3D_ConstantBufferBind(i), 1));
}
// Bind const buffer index 2 to different buffers (each of size 0x200).
for (i=0; i<5; i++) {
VnCmd(vn,
NvIncr(0,
NvReg3D_ConstantBufferSize,
0x5f00, /* Size */
gpu_addr >> 32, gpu_addr /* Addr */
),
NvImm(0, NvReg3D_ConstantBufferBind(i), 0x21),
NvIncrOnce(
0, NvReg3D_ConstantBufferLoadOffset, 0,
0,1,2,3,4,5,6,7
)
);
gpu_addr += 0x200;
}
VnCmd(vn,
NvImm(0, NvReg3D_BlendIndependent, 1),
NvImm(0, NvReg3D_EdgeFlag, 1),
NvImm(0, NvReg3D_ViewportTransformEnable, 1),
NvIncr(0, NvReg3D_ViewportControl, 0x181d) // ???
);
// Reset all the viewports.
for (i=0; i<16; i++) {
VnCmd(vn,
NvIncr(0, NvReg3D_ViewportScaleX(i),
f2i(0.5), /* ScaleX */
f2i(0.5), /* ScaleY */
f2i(0.5), /* ScaleZ */
f2i(0.5), /* TranslateX */
f2i(0.5), /* TranslateY */
f2i(0.5) /* TranslateZ */
),
NvIncr(0, NvReg3D_ViewportHorizontal(i),
0 | (1<<16), /* Horizontal */
0 | (1<<16) /* Vertical */
)
);
}
VnCmd(vn, NvImm(0, NvReg3D_ScreenHorizontalControl, 0x10));
return 0;
}

View File

@ -1,8 +1,11 @@
#include <switch.h>
#include <string.h>
void nvCmdsInit(NvCmdList* cmds) {
NvCmd(cmds,
void vnCmdsInit(Vn* vn, NvGpu* parent)
{
vn->parent = parent;
VnCmd(vn,
NvIncr(0, NvCmdCommon_BindObject, NvClassNumber_3D),
NvIncr(1, NvCmdCommon_BindObject, NvClassNumber_Compute),
NvIncr(2, NvCmdCommon_BindObject, NvClassNumber_Kepler),