diff --git a/nx/include/switch/kernel/thread.h b/nx/include/switch/kernel/thread.h index 1b379166..ae80db52 100644 --- a/nx/include/switch/kernel/thread.h +++ b/nx/include/switch/kernel/thread.h @@ -1,7 +1,5 @@ typedef struct { Handle handle; - ThreadFunc entry; - void* arg; void* stack_mem; void* stack_mirror; size_t stack_sz; diff --git a/nx/include/switch/result.h b/nx/include/switch/result.h index 59d21f6b..b2850551 100644 --- a/nx/include/switch/result.h +++ b/nx/include/switch/result.h @@ -29,4 +29,5 @@ #define LIBNX_NOTFOUND 8 #define LIBNX_IOERROR 9 #define LIBNX_BADINPUT 10 +#define LIBNX_BADREENT 11 #define LIBNX_PARCEL_ERRBASE 100 diff --git a/nx/source/internal.h b/nx/source/internal.h new file mode 100644 index 00000000..209eb9cd --- /dev/null +++ b/nx/source/internal.h @@ -0,0 +1,24 @@ +#pragma once +#include +#include + +#define THREADVARS_MAGIC 0x21545624 // !TV$ + +// This structure is exactly 0x20 bytes; if more space is needed, modify getThreadVars() below +typedef struct { + // Magic value used to check if the struct is initialized + u32 magic; + + // Pointer to the current thread (if it exists) + Thread* thread_ptr; + + // Pointer to this thread's newlib state + struct _reent* reent; + + // Pointer to this thread's thread-local segment + void* tls_tp; // !! Offset needs to be TLS+0x1F8 for __aarch64_read_tp !! 
+} ThreadVars; + +static inline ThreadVars* getThreadVars(void) { + return (ThreadVars*)((u8*)armGetTls() + 0x1E0); +} diff --git a/nx/source/kernel/thread.c b/nx/source/kernel/thread.c index 20663e24..5fa94e60 100644 --- a/nx/source/kernel/thread.c +++ b/nx/source/kernel/thread.c @@ -1,9 +1,32 @@ // Copyright 2017 plutoo #include #include +#include "../internal.h" -static void _EntryWrap(Thread* t) { - t->entry(t->arg); +extern const u8 __tdata_lma[]; +extern const u8 __tdata_lma_end[]; +extern u8 __tls_start[]; +extern u8 __tls_end[]; + +typedef struct { + Thread* t; + ThreadFunc entry; + void* arg; + struct _reent* reent; + void* tls; + void* padding; +} ThreadEntryArgs; + +static void _EntryWrap(ThreadEntryArgs* args) { + // Initialize thread vars + ThreadVars* tv = getThreadVars(); + tv->magic = THREADVARS_MAGIC; + tv->thread_ptr = args->t; + tv->reent = args->reent; + tv->tls_tp = (u8*)args->tls-2*sizeof(void*); // subtract size of Thread Control Block (TCB) + + // Launch thread entrypoint + args->entry(args->arg); svcExitThread(); } @@ -11,8 +34,12 @@ Result threadCreate( Thread* t, ThreadFunc entry, void* arg, size_t stack_sz, int prio, int cpuid) { + stack_sz = (stack_sz+0xF) &~ 0xF; + Result rc = 0; - void* stack = memalign(0x1000, stack_sz); + size_t reent_sz = (sizeof(struct _reent)+0xF) &~ 0xF; + size_t tls_sz = (__tls_end-__tls_start+0xF) &~ 0xF; + void* stack = memalign(0x1000, stack_sz + reent_sz + tls_sz); if (stack == NULL) { rc = MAKERESULT(MODULE_LIBNX, LIBNX_OUTOFMEM); @@ -24,21 +51,41 @@ Result threadCreate( if (R_SUCCEEDED(rc)) { - u64 stack_top = ((u64)stack_mirror) + t->stack_sz; + u64 stack_top = ((u64)stack_mirror) + t->stack_sz - sizeof(ThreadEntryArgs); + ThreadEntryArgs* args = (ThreadEntryArgs*) stack_top; Handle handle; rc = svcCreateThread( - &handle, (ThreadFunc) &_EntryWrap, (void*) t, (void*) stack_top, + &handle, (ThreadFunc) &_EntryWrap, args, (void*)stack_top, prio, cpuid); if (R_SUCCEEDED(rc)) { t->handle = handle; - t->entry 
= entry; - t->arg = arg; t->stack_mem = stack; t->stack_mirror = stack_mirror; t->stack_sz = stack_sz; + + args->t = t; + args->entry = entry; + args->arg = arg; + args->reent = (struct _reent*)((u8*)stack + stack_sz); + args->tls = (u8*)stack + stack_sz + reent_sz; + + // Set up child thread's reent struct, inheriting standard file handles + _REENT_INIT_PTR(args->reent); + struct _reent* cur = getThreadVars()->reent; + args->reent->_stdin = cur->_stdin; + args->reent->_stdout = cur->_stdout; + args->reent->_stderr = cur->_stderr; + + // Set up child thread's TLS segment + size_t tls_load_sz = __tdata_lma_end-__tdata_lma; + size_t tls_bss_sz = tls_sz - tls_load_sz; + if (tls_load_sz) + memcpy(args->tls, __tdata_lma, tls_load_sz); + if (tls_bss_sz) + memset(args->tls+tls_load_sz, 0, tls_bss_sz); } if (R_FAILED(rc)) { diff --git a/nx/source/system/newlib.c b/nx/source/system/newlib.c index 876d842b..8d2776c5 100644 --- a/nx/source/system/newlib.c +++ b/nx/source/system/newlib.c @@ -3,12 +3,27 @@ #include #include #include +#include "../internal.h" void __attribute__((weak)) NORETURN __libnx_exit(int rc); +extern const u8 __tdata_lma[]; +extern const u8 __tdata_lma_end[]; +extern u8 __tls_start[]; + +static struct _reent* __libnx_get_reent() { + ThreadVars* tv = getThreadVars(); + if (tv->magic != THREADVARS_MAGIC) { + fatalSimple(MAKERESULT(MODULE_LIBNX, LIBNX_BADREENT)); + for (;;); + } + return tv->reent; +} + void newlibSetup() { // Register newlib syscalls - __syscalls.exit = __libnx_exit; + __syscalls.exit = __libnx_exit; + __syscalls.getreent = __libnx_get_reent; // Register locking syscalls __syscalls.lock_init = mutexInit; @@ -17,4 +32,15 @@ void newlibSetup() { __syscalls.lock_init_recursive = rmutexInit; __syscalls.lock_acquire_recursive = rmutexLock; __syscalls.lock_release_recursive = rmutexUnlock; + + // Initialize thread vars for the main thread + ThreadVars* tv = getThreadVars(); + tv->magic = THREADVARS_MAGIC; + tv->thread_ptr = NULL; + tv->reent = 
_impure_ptr; + tv->tls_tp = __tls_start-2*sizeof(void*); // subtract size of Thread Control Block (TCB) + + u32 tls_size = __tdata_lma_end - __tdata_lma; + if (tls_size) + memcpy(__tls_start, __tdata_lma, tls_size); } diff --git a/nx/source/system/readtp.s b/nx/source/system/readtp.s new file mode 100644 index 00000000..1d2065d2 --- /dev/null +++ b/nx/source/system/readtp.s @@ -0,0 +1,10 @@ + .section .text.__aarch64_read_tp, "ax", %progbits + .global __aarch64_read_tp + .type __aarch64_read_tp, %function + .align 2 + .cfi_startproc +__aarch64_read_tp: + mrs x0, tpidrro_el0 + ldr x0, [x0, #0x1F8] + ret + .cfi_endproc