From 3a5d0dae359ee2d550e3704c2f6c2912121d9654 Mon Sep 17 00:00:00 2001 From: fincs Date: Thu, 19 Oct 2023 20:26:54 +0200 Subject: [PATCH] Linker script refactoring, see details: - Added separate relro "segment" (introduced by [17.0.0+]) - Reordered sections to better reflect official layout - Fixed handling of TLS segment alignment --- nx/source/internal.h | 15 +++ nx/source/kernel/thread.c | 39 ++++---- nx/source/runtime/newlib.c | 6 +- nx/switch.ld | 183 ++++++++++++++++++++++--------------- nx/switch.specs | 5 +- 5 files changed, 146 insertions(+), 102 deletions(-) diff --git a/nx/source/internal.h b/nx/source/internal.h index 422fe7de..37b3e538 100644 --- a/nx/source/internal.h +++ b/nx/source/internal.h @@ -23,6 +23,21 @@ typedef struct { void* tls_tp; // !! Offset needs to be TLS+0x1F8 for __aarch64_read_tp !! } ThreadVars; +extern const u8 __tdata_lma[]; +extern const u8 __tdata_lma_end[]; +extern u8 __tls_start[]; +extern u8 __tls_end[]; +extern size_t __tls_align; + static inline ThreadVars* getThreadVars(void) { return (ThreadVars*)((u8*)armGetTls() + 0x200 - sizeof(ThreadVars)); } + +NX_INLINE size_t getTlsStartOffset(void) +{ + // TLS region begins with the Thread Control Block (TCB), which is intended + // to contain two pointers. The actual tdata/tbss segment follows the TCB, + // however if it requires special alignment the offset is rounded up. + size_t tcb_sz = 2*sizeof(void*); + return __tls_align > tcb_sz ? __tls_align : tcb_sz; +} diff --git a/nx/source/kernel/thread.c b/nx/source/kernel/thread.c index 46a01718..bd758527 100644 --- a/nx/source/kernel/thread.c +++ b/nx/source/kernel/thread.c @@ -16,11 +16,6 @@ #define USER_TLS_END (0x200 - sizeof(ThreadVars)) #define NUM_TLS_SLOTS ((USER_TLS_END - USER_TLS_BEGIN) / sizeof(void*)) -extern const u8 __tdata_lma[]; -extern const u8 __tdata_lma_end[]; -extern u8 __tls_start[]; -extern u8 __tls_end[]; - static Mutex g_threadMutex; static Thread* g_threadList; @@ -45,7 +40,7 @@ static void _EntryWrap(ThreadEntryArgs* args) { tv->magic = THREADVARS_MAGIC; tv->thread_ptr = args->t; tv->reent = args->reent; - tv->tls_tp = (u8*)args->tls-2*sizeof(void*); // subtract size of Thread Control Block (TCB) + tv->tls_tp = (u8*)args->tls-getTlsStartOffset(); tv->handle = args->t->handle; // Initialize thread info @@ -94,28 +89,35 @@ Result threadCreate( Thread* t, ThreadFunc entry, void* arg, void* stack_mem, size_t stack_sz, int prio, int cpuid) { - - const size_t tls_sz = (__tls_end-__tls_start+0xF) &~ 0xF; const size_t reent_sz = (sizeof(struct _reent)+0xF) &~ 0xF; + const size_t tls_sz = (__tls_end-__tls_start+0xF) &~ 0xF; + + // Verify stack size alignment + if (stack_sz & 0xFFF) { + return MAKERESULT(Module_Libnx, LibnxError_BadInput); + } bool owns_stack_mem; if (stack_mem == NULL) { - // Allocate new memory, stack then reent then tls. - stack_mem = __libnx_aligned_alloc(0x1000, stack_sz + reent_sz + tls_sz); + // Allocate new memory for the stack, tls and reent. + stack_mem = __libnx_aligned_alloc(0x1000, stack_sz + tls_sz + reent_sz); owns_stack_mem = true; } else { - // Use provided memory for stack, reent, and tls. - if (((uintptr_t)stack_mem & 0xFFF) || (stack_sz & 0xFFF)) { + // Verify alignment of provided memory. + if ((uintptr_t)stack_mem & 0xFFF) { return MAKERESULT(Module_Libnx, LibnxError_BadInput); } // Ensure we don't go out of bounds. - if (stack_sz <= tls_sz + reent_sz) { + size_t align_mask = getTlsStartOffset()-1; + size_t needed_sz = (tls_sz + reent_sz + align_mask) &~ align_mask; + if (stack_sz <= needed_sz + sizeof(ThreadEntryArgs)) { return MAKERESULT(Module_Libnx, LibnxError_OutOfMemory); } - stack_sz -= tls_sz + reent_sz; + // Use provided memory for the stack, tls and reent. + stack_sz -= needed_sz; owns_stack_mem = false; } @@ -123,9 +125,9 @@ Result threadCreate( return MAKERESULT(Module_Libnx, LibnxError_OutOfMemory); } - // Stack size may be unaligned in either case. + // Total allocation size may be unaligned in either case. virtmemLock(); - const size_t aligned_stack_sz = (stack_sz + tls_sz + reent_sz +0xFFF) & ~0xFFF; + const size_t aligned_stack_sz = (stack_sz + tls_sz + reent_sz + 0xFFF) & ~0xFFF; void* stack_mirror = virtmemFindStack(aligned_stack_sz, 0x4000); Result rc = svcMapMemory(stack_mirror, stack_mem, aligned_stack_sz); virtmemUnlock(); @@ -134,8 +136,9 @@ Result threadCreate( { uintptr_t stack_top = (uintptr_t)stack_mirror + stack_sz - sizeof(ThreadEntryArgs); ThreadEntryArgs* args = (ThreadEntryArgs*) stack_top; - void *reent = (void*)((uintptr_t)stack_mirror + stack_sz); - void *tls = (void*)((uintptr_t)reent + reent_sz); + void *tls = (void*)((uintptr_t)stack_mirror + stack_sz); + void *reent = (void*)((uintptr_t)tls + tls_sz); + Handle handle; t->handle = INVALID_HANDLE; diff --git a/nx/source/runtime/newlib.c b/nx/source/runtime/newlib.c index c348ef58..ac260d00 100644 --- a/nx/source/runtime/newlib.c +++ b/nx/source/runtime/newlib.c @@ -30,10 +30,6 @@ struct __pthread_t void __attribute__((weak)) NORETURN __libnx_exit(int rc); -extern const u8 __tdata_lma[]; -extern const u8 __tdata_lma_end[]; -extern u8 __tls_start[]; - /// TimeType passed to timeGetCurrentTime() during time initialization. If that fails and __nx_time_type isn't TimeType_Default, timeGetCurrentTime() will be called again with TimeType_Default. __attribute__((weak)) TimeType __nx_time_type = TimeType_Default; @@ -438,7 +434,7 @@ void newlibSetup(void) tv->reent = _impure_ptr; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Warray-bounds" - tv->tls_tp = __tls_start-2*sizeof(void*); // subtract size of Thread Control Block (TCB) + tv->tls_tp = __tls_start-getTlsStartOffset(); #pragma GCC diagnostic pop tv->handle = envGetMainThreadHandle(); diff --git a/nx/switch.ld b/nx/switch.ld index 4d1deb1c..3be8a25a 100644 --- a/nx/switch.ld +++ b/nx/switch.ld @@ -11,10 +11,11 @@ PHDRS SECTIONS { + PROVIDE_HIDDEN( __start__ = 0x0 ); + /* =========== CODE section =========== */ - PROVIDE(__start__ = 0x0); . = __start__; - __code_start = . ; + PROVIDE_HIDDEN( __code_start = . ); .text : { @@ -48,111 +49,143 @@ SECTIONS /* =========== RODATA section =========== */ . = ALIGN(0x1000); - __rodata_start = . ; + PROVIDE_HIDDEN( __rodata_start = . ); .nx-module-name : { KEEP (*(.nx-module-name)) } :rodata + .rela.dyn : { *(.rela.*) } :rodata + .relr.dyn : { *(.relr.*) } :rodata + + .hash : { *(.hash) } :rodata + .gnu.hash : { *(.gnu.hash) } :rodata + + .dynsym : { *(.dynsym) } :rodata + .dynstr : { *(.dynstr) } :rodata + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) . = ALIGN(8); } :rodata - .eh_frame_hdr : { __eh_frame_hdr_start = .; *(.eh_frame_hdr) *(.eh_frame_entry .eh_frame_entry.*) __eh_frame_hdr_end = .; } :rodata - .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) *(.eh_frame.*) } :rodata - .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table .gcc_except_table.*) } :rodata - .gnu_extab : ONLY_IF_RO { *(.gnu_extab*) } : rodata + .tls.align : + { + QUAD( MAX( ALIGNOF(.tdata), ALIGNOF(.tbss) ) ) + } :rodata + + PROVIDE_HIDDEN( __tls_align = ADDR(.tls.align) ); + + .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } :rodata + .eh_frame_hdr : { *(.eh_frame_hdr) *(.eh_frame_entry .eh_frame_entry.*) } :rodata + .eh_frame : { KEEP (*(.eh_frame)) *(.eh_frame.*) } :rodata + .gnu_extab : { *(.gnu_extab*) } : rodata + .exception_ranges : { *(.exception_ranges .exception_ranges*) } :rodata + + PROVIDE_HIDDEN( __eh_frame_hdr_start = ADDR(.eh_frame_hdr) ); + PROVIDE_HIDDEN( __eh_frame_hdr_end = ADDR(.eh_frame_hdr) + SIZEOF(.eh_frame_hdr) ); - .dynamic : { *(.dynamic) } :rodata :dyn - .dynsym : { *(.dynsym) } :rodata - .dynstr : { *(.dynstr) } :rodata - .rela.dyn : { *(.rela.*) } :rodata - .interp : { *(.interp) } :rodata - .hash : { *(.hash) } :rodata - .gnu.hash : { *(.gnu.hash) } :rodata .gnu.version : { *(.gnu.version) } :rodata .gnu.version_d : { *(.gnu.version_d) } :rodata .gnu.version_r : { *(.gnu.version_r) } :rodata + .note.gnu.build-id : { *(.note.gnu.build-id) } :rodata + /* =========== RELRO section =========== */ + . = ALIGN(0x1000); + PROVIDE_HIDDEN( __relro_start = . ); + + .preinit_array : + { + PROVIDE_HIDDEN( __preinit_array_start = . ); + KEEP (*(.preinit_array)) + PROVIDE_HIDDEN( __preinit_array_end = . ); + } :data + + .init_array : + { + PROVIDE_HIDDEN( __init_array_start = . ); + KEEP( *(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)) ) + KEEP( *(.init_array .ctors) ) + PROVIDE_HIDDEN( __init_array_end = . ); + } :data + + .fini_array : + { + PROVIDE_HIDDEN( __fini_array_start = . ); + KEEP( *(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)) ) + KEEP( *(.fini_array .dtors) ) + PROVIDE_HIDDEN( __fini_array_end = . ); + } :data + + .data.rel.ro : + { + *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) + *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) + . = ALIGN(8); + } :data + + .dynamic : { *(.dynamic) } :data :dyn + + .got : { *(.got) *(.igot) } :data + .got.plt : { *(.got.plt) *(.igot.plt) } :data + + PROVIDE_HIDDEN( __got_start__ = ADDR(.got) ); + PROVIDE_HIDDEN( __got_end__ = ADDR(.got.plt) + SIZEOF(.got.plt) ); + /* =========== DATA section =========== */ . = ALIGN(0x1000); - __data_start = . ; + PROVIDE_HIDDEN( __data_start = . ); - .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) *(.eh_frame.*) } :data - .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) } :data - .gnu_extab : ONLY_IF_RW { *(.gnu_extab*) } : data - .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) } :data - - .tdata ALIGN(8) : - { - __tdata_lma = .; - *(.tdata .tdata.* .gnu.linkonce.td.*) - . = ALIGN(8); - __tdata_lma_end = .; - } :data - - .tbss ALIGN(8) : - { - *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) - . = ALIGN(8); - } :data - - .preinit_array ALIGN(8) : - { - PROVIDE (__preinit_array_start = .); - KEEP (*(.preinit_array)) - PROVIDE (__preinit_array_end = .); - } :data - - .init_array ALIGN(8) : - { - PROVIDE (__init_array_start = .); - KEEP( *(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)) ) - KEEP( *(.init_array .ctors) ) - PROVIDE (__init_array_end = .); - } :data - - .fini_array ALIGN(8) : - { - PROVIDE (__fini_array_start = .); - KEEP( *(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)) ) - KEEP( *(.fini_array .dtors) ) - PROVIDE (__fini_array_end = .); - } :data - - __got_start__ = .; - - .got : { *(.got) *(.igot) } :data - .got.plt : { *(.got.plt) *(.igot.plt) } :data - - __got_end__ = .; - - .data ALIGN(8) : + .data : { *(.data .data.* .gnu.linkonce.d.*) SORT(CONSTRUCTORS) + . = ALIGN(8); } :data - .bss ALIGN(8) : + .tdata : + { + *(.tdata .tdata.* .gnu.linkonce.td.*) + . = ALIGN(8); + } :data + + PROVIDE_HIDDEN( __tdata_lma = ADDR(.tdata) ); + PROVIDE_HIDDEN( __tdata_lma_end = ADDR(.tdata) + SIZEOF(.tdata) ); + + .tbss : + { + *(.tbss .tbss.* .gnu.linkonce.tb.*) + *(.tcommon) + . = ALIGN(8); + } :data + + .bss : { *(.dynbss) *(.bss .bss.* .gnu.linkonce.b.*) *(COMMON) . = ALIGN(8); + } :data - /* Reserve space for the TLS segment of the main thread */ - __tls_start = .; - . += + SIZEOF(.tdata) + SIZEOF(.tbss); - __tls_end = .; - } : data - __bss_start__ = ADDR(.bss); - __bss_end__ = ADDR(.bss) + SIZEOF(.bss); + /* Reserve space for the TLS segment of the main thread */ + .main.tls ALIGN(MAX(ALIGNOF(.tdata),ALIGNOF(.tbss))) : + { + . += SIZEOF(.tdata); + . = ALIGN(ALIGNOF(.tbss)); + . += SIZEOF(.tbss); + } :data - __end__ = ABSOLUTE(.) ; + PROVIDE_HIDDEN( __tls_start = ADDR(.main.tls) ); + PROVIDE_HIDDEN( __tls_end = ADDR(.main.tls) + SIZEOF(.main.tls) ); + PROVIDE_HIDDEN( __bss_start__ = ADDR(.bss) ); + PROVIDE_HIDDEN( __bss_end__ = __tls_end ); + + PROVIDE_HIDDEN( __end__ = ABSOLUTE(.) ); + + /* =========== Argument buffer =========== */ . = ALIGN(0x1000); - __argdata__ = ABSOLUTE(.) ; + PROVIDE_HIDDEN( __argdata__ = ABSOLUTE(.) ); /* ================== ==== Metadata ==== diff --git a/nx/switch.specs b/nx/switch.specs index 47284225..c486af8d 100644 --- a/nx/switch.specs +++ b/nx/switch.specs @@ -1,8 +1,5 @@ -%rename link old_link - *link: -%(old_link) -T %:getenv(DEVKITPRO /libnx/switch.ld) -pie --no-dynamic-linker --spare-dynamic-tags=0 --gc-sections -z text -z nodynamic-undefined-weak --build-id=sha1 --nx-module-name ++ -T %:getenv(DEVKITPRO /libnx/switch.ld) -pie --no-dynamic-linker --spare-dynamic-tags=0 --gc-sections -z text -z now -z nodynamic-undefined-weak --build-id=sha1 --nx-module-name *startfile: crti%O%s crtbegin%O%s --require-defined=main -