Linker script refactoring, see details:

- Added separate relro "segment" (introduced by [17.0.0+])
- Reordered sections to better reflect official layout
- Fixed handling of TLS segment alignment
This commit is contained in:
fincs 2023-10-19 20:26:54 +02:00
parent 288a09c4eb
commit 3a5d0dae35
5 changed files with 146 additions and 102 deletions

View File

@ -23,6 +23,21 @@ typedef struct {
void* tls_tp; // !! Offset needs to be TLS+0x1F8 for __aarch64_read_tp !! void* tls_tp; // !! Offset needs to be TLS+0x1F8 for __aarch64_read_tp !!
} ThreadVars; } ThreadVars;
extern const u8 __tdata_lma[];
extern const u8 __tdata_lma_end[];
extern u8 __tls_start[];
extern u8 __tls_end[];
extern size_t __tls_align;
static inline ThreadVars* getThreadVars(void) { static inline ThreadVars* getThreadVars(void) {
return (ThreadVars*)((u8*)armGetTls() + 0x200 - sizeof(ThreadVars)); return (ThreadVars*)((u8*)armGetTls() + 0x200 - sizeof(ThreadVars));
} }
NX_INLINE size_t getTlsStartOffset(void)
{
    // Layout of a TLS region: it opens with the Thread Control Block (TCB),
    // which is meant to hold two pointers, and the tdata/tbss image comes
    // right after it. When that image asks for an alignment larger than the
    // TCB itself, the image's starting offset is rounded up to that alignment.
    //
    // Returns the offset from the start of the TLS region to the tdata/tbss
    // image: the larger of __tls_align and the size of two pointers.
    const size_t tcb_size = 2 * sizeof(void*);

    if (__tls_align > tcb_size) {
        return __tls_align;
    }

    return tcb_size;
}

View File

@ -16,11 +16,6 @@
#define USER_TLS_END (0x200 - sizeof(ThreadVars)) #define USER_TLS_END (0x200 - sizeof(ThreadVars))
#define NUM_TLS_SLOTS ((USER_TLS_END - USER_TLS_BEGIN) / sizeof(void*)) #define NUM_TLS_SLOTS ((USER_TLS_END - USER_TLS_BEGIN) / sizeof(void*))
extern const u8 __tdata_lma[];
extern const u8 __tdata_lma_end[];
extern u8 __tls_start[];
extern u8 __tls_end[];
static Mutex g_threadMutex; static Mutex g_threadMutex;
static Thread* g_threadList; static Thread* g_threadList;
@ -45,7 +40,7 @@ static void _EntryWrap(ThreadEntryArgs* args) {
tv->magic = THREADVARS_MAGIC; tv->magic = THREADVARS_MAGIC;
tv->thread_ptr = args->t; tv->thread_ptr = args->t;
tv->reent = args->reent; tv->reent = args->reent;
tv->tls_tp = (u8*)args->tls-2*sizeof(void*); // subtract size of Thread Control Block (TCB) tv->tls_tp = (u8*)args->tls-getTlsStartOffset();
tv->handle = args->t->handle; tv->handle = args->t->handle;
// Initialize thread info // Initialize thread info
@ -94,28 +89,35 @@ Result threadCreate(
Thread* t, ThreadFunc entry, void* arg, void* stack_mem, size_t stack_sz, Thread* t, ThreadFunc entry, void* arg, void* stack_mem, size_t stack_sz,
int prio, int cpuid) int prio, int cpuid)
{ {
const size_t tls_sz = (__tls_end-__tls_start+0xF) &~ 0xF;
const size_t reent_sz = (sizeof(struct _reent)+0xF) &~ 0xF; const size_t reent_sz = (sizeof(struct _reent)+0xF) &~ 0xF;
const size_t tls_sz = (__tls_end-__tls_start+0xF) &~ 0xF;
// Verify stack size alignment
if (stack_sz & 0xFFF) {
return MAKERESULT(Module_Libnx, LibnxError_BadInput);
}
bool owns_stack_mem; bool owns_stack_mem;
if (stack_mem == NULL) { if (stack_mem == NULL) {
// Allocate new memory, stack then reent then tls. // Allocate new memory for the stack, tls and reent.
stack_mem = __libnx_aligned_alloc(0x1000, stack_sz + reent_sz + tls_sz); stack_mem = __libnx_aligned_alloc(0x1000, stack_sz + tls_sz + reent_sz);
owns_stack_mem = true; owns_stack_mem = true;
} else { } else {
// Use provided memory for stack, reent, and tls. // Verify alignment of provided memory.
if (((uintptr_t)stack_mem & 0xFFF) || (stack_sz & 0xFFF)) { if ((uintptr_t)stack_mem & 0xFFF) {
return MAKERESULT(Module_Libnx, LibnxError_BadInput); return MAKERESULT(Module_Libnx, LibnxError_BadInput);
} }
// Ensure we don't go out of bounds. // Ensure we don't go out of bounds.
if (stack_sz <= tls_sz + reent_sz) { size_t align_mask = getTlsStartOffset()-1;
size_t needed_sz = (tls_sz + reent_sz + align_mask) &~ align_mask;
if (stack_sz <= needed_sz + sizeof(ThreadEntryArgs)) {
return MAKERESULT(Module_Libnx, LibnxError_OutOfMemory); return MAKERESULT(Module_Libnx, LibnxError_OutOfMemory);
} }
stack_sz -= tls_sz + reent_sz; // Use provided memory for the stack, tls and reent.
stack_sz -= needed_sz;
owns_stack_mem = false; owns_stack_mem = false;
} }
@ -123,7 +125,7 @@ Result threadCreate(
return MAKERESULT(Module_Libnx, LibnxError_OutOfMemory); return MAKERESULT(Module_Libnx, LibnxError_OutOfMemory);
} }
// Stack size may be unaligned in either case. // Total allocation size may be unaligned in either case.
virtmemLock(); virtmemLock();
const size_t aligned_stack_sz = (stack_sz + tls_sz + reent_sz + 0xFFF) & ~0xFFF; const size_t aligned_stack_sz = (stack_sz + tls_sz + reent_sz + 0xFFF) & ~0xFFF;
void* stack_mirror = virtmemFindStack(aligned_stack_sz, 0x4000); void* stack_mirror = virtmemFindStack(aligned_stack_sz, 0x4000);
@ -134,8 +136,9 @@ Result threadCreate(
{ {
uintptr_t stack_top = (uintptr_t)stack_mirror + stack_sz - sizeof(ThreadEntryArgs); uintptr_t stack_top = (uintptr_t)stack_mirror + stack_sz - sizeof(ThreadEntryArgs);
ThreadEntryArgs* args = (ThreadEntryArgs*) stack_top; ThreadEntryArgs* args = (ThreadEntryArgs*) stack_top;
void *reent = (void*)((uintptr_t)stack_mirror + stack_sz); void *tls = (void*)((uintptr_t)stack_mirror + stack_sz);
void *tls = (void*)((uintptr_t)reent + reent_sz); void *reent = (void*)((uintptr_t)tls + tls_sz);
Handle handle; Handle handle;
t->handle = INVALID_HANDLE; t->handle = INVALID_HANDLE;

View File

@ -30,10 +30,6 @@ struct __pthread_t
void __attribute__((weak)) NORETURN __libnx_exit(int rc); void __attribute__((weak)) NORETURN __libnx_exit(int rc);
extern const u8 __tdata_lma[];
extern const u8 __tdata_lma_end[];
extern u8 __tls_start[];
/// TimeType passed to timeGetCurrentTime() during time initialization. If that fails and __nx_time_type isn't TimeType_Default, timeGetCurrentTime() will be called again with TimeType_Default. /// TimeType passed to timeGetCurrentTime() during time initialization. If that fails and __nx_time_type isn't TimeType_Default, timeGetCurrentTime() will be called again with TimeType_Default.
__attribute__((weak)) TimeType __nx_time_type = TimeType_Default; __attribute__((weak)) TimeType __nx_time_type = TimeType_Default;
@ -438,7 +434,7 @@ void newlibSetup(void)
tv->reent = _impure_ptr; tv->reent = _impure_ptr;
#pragma GCC diagnostic push #pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds" #pragma GCC diagnostic ignored "-Warray-bounds"
tv->tls_tp = __tls_start-2*sizeof(void*); // subtract size of Thread Control Block (TCB) tv->tls_tp = __tls_start-getTlsStartOffset();
#pragma GCC diagnostic pop #pragma GCC diagnostic pop
tv->handle = envGetMainThreadHandle(); tv->handle = envGetMainThreadHandle();

View File

@ -11,10 +11,11 @@ PHDRS
SECTIONS SECTIONS
{ {
PROVIDE_HIDDEN( __start__ = 0x0 );
/* =========== CODE section =========== */ /* =========== CODE section =========== */
PROVIDE(__start__ = 0x0);
. = __start__; . = __start__;
__code_start = . ; PROVIDE_HIDDEN( __code_start = . );
.text : .text :
{ {
@ -48,111 +49,143 @@ SECTIONS
/* =========== RODATA section =========== */ /* =========== RODATA section =========== */
. = ALIGN(0x1000); . = ALIGN(0x1000);
__rodata_start = . ; PROVIDE_HIDDEN( __rodata_start = . );
.nx-module-name : { KEEP (*(.nx-module-name)) } :rodata .nx-module-name : { KEEP (*(.nx-module-name)) } :rodata
.rela.dyn : { *(.rela.*) } :rodata
.relr.dyn : { *(.relr.*) } :rodata
.hash : { *(.hash) } :rodata
.gnu.hash : { *(.gnu.hash) } :rodata
.dynsym : { *(.dynsym) } :rodata
.dynstr : { *(.dynstr) } :rodata
.rodata : .rodata :
{ {
*(.rodata .rodata.* .gnu.linkonce.r.*) *(.rodata .rodata.* .gnu.linkonce.r.*)
. = ALIGN(8); . = ALIGN(8);
} :rodata } :rodata
.eh_frame_hdr : { __eh_frame_hdr_start = .; *(.eh_frame_hdr) *(.eh_frame_entry .eh_frame_entry.*) __eh_frame_hdr_end = .; } :rodata .tls.align :
.eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) *(.eh_frame.*) } :rodata {
.gcc_except_table : ONLY_IF_RO { *(.gcc_except_table .gcc_except_table.*) } :rodata QUAD( MAX( ALIGNOF(.tdata), ALIGNOF(.tbss) ) )
.gnu_extab : ONLY_IF_RO { *(.gnu_extab*) } : rodata } :rodata
PROVIDE_HIDDEN( __tls_align = ADDR(.tls.align) );
.gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } :rodata
.eh_frame_hdr : { *(.eh_frame_hdr) *(.eh_frame_entry .eh_frame_entry.*) } :rodata
.eh_frame : { KEEP (*(.eh_frame)) *(.eh_frame.*) } :rodata
.gnu_extab : { *(.gnu_extab*) } : rodata
.exception_ranges : { *(.exception_ranges .exception_ranges*) } :rodata
PROVIDE_HIDDEN( __eh_frame_hdr_start = ADDR(.eh_frame_hdr) );
PROVIDE_HIDDEN( __eh_frame_hdr_end = ADDR(.eh_frame_hdr) + SIZEOF(.eh_frame_hdr) );
.dynamic : { *(.dynamic) } :rodata :dyn
.dynsym : { *(.dynsym) } :rodata
.dynstr : { *(.dynstr) } :rodata
.rela.dyn : { *(.rela.*) } :rodata
.interp : { *(.interp) } :rodata
.hash : { *(.hash) } :rodata
.gnu.hash : { *(.gnu.hash) } :rodata
.gnu.version : { *(.gnu.version) } :rodata .gnu.version : { *(.gnu.version) } :rodata
.gnu.version_d : { *(.gnu.version_d) } :rodata .gnu.version_d : { *(.gnu.version_d) } :rodata
.gnu.version_r : { *(.gnu.version_r) } :rodata .gnu.version_r : { *(.gnu.version_r) } :rodata
.note.gnu.build-id : { *(.note.gnu.build-id) } :rodata .note.gnu.build-id : { *(.note.gnu.build-id) } :rodata
/* =========== DATA section =========== */ /* =========== RELRO section =========== */
. = ALIGN(0x1000); . = ALIGN(0x1000);
__data_start = . ; PROVIDE_HIDDEN( __relro_start = . );
.eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) *(.eh_frame.*) } :data .preinit_array :
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) } :data
.gnu_extab : ONLY_IF_RW { *(.gnu_extab*) } : data
.exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) } :data
.tdata ALIGN(8) :
{ {
__tdata_lma = .; PROVIDE_HIDDEN( __preinit_array_start = . );
*(.tdata .tdata.* .gnu.linkonce.td.*)
. = ALIGN(8);
__tdata_lma_end = .;
} :data
.tbss ALIGN(8) :
{
*(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon)
. = ALIGN(8);
} :data
.preinit_array ALIGN(8) :
{
PROVIDE (__preinit_array_start = .);
KEEP (*(.preinit_array)) KEEP (*(.preinit_array))
PROVIDE (__preinit_array_end = .); PROVIDE_HIDDEN( __preinit_array_end = . );
} :data } :data
.init_array ALIGN(8) : .init_array :
{ {
PROVIDE (__init_array_start = .); PROVIDE_HIDDEN( __init_array_start = . );
KEEP( *(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)) ) KEEP( *(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)) )
KEEP( *(.init_array .ctors) ) KEEP( *(.init_array .ctors) )
PROVIDE (__init_array_end = .); PROVIDE_HIDDEN( __init_array_end = . );
} :data } :data
.fini_array ALIGN(8) : .fini_array :
{ {
PROVIDE (__fini_array_start = .); PROVIDE_HIDDEN( __fini_array_start = . );
KEEP( *(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)) ) KEEP( *(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)) )
KEEP( *(.fini_array .dtors) ) KEEP( *(.fini_array .dtors) )
PROVIDE (__fini_array_end = .); PROVIDE_HIDDEN( __fini_array_end = . );
} :data } :data
__got_start__ = .; .data.rel.ro :
{
*(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*)
*(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*)
. = ALIGN(8);
} :data
.dynamic : { *(.dynamic) } :data :dyn
.got : { *(.got) *(.igot) } :data .got : { *(.got) *(.igot) } :data
.got.plt : { *(.got.plt) *(.igot.plt) } :data .got.plt : { *(.got.plt) *(.igot.plt) } :data
__got_end__ = .; PROVIDE_HIDDEN( __got_start__ = ADDR(.got) );
PROVIDE_HIDDEN( __got_end__ = ADDR(.got.plt) + SIZEOF(.got.plt) );
.data ALIGN(8) : /* =========== DATA section =========== */
. = ALIGN(0x1000);
PROVIDE_HIDDEN( __data_start = . );
.data :
{ {
*(.data .data.* .gnu.linkonce.d.*) *(.data .data.* .gnu.linkonce.d.*)
SORT(CONSTRUCTORS) SORT(CONSTRUCTORS)
. = ALIGN(8);
} :data } :data
.bss ALIGN(8) : .tdata :
{
*(.tdata .tdata.* .gnu.linkonce.td.*)
. = ALIGN(8);
} :data
PROVIDE_HIDDEN( __tdata_lma = ADDR(.tdata) );
PROVIDE_HIDDEN( __tdata_lma_end = ADDR(.tdata) + SIZEOF(.tdata) );
.tbss :
{
*(.tbss .tbss.* .gnu.linkonce.tb.*)
*(.tcommon)
. = ALIGN(8);
} :data
.bss :
{ {
*(.dynbss) *(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*) *(.bss .bss.* .gnu.linkonce.b.*)
*(COMMON) *(COMMON)
. = ALIGN(8); . = ALIGN(8);
} :data
/* Reserve space for the TLS segment of the main thread */ /* Reserve space for the TLS segment of the main thread */
__tls_start = .; .main.tls ALIGN(MAX(ALIGNOF(.tdata),ALIGNOF(.tbss))) :
. += + SIZEOF(.tdata) + SIZEOF(.tbss); {
__tls_end = .; . += SIZEOF(.tdata);
. = ALIGN(ALIGNOF(.tbss));
. += SIZEOF(.tbss);
} :data } :data
__bss_start__ = ADDR(.bss);
__bss_end__ = ADDR(.bss) + SIZEOF(.bss);
__end__ = ABSOLUTE(.) ; PROVIDE_HIDDEN( __tls_start = ADDR(.main.tls) );
PROVIDE_HIDDEN( __tls_end = ADDR(.main.tls) + SIZEOF(.main.tls) );
PROVIDE_HIDDEN( __bss_start__ = ADDR(.bss) );
PROVIDE_HIDDEN( __bss_end__ = __tls_end );
PROVIDE_HIDDEN( __end__ = ABSOLUTE(.) );
/* =========== Argument buffer =========== */
. = ALIGN(0x1000); . = ALIGN(0x1000);
__argdata__ = ABSOLUTE(.) ; PROVIDE_HIDDEN( __argdata__ = ABSOLUTE(.) );
/* ================== /* ==================
==== Metadata ==== ==== Metadata ====

View File

@ -1,8 +1,5 @@
%rename link old_link
*link: *link:
%(old_link) -T %:getenv(DEVKITPRO /libnx/switch.ld) -pie --no-dynamic-linker --spare-dynamic-tags=0 --gc-sections -z text -z nodynamic-undefined-weak --build-id=sha1 --nx-module-name + -T %:getenv(DEVKITPRO /libnx/switch.ld) -pie --no-dynamic-linker --spare-dynamic-tags=0 --gc-sections -z text -z now -z nodynamic-undefined-weak --build-id=sha1 --nx-module-name
*startfile: *startfile:
crti%O%s crtbegin%O%s --require-defined=main crti%O%s crtbegin%O%s --require-defined=main