diff --git a/libmesosphere/include/mesosphere/arch/arm64/init/kern_k_init_page_table.hpp b/libmesosphere/include/mesosphere/arch/arm64/init/kern_k_init_page_table.hpp index e1618dc2..e0613fbd 100644 --- a/libmesosphere/include/mesosphere/arch/arm64/init/kern_k_init_page_table.hpp +++ b/libmesosphere/include/mesosphere/arch/arm64/init/kern_k_init_page_table.hpp @@ -279,20 +279,21 @@ namespace ams::kern::arch::arm64::init { /* Invalidate the entire tlb. */ cpu::DataSynchronizationBarrierInnerShareable(); - cpu::InvalidateEntireTlbInnerShareable(); + cpu::InvalidateEntireTlb(); /* Copy data, if we should. */ const u64 negative_block_size_for_mask = static_cast<u64>(-static_cast<s64>(block_size)); const u64 offset_mask = negative_block_size_for_mask & ((1ul << 48) - 1); const KVirtualAddress copy_src_addr = KVirtualAddress(src_saved.GetRawAttributesUnsafeForSwap() & offset_mask); const KVirtualAddress copy_dst_addr = KVirtualAddress(dst_saved.GetRawAttributesUnsafeForSwap() & offset_mask); - if (block_size && do_copy) { + if (do_copy) { u8 tmp[0x100]; for (size_t ofs = 0; ofs < block_size; ofs += sizeof(tmp)) { std::memcpy(tmp, GetVoidPointer(copy_src_addr + ofs), sizeof(tmp)); std::memcpy(GetVoidPointer(copy_src_addr + ofs), GetVoidPointer(copy_dst_addr + ofs), sizeof(tmp)); std::memcpy(GetVoidPointer(copy_dst_addr + ofs), tmp, sizeof(tmp)); } + cpu::DataSynchronizationBarrierInnerShareable(); } /* Swap the mappings. */ @@ -339,7 +340,6 @@ namespace ams::kern::arch::arm64::init { /* Can we make an L1 block? */ if (util::IsAligned(GetInteger(virt_addr), L1BlockSize) && util::IsAligned(GetInteger(phys_addr), L1BlockSize) && size >= L1BlockSize) { *l1_entry = L1PageTableEntry(PageTableEntry::BlockTag{}, phys_addr, attr, PageTableEntry::SoftwareReservedBit_None, false); - cpu::DataSynchronizationBarrierInnerShareable(); virt_addr += L1BlockSize; phys_addr += L1BlockSize; @@ -350,8 +350,8 @@ namespace ams::kern::arch::arm64::init { /* If we don't already have an L2 table, we need to make a new one. */ if (!l1_entry->IsTable()) { KPhysicalAddress new_table = AllocateNewPageTable(allocator); - *l1_entry = L1PageTableEntry(PageTableEntry::TableTag{}, new_table, attr.IsPrivilegedExecuteNever()); cpu::DataSynchronizationBarrierInnerShareable(); + *l1_entry = L1PageTableEntry(PageTableEntry::TableTag{}, new_table, attr.IsPrivilegedExecuteNever()); } L2PageTableEntry *l2_entry = GetL2Entry(l1_entry, virt_addr); @@ -365,14 +365,12 @@ namespace ams::kern::arch::arm64::init { phys_addr += L2BlockSize; size -= L2BlockSize; } - cpu::DataSynchronizationBarrierInnerShareable(); continue; } /* Can we make an L2 block? */ if (util::IsAligned(GetInteger(virt_addr), L2BlockSize) && util::IsAligned(GetInteger(phys_addr), L2BlockSize) && size >= L2BlockSize) { *l2_entry = L2PageTableEntry(PageTableEntry::BlockTag{}, phys_addr, attr, PageTableEntry::SoftwareReservedBit_None, false); - cpu::DataSynchronizationBarrierInnerShareable(); virt_addr += L2BlockSize; phys_addr += L2BlockSize; @@ -383,8 +381,8 @@ namespace ams::kern::arch::arm64::init { /* If we don't already have an L3 table, we need to make a new one. 
*/ if (!l2_entry->IsTable()) { KPhysicalAddress new_table = AllocateNewPageTable(allocator); - *l2_entry = L2PageTableEntry(PageTableEntry::TableTag{}, new_table, attr.IsPrivilegedExecuteNever()); cpu::DataSynchronizationBarrierInnerShareable(); + *l2_entry = L2PageTableEntry(PageTableEntry::TableTag{}, new_table, attr.IsPrivilegedExecuteNever()); } L3PageTableEntry *l3_entry = GetL3Entry(l2_entry, virt_addr); @@ -398,17 +396,18 @@ namespace ams::kern::arch::arm64::init { phys_addr += L3BlockSize; size -= L3BlockSize; } - cpu::DataSynchronizationBarrierInnerShareable(); continue; } /* Make an L3 block. */ *l3_entry = L3PageTableEntry(PageTableEntry::BlockTag{}, phys_addr, attr, PageTableEntry::SoftwareReservedBit_None, false); - cpu::DataSynchronizationBarrierInnerShareable(); virt_addr += L3BlockSize; phys_addr += L3BlockSize; size -= L3BlockSize; } + + /* Ensure data consistency after our mapping is added. */ + cpu::DataSynchronizationBarrierInnerShareable(); } KPhysicalAddress GetPhysicalAddress(KVirtualAddress virt_addr) const { @@ -556,9 +555,6 @@ namespace ams::kern::arch::arm64::init { } void Reprotect(KVirtualAddress virt_addr, size_t size, const PageTableEntry &attr_before, const PageTableEntry &attr_after) { - /* Ensure data consistency before we begin reprotection. */ - cpu::DataSynchronizationBarrierInnerShareable(); - /* Ensure that addresses and sizes are page aligned. */ MESOSPHERE_INIT_ABORT_UNLESS(util::IsAligned(GetInteger(virt_addr), PageSize)); MESOSPHERE_INIT_ABORT_UNLESS(util::IsAligned(size, PageSize)); @@ -699,7 +695,7 @@ namespace ams::kern::arch::arm64::init { this->PhysicallyRandomize(virt_addr, size, L2BlockSize, do_copy); this->PhysicallyRandomize(virt_addr, size, L3ContiguousBlockSize, do_copy); this->PhysicallyRandomize(virt_addr, size, L3BlockSize, do_copy); - cpu::StoreEntireCacheForInit(); + cpu::StoreCacheForInit(GetVoidPointer(virt_addr), size); } }; diff --git a/libmesosphere/include/mesosphere/arch/arm64/kern_cpu.hpp b/libmesosphere/include/mesosphere/arch/arm64/kern_cpu.hpp index 93769e36..e3f1de2e 100644 --- a/libmesosphere/include/mesosphere/arch/arm64/kern_cpu.hpp +++ b/libmesosphere/include/mesosphere/arch/arm64/kern_cpu.hpp @@ -48,6 +48,10 @@ namespace ams::kern::arch::arm64::cpu { __asm__ __volatile__("dsb ish" ::: "memory"); } + ALWAYS_INLINE void DataSynchronizationBarrierInnerShareableStore() { + __asm__ __volatile__("dsb ishst" ::: "memory"); + } + ALWAYS_INLINE void DataMemoryBarrier() { __asm__ __volatile__("dmb sy" ::: "memory"); } @@ -56,16 +60,20 @@ namespace ams::kern::arch::arm64::cpu { __asm__ __volatile__("dmb ish" ::: "memory"); } + ALWAYS_INLINE void DataMemoryBarrierInnerShareableStore() { + __asm__ __volatile__("dmb ishst" ::: "memory"); + } + ALWAYS_INLINE void InstructionMemoryBarrier() { __asm__ __volatile__("isb" ::: "memory"); } - ALWAYS_INLINE void EnsureInstructionConsistencyInnerShareable() { + ALWAYS_INLINE void EnsureInstructionConsistency() { DataSynchronizationBarrierInnerShareable(); InstructionMemoryBarrier(); } - ALWAYS_INLINE void EnsureInstructionConsistency() { + ALWAYS_INLINE void EnsureInstructionConsistencyFullSystem() { DataSynchronizationBarrier(); InstructionMemoryBarrier(); } @@ -182,28 +190,23 @@ namespace ams::kern::arch::arm64::cpu { NOINLINE void SynchronizeAllCores(); /* Cache management helpers. 
*/ - void StoreEntireCacheForInit(); - void FlushEntireCacheForInit(); + void StoreCacheForInit(void *addr, size_t size); void FlushEntireDataCache(); Result InvalidateDataCache(void *addr, size_t size); Result StoreDataCache(const void *addr, size_t size); Result FlushDataCache(const void *addr, size_t size); - Result InvalidateInstructionCache(void *addr, size_t size); void InvalidateEntireInstructionCache(); + void ClearPageToZeroImpl(void *); + ALWAYS_INLINE void ClearPageToZero(void * const page) { MESOSPHERE_ASSERT(util::IsAligned(reinterpret_cast<uintptr_t>(page), PageSize)); MESOSPHERE_ASSERT(page != nullptr); - uintptr_t cur = reinterpret_cast<uintptr_t>(__builtin_assume_aligned(page, PageSize)); - const uintptr_t last = cur + PageSize - DataCacheLineSize; - - for (/* ... */; cur <= last; cur += DataCacheLineSize) { - __asm__ __volatile__("dc zva, %[cur]" :: [cur]"r"(cur) : "memory"); - } + ClearPageToZeroImpl(page); } ALWAYS_INLINE void InvalidateTlbByAsid(u32 asid) { @@ -223,20 +226,15 @@ namespace ams::kern::arch::arm64::cpu { EnsureInstructionConsistency(); } - ALWAYS_INLINE void InvalidateEntireTlbInnerShareable() { - __asm__ __volatile__("tlbi vmalle1is" ::: "memory"); - EnsureInstructionConsistencyInnerShareable(); - } - ALWAYS_INLINE void InvalidateEntireTlbDataOnly() { __asm__ __volatile__("tlbi vmalle1is" ::: "memory"); - DataSynchronizationBarrier(); + DataSynchronizationBarrierInnerShareable(); } ALWAYS_INLINE void InvalidateTlbByVaDataOnly(KProcessAddress virt_addr) { const u64 value = ((GetInteger(virt_addr) >> 12) & 0xFFFFFFFFFFFul); __asm__ __volatile__("tlbi vaae1is, %[value]" :: [value]"r"(value) : "memory"); - DataSynchronizationBarrier(); + DataSynchronizationBarrierInnerShareable(); } ALWAYS_INLINE uintptr_t GetCurrentThreadPointerValue() { diff --git a/libmesosphere/include/mesosphere/arch/arm64/kern_k_interrupt_manager.hpp b/libmesosphere/include/mesosphere/arch/arm64/kern_k_interrupt_manager.hpp index ae5b6051..2a0c1560 100644 --- a/libmesosphere/include/mesosphere/arch/arm64/kern_k_interrupt_manager.hpp +++ b/libmesosphere/include/mesosphere/arch/arm64/kern_k_interrupt_manager.hpp @@ -96,8 +96,6 @@ namespace ams::kern::arch::arm64 { } static void HandleInterrupt(bool user_mode); - - /* Implement more KInterruptManager functionality. 
*/ private: Result BindGlobal(KInterruptHandler *handler, s32 irq, s32 core_id, s32 priority, bool manual_clear, bool level); Result BindLocal(KInterruptHandler *handler, s32 irq, s32 priority, bool manual_clear); diff --git a/libmesosphere/include/mesosphere/arch/arm64/kern_k_page_table.hpp b/libmesosphere/include/mesosphere/arch/arm64/kern_k_page_table.hpp index efaf8a7a..334ec014 100644 --- a/libmesosphere/include/mesosphere/arch/arm64/kern_k_page_table.hpp +++ b/libmesosphere/include/mesosphere/arch/arm64/kern_k_page_table.hpp @@ -174,7 +174,6 @@ namespace ams::kern::arch::arm64 { static NOINLINE void Initialize(s32 core_id); ALWAYS_INLINE void Activate(u32 proc_id) { - cpu::DataSynchronizationBarrier(); cpu::SwitchProcess(m_ttbr, proc_id); } @@ -219,12 +218,13 @@ namespace ams::kern::arch::arm64 { Result ChangePermissions(KProcessAddress virt_addr, size_t num_pages, PageTableEntry entry_template, DisableMergeAttribute disable_merge_attr, bool refresh_mapping, PageLinkedList *page_list, bool reuse_ll); - static ALWAYS_INLINE void PteDataSynchronizationBarrier() { - cpu::DataSynchronizationBarrierInnerShareable(); + static ALWAYS_INLINE void PteDataMemoryBarrier() { + cpu::DataMemoryBarrierInnerShareableStore(); } static ALWAYS_INLINE void ClearPageTable(KVirtualAddress table) { cpu::ClearPageToZero(GetVoidPointer(table)); + cpu::DataSynchronizationBarrierInnerShareable(); } ALWAYS_INLINE void OnTableUpdated() const { @@ -239,22 +239,8 @@ namespace ams::kern::arch::arm64 { cpu::InvalidateTlbByVaDataOnly(virt_addr); } - ALWAYS_INLINE void NoteUpdated() const { - cpu::DataSynchronizationBarrier(); - - if (this->IsKernel()) { - this->OnKernelTableUpdated(); - } else { - this->OnTableUpdated(); - } - } - - ALWAYS_INLINE void NoteSingleKernelPageUpdated(KProcessAddress virt_addr) const { - MESOSPHERE_ASSERT(this->IsKernel()); - - cpu::DataSynchronizationBarrier(); - this->OnKernelTableSinglePageUpdated(virt_addr); - } + void NoteUpdated() const; + void NoteSingleKernelPageUpdated(KProcessAddress virt_addr) const; KVirtualAddress AllocatePageTable(PageLinkedList *page_list, bool reuse_ll) const { KVirtualAddress table = this->GetPageTableManager().Allocate(); diff --git a/libmesosphere/include/mesosphere/arch/arm64/kern_userspace_memory_access.hpp b/libmesosphere/include/mesosphere/arch/arm64/kern_userspace_memory_access.hpp index 7245831c..202be5f5 100644 --- a/libmesosphere/include/mesosphere/arch/arm64/kern_userspace_memory_access.hpp +++ b/libmesosphere/include/mesosphere/arch/arm64/kern_userspace_memory_access.hpp @@ -46,7 +46,6 @@ namespace ams::kern::arch::arm64 { static bool StoreDataCache(uintptr_t start, uintptr_t end); static bool FlushDataCache(uintptr_t start, uintptr_t end); static bool InvalidateDataCache(uintptr_t start, uintptr_t end); - static bool InvalidateInstructionCache(uintptr_t start, uintptr_t end); static bool ReadIoMemory32Bit(void *dst, const void *src, size_t size); static bool ReadIoMemory16Bit(void *dst, const void *src, size_t size); diff --git a/libmesosphere/include/mesosphere/kern_k_scheduler_impls.hpp b/libmesosphere/include/mesosphere/kern_k_scheduler_impls.hpp index effaf048..e7071ab0 100644 --- a/libmesosphere/include/mesosphere/kern_k_scheduler_impls.hpp +++ b/libmesosphere/include/mesosphere/kern_k_scheduler_impls.hpp @@ -35,7 +35,7 @@ namespace ams::kern { ALWAYS_INLINE void KScheduler::RescheduleOtherCores(u64 cores_needing_scheduling) { if (const u64 core_mask = cores_needing_scheduling & ~(1ul << m_core_id); core_mask != 0) { - 
cpu::DataSynchronizationBarrier(); + cpu::DataSynchronizationBarrierInnerShareable(); Kernel::GetInterruptManager().SendInterProcessorInterrupt(KInterruptName_Scheduler, core_mask); } } diff --git a/libmesosphere/source/arch/arm64/kern_cpu.cpp b/libmesosphere/source/arch/arm64/kern_cpu.cpp index 15fecd44..759a628f 100644 --- a/libmesosphere/source/arch/arm64/kern_cpu.cpp +++ b/libmesosphere/source/arch/arm64/kern_cpu.cpp @@ -176,7 +176,7 @@ namespace ams::kern::arch::arm64::cpu { const u64 target_mask = m_target_cores.Load(); - DataSynchronizationBarrier(); + DataSynchronizationBarrierInnerShareable(); Kernel::GetInterruptManager().SendInterProcessorInterrupt(KInterruptName_CacheOperation, target_mask); this->ProcessOperation(); @@ -213,32 +213,37 @@ namespace ams::kern::arch::arm64::cpu { }; /* Instances of the interrupt handlers. */ - KThreadTerminationInterruptHandler g_thread_termination_handler; - KCacheHelperInterruptHandler g_cache_operation_handler; - KPerformanceCounterInterruptHandler g_performance_counter_handler[cpu::NumCores]; + constinit KThreadTerminationInterruptHandler g_thread_termination_handler; + constinit KCacheHelperInterruptHandler g_cache_operation_handler; + constinit KPerformanceCounterInterruptHandler g_performance_counter_handler[cpu::NumCores]; /* Expose this as a global, for asm to use. */ - s32 g_all_core_sync_count; + constinit s32 g_all_core_sync_count; - template<bool Init, typename F> + template<typename F> ALWAYS_INLINE void PerformCacheOperationBySetWayImpl(int level, F f) { /* Used in multiple locations. */ const u64 level_sel_value = static_cast<u64>(level << 1); + /* Get the cache size id register value with interrupts disabled. */ u64 ccsidr_value; - if constexpr (Init) { - /* During init, we can just set the selection register directly. */ - cpu::SetCsselrEl1(level_sel_value); - cpu::InstructionMemoryBarrier(); - ccsidr_value = cpu::GetCcsidrEl1(); - } else { - /* After init, we need to care about interrupts. */ + { + /* Disable interrupts. */ KScopedInterruptDisable di; + + /* Configure the cache select register for our level. */ cpu::SetCsselrEl1(level_sel_value); + + /* Ensure our configuration takes before reading the cache size id register. */ cpu::InstructionMemoryBarrier(); + + /* Get the cache size id register. */ ccsidr_value = cpu::GetCcsidrEl1(); } + /* Ensure that no memory inconsistencies occur between cache management invocations. */ + cpu::DataSynchronizationBarrier(); + /* Get cache size id info. 
*/ CacheSizeIdRegisterAccessor ccsidr_el1(ccsidr_value); const int num_sets = ccsidr_el1.GetNumberOfSets(); @@ -266,13 +271,11 @@ namespace ams::kern::arch::arm64::cpu { } void StoreDataCacheBySetWay(int level) { - PerformCacheOperationBySetWayImpl<false>(level, StoreDataCacheLineBySetWayImpl); - cpu::DataSynchronizationBarrier(); + PerformCacheOperationBySetWayImpl(level, StoreDataCacheLineBySetWayImpl); } void FlushDataCacheBySetWay(int level) { - PerformCacheOperationBySetWayImpl<false>(level, FlushDataCacheLineBySetWayImpl); - cpu::DataSynchronizationBarrier(); + PerformCacheOperationBySetWayImpl(level, FlushDataCacheLineBySetWayImpl); } void KCacheHelperInterruptHandler::ProcessOperation() { @@ -284,9 +287,11 @@ namespace ams::kern::arch::arm64::cpu { break; case Operation::StoreDataCache: StoreDataCacheBySetWay(0); + cpu::DataSynchronizationBarrier(); break; case Operation::FlushDataCache: FlushDataCacheBySetWay(0); + cpu::DataSynchronizationBarrier(); break; } } @@ -323,14 +328,6 @@ namespace ams::kern::arch::arm64::cpu { R_SUCCEED(); } - ALWAYS_INLINE Result InvalidateInstructionCacheRange(uintptr_t start, uintptr_t end) { - MESOSPHERE_ASSERT(util::IsAligned(start, InstructionCacheLineSize)); - MESOSPHERE_ASSERT(util::IsAligned(end, InstructionCacheLineSize)); - R_UNLESS(UserspaceAccess::InvalidateInstructionCache(start, end), svc::ResultInvalidCurrentMemory()); - EnsureInstructionConsistency(); - R_SUCCEED(); - } - ALWAYS_INLINE void InvalidateEntireInstructionCacheLocalImpl() { __asm__ __volatile__("ic iallu" ::: "memory"); } @@ -341,26 +338,12 @@ namespace ams::kern::arch::arm64::cpu { } - void StoreEntireCacheForInit() { - /* Store local. */ - { - CacheLineIdRegisterAccessor clidr_el1; - const int levels_of_unification = clidr_el1.GetLevelsOfUnification(); - - for (int level = 0; level != levels_of_unification; ++level) { - PerformCacheOperationBySetWayImpl<true>(level, StoreDataCacheLineBySetWayImpl); - } - } - - /* Store shared. */ - { - CacheLineIdRegisterAccessor clidr_el1; - const int levels_of_coherency = clidr_el1.GetLevelsOfCoherency(); - const int levels_of_unification = clidr_el1.GetLevelsOfUnification(); - - for (int level = levels_of_unification; level <= levels_of_coherency; ++level) { - PerformCacheOperationBySetWayImpl<true>(level, StoreDataCacheLineBySetWayImpl); - } + void StoreCacheForInit(void *addr, size_t size) { + /* Store the data cache for the specified range. */ + const uintptr_t start = util::AlignDown(reinterpret_cast<uintptr_t>(addr), DataCacheLineSize); + const uintptr_t end = start + size; + for (uintptr_t cur = start; cur < end; cur += DataCacheLineSize) { + __asm__ __volatile__("dc cvac, %[cur]" :: [cur]"r"(cur) : "memory"); } /* Data synchronization barrier. */ @@ -370,36 +353,7 @@ namespace ams::kern::arch::arm64::cpu { InvalidateEntireInstructionCacheLocalImpl(); /* Ensure local instruction consistency. */ - DataSynchronizationBarrierInnerShareable(); - InstructionMemoryBarrier(); - } - - void FlushEntireCacheForInit() { - /* Flush data cache. */ - { - /* Get levels of coherence/unificaiton. */ - CacheLineIdRegisterAccessor clidr_el1; - const int levels_of_coherency = clidr_el1.GetLevelsOfCoherency(); - - /* Store cache from L1 up to (level of coherence - 1). */ - for (int level = 0; level < levels_of_coherency - 1; ++level) { - PerformCacheOperationBySetWayImpl<true>(level, StoreDataCacheLineBySetWayImpl); - DataSynchronizationBarrier(); - } - - /* Flush cache from (level of coherence - 1) down to L0. 
*/ - for (int level = levels_of_coherency; level > 0; --level) { - PerformCacheOperationBySetWayImpl<true>(level - 1, FlushDataCacheLineBySetWayImpl); - DataSynchronizationBarrier(); - } - } - - /* Invalidate instruction cache. */ - InvalidateEntireInstructionCacheLocalImpl(); EnsureInstructionConsistency(); - - /* Invalidate entire TLB. */ - InvalidateEntireTlb(); } void FlushEntireDataCache() { @@ -417,10 +371,17 @@ namespace ams::kern::arch::arm64::cpu { for (int level = levels_of_coherency; level > 1; --level) { FlushDataCacheBySetWay(level - 1); } + + /* Data synchronization barrier for full system. */ + DataSynchronizationBarrier(); } Result InvalidateDataCache(void *addr, size_t size) { - KScopedCoreMigrationDisable dm; + /* Mark ourselves as in a cache maintenance operation, and prevent re-ordering. */ + __asm__ __volatile__("" ::: "memory"); + GetCurrentThread().SetInCacheMaintenanceOperation(); + ON_SCOPE_EXIT { GetCurrentThread().ClearInCacheMaintenanceOperation(); __asm__ __volatile__("" ::: "memory"); }; + const uintptr_t start = reinterpret_cast<uintptr_t>(addr); const uintptr_t end = start + size; uintptr_t aligned_start = util::AlignDown(start, DataCacheLineSize); @@ -444,7 +405,11 @@ namespace ams::kern::arch::arm64::cpu { } Result StoreDataCache(const void *addr, size_t size) { - KScopedCoreMigrationDisable dm; + /* Mark ourselves as in a cache maintenance operation, and prevent re-ordering. */ + __asm__ __volatile__("" ::: "memory"); + GetCurrentThread().SetInCacheMaintenanceOperation(); + ON_SCOPE_EXIT { GetCurrentThread().ClearInCacheMaintenanceOperation(); __asm__ __volatile__("" ::: "memory"); }; + const uintptr_t start = util::AlignDown(reinterpret_cast<uintptr_t>(addr), DataCacheLineSize); const uintptr_t end = util::AlignUp(reinterpret_cast<uintptr_t>(addr) + size, DataCacheLineSize); @@ -452,26 +417,17 @@ namespace ams::kern::arch::arm64::cpu { } Result FlushDataCache(const void *addr, size_t size) { - KScopedCoreMigrationDisable dm; + /* Mark ourselves as in a cache maintenance operation, and prevent re-ordering. */ + __asm__ __volatile__("" ::: "memory"); + GetCurrentThread().SetInCacheMaintenanceOperation(); + ON_SCOPE_EXIT { GetCurrentThread().ClearInCacheMaintenanceOperation(); __asm__ __volatile__("" ::: "memory"); }; + const uintptr_t start = util::AlignDown(reinterpret_cast<uintptr_t>(addr), DataCacheLineSize); const uintptr_t end = util::AlignUp(reinterpret_cast<uintptr_t>(addr) + size, DataCacheLineSize); R_RETURN(FlushDataCacheRange(start, end)); } - Result InvalidateInstructionCache(void *addr, size_t size) { - KScopedCoreMigrationDisable dm; - const uintptr_t start = util::AlignDown(reinterpret_cast<uintptr_t>(addr), InstructionCacheLineSize); - const uintptr_t end = util::AlignUp(reinterpret_cast<uintptr_t>(addr) + size, InstructionCacheLineSize); - - R_TRY(InvalidateInstructionCacheRange(start, end)); - - /* Request the interrupt helper to perform an instruction memory barrier. 
*/ - g_cache_operation_handler.RequestOperation(KCacheHelperInterruptHandler::Operation::InstructionMemoryBarrier); - - R_SUCCEED(); - } - void InvalidateEntireInstructionCache() { KScopedCoreMigrationDisable dm; diff --git a/libmesosphere/source/arch/arm64/kern_cpu_asm.s b/libmesosphere/source/arch/arm64/kern_cpu_asm.s index 1de732d6..3c6ec40c 100644 --- a/libmesosphere/source/arch/arm64/kern_cpu_asm.s +++ b/libmesosphere/source/arch/arm64/kern_cpu_asm.s @@ -61,3 +61,138 @@ _ZN3ams4kern4arch5arm643cpu23SynchronizeAllCoresImplEPii: 5: stlr wzr, [x0] ret + +/* ams::kern::arch::arm64::cpu::ClearPageToZeroImpl(void *) */ +.section .text._ZN3ams4kern4arch5arm643cpu19ClearPageToZeroImplEPv, "ax", %progbits +.global _ZN3ams4kern4arch5arm643cpu19ClearPageToZeroImplEPv +.type _ZN3ams4kern4arch5arm643cpu19ClearPageToZeroImplEPv, %function +_ZN3ams4kern4arch5arm643cpu19ClearPageToZeroImplEPv: + /* Efficiently clear the page using dc zva. */ + dc zva, x0 + add x8, x0, #0x040 + dc zva, x8 + add x8, x0, #0x080 + dc zva, x8 + add x8, x0, #0x0c0 + dc zva, x8 + add x8, x0, #0x100 + dc zva, x8 + add x8, x0, #0x140 + dc zva, x8 + add x8, x0, #0x180 + dc zva, x8 + add x8, x0, #0x1c0 + dc zva, x8 + add x8, x0, #0x200 + dc zva, x8 + add x8, x0, #0x240 + dc zva, x8 + add x8, x0, #0x280 + dc zva, x8 + add x8, x0, #0x2c0 + dc zva, x8 + add x8, x0, #0x300 + dc zva, x8 + add x8, x0, #0x340 + dc zva, x8 + add x8, x0, #0x380 + dc zva, x8 + add x8, x0, #0x3c0 + dc zva, x8 + add x8, x0, #0x400 + dc zva, x8 + add x8, x0, #0x440 + dc zva, x8 + add x8, x0, #0x480 + dc zva, x8 + add x8, x0, #0x4c0 + dc zva, x8 + add x8, x0, #0x500 + dc zva, x8 + add x8, x0, #0x540 + dc zva, x8 + add x8, x0, #0x580 + dc zva, x8 + add x8, x0, #0x5c0 + dc zva, x8 + add x8, x0, #0x600 + dc zva, x8 + add x8, x0, #0x640 + dc zva, x8 + add x8, x0, #0x680 + dc zva, x8 + add x8, x0, #0x6c0 + dc zva, x8 + add x8, x0, #0x700 + dc zva, x8 + add x8, x0, #0x740 + dc zva, x8 + add x8, x0, #0x780 + dc zva, x8 + add x8, x0, #0x7c0 + dc zva, x8 + add x8, x0, #0x800 + dc zva, x8 + add x8, x0, #0x840 + dc zva, x8 + add x8, x0, #0x880 + dc zva, x8 + add x8, x0, #0x8c0 + dc zva, x8 + add x8, x0, #0x900 + dc zva, x8 + add x8, x0, #0x940 + dc zva, x8 + add x8, x0, #0x980 + dc zva, x8 + add x8, x0, #0x9c0 + dc zva, x8 + add x8, x0, #0xa00 + dc zva, x8 + add x8, x0, #0xa40 + dc zva, x8 + add x8, x0, #0xa80 + dc zva, x8 + add x8, x0, #0xac0 + dc zva, x8 + add x8, x0, #0xb00 + dc zva, x8 + add x8, x0, #0xb40 + dc zva, x8 + add x8, x0, #0xb80 + dc zva, x8 + add x8, x0, #0xbc0 + dc zva, x8 + add x8, x0, #0xc00 + dc zva, x8 + add x8, x0, #0xc40 + dc zva, x8 + add x8, x0, #0xc80 + dc zva, x8 + add x8, x0, #0xcc0 + dc zva, x8 + add x8, x0, #0xd00 + dc zva, x8 + add x8, x0, #0xd40 + dc zva, x8 + add x8, x0, #0xd80 + dc zva, x8 + add x8, x0, #0xdc0 + dc zva, x8 + add x8, x0, #0xe00 + dc zva, x8 + add x8, x0, #0xe40 + dc zva, x8 + add x8, x0, #0xe80 + dc zva, x8 + add x8, x0, #0xec0 + dc zva, x8 + add x8, x0, #0xf00 + dc zva, x8 + add x8, x0, #0xf40 + dc zva, x8 + add x8, x0, #0xf80 + dc zva, x8 + add x8, x0, #0xfc0 + dc zva, x8 + ret diff --git a/libmesosphere/source/arch/arm64/kern_k_debug.cpp b/libmesosphere/source/arch/arm64/kern_k_debug.cpp index 690e3134..032cdea3 100644 --- a/libmesosphere/source/arch/arm64/kern_k_debug.cpp +++ b/libmesosphere/source/arch/arm64/kern_k_debug.cpp @@ -257,21 +257,21 @@ namespace ams::kern::arch::arm64 { #define MESOSPHERE_SET_HW_BREAK_POINT(ID, FLAGS, VALUE) \ ({ \ cpu::SetDbgBcr##ID##El1(0); \ - cpu::EnsureInstructionConsistency(); \ + 
cpu::EnsureInstructionConsistencyFullSystem(); \ cpu::SetDbgBvr##ID##El1(VALUE); \ - cpu::EnsureInstructionConsistency(); \ + cpu::EnsureInstructionConsistencyFullSystem(); \ cpu::SetDbgBcr##ID##El1(FLAGS); \ - cpu::EnsureInstructionConsistency(); \ + cpu::EnsureInstructionConsistencyFullSystem(); \ }) #define MESOSPHERE_SET_HW_WATCH_POINT(ID, FLAGS, VALUE) \ ({ \ cpu::SetDbgWcr##ID##El1(0); \ - cpu::EnsureInstructionConsistency(); \ + cpu::EnsureInstructionConsistencyFullSystem(); \ cpu::SetDbgWvr##ID##El1(VALUE); \ - cpu::EnsureInstructionConsistency(); \ + cpu::EnsureInstructionConsistencyFullSystem(); \ cpu::SetDbgWcr##ID##El1(FLAGS); \ - cpu::EnsureInstructionConsistency(); \ + cpu::EnsureInstructionConsistencyFullSystem(); \ }) Result KDebug::SetHardwareBreakPoint(ams::svc::HardwareBreakPointRegisterName name, u64 flags, u64 value) { diff --git a/libmesosphere/source/arch/arm64/kern_k_page_table.cpp b/libmesosphere/source/arch/arm64/kern_k_page_table.cpp index 66fec8b8..eaf88fdc 100644 --- a/libmesosphere/source/arch/arm64/kern_k_page_table.cpp +++ b/libmesosphere/source/arch/arm64/kern_k_page_table.cpp @@ -158,6 +158,32 @@ namespace ams::kern::arch::arm64 { } + ALWAYS_INLINE void KPageTable::NoteUpdated() const { + cpu::DataSynchronizationBarrierInnerShareableStore(); + + /* Mark ourselves as in a tlb maintenance operation. */ + GetCurrentThread().SetInTlbMaintenanceOperation(); + ON_SCOPE_EXIT { GetCurrentThread().ClearInTlbMaintenanceOperation(); __asm__ __volatile__("" ::: "memory"); }; + + if (this->IsKernel()) { + this->OnKernelTableUpdated(); + } else { + this->OnTableUpdated(); + } + } + + ALWAYS_INLINE void KPageTable::NoteSingleKernelPageUpdated(KProcessAddress virt_addr) const { + MESOSPHERE_ASSERT(this->IsKernel()); + + cpu::DataSynchronizationBarrierInnerShareableStore(); + + /* Mark ourselves as in a tlb maintenance operation. */ + GetCurrentThread().SetInTlbMaintenanceOperation(); + ON_SCOPE_EXIT { GetCurrentThread().ClearInTlbMaintenanceOperation(); __asm__ __volatile__("" ::: "memory"); }; + + this->OnKernelTableSinglePageUpdated(virt_addr); + } + void KPageTable::Initialize(s32 core_id) { /* Nothing actually needed here. */ MESOSPHERE_UNUSED(core_id); @@ -412,9 +438,8 @@ namespace ams::kern::arch::arm64 { /* Set the entry. */ l2_phys = GetPageTablePhysicalAddress(l2_virt); - PteDataSynchronizationBarrier(); + PteDataMemoryBarrier(); *l1_entry = L1PageTableEntry(PageTableEntry::TableTag{}, l2_phys, this->IsKernel(), true); - PteDataSynchronizationBarrier(); } else { l2_virt = GetPageTableVirtualAddress(l2_phys); } @@ -477,9 +502,8 @@ namespace ams::kern::arch::arm64 { /* Set the entry. */ l2_phys = GetPageTablePhysicalAddress(l2_virt); - PteDataSynchronizationBarrier(); + PteDataMemoryBarrier(); *l1_entry = L1PageTableEntry(PageTableEntry::TableTag{}, l2_phys, this->IsKernel(), true); - PteDataSynchronizationBarrier(); l2_allocated = true; } else { l2_virt = GetPageTableVirtualAddress(l2_phys); @@ -505,9 +529,8 @@ namespace ams::kern::arch::arm64 { /* Set the entry. 
*/ l3_phys = GetPageTablePhysicalAddress(l3_virt); - PteDataSynchronizationBarrier(); + PteDataMemoryBarrier(); *l2_entry = L2PageTableEntry(PageTableEntry::TableTag{}, l3_phys, this->IsKernel(), true); - PteDataSynchronizationBarrier(); l2_open_count++; } else { l3_virt = GetPageTableVirtualAddress(l3_phys); @@ -631,7 +654,7 @@ namespace ams::kern::arch::arm64 { for (size_t i = 0; i < num_l2_blocks; i++) { *impl.GetL2EntryFromTable(l2_virt, virt_addr + L2BlockSize * i) = InvalidL2PageTableEntry; } - PteDataSynchronizationBarrier(); + PteDataMemoryBarrier(); /* Close references to the L2 table. */ if (this->GetPageTableManager().IsInPageTableHeap(l2_virt)) { @@ -665,7 +688,7 @@ namespace ams::kern::arch::arm64 { for (size_t i = 0; i < num_l3_blocks; i++) { *impl.GetL3EntryFromTable(l3_virt, virt_addr + L3BlockSize * i) = InvalidL3PageTableEntry; } - PteDataSynchronizationBarrier(); + PteDataMemoryBarrier(); /* Close references to the L3 table. */ if (this->GetPageTableManager().IsInPageTableHeap(l3_virt)) { @@ -783,6 +806,9 @@ namespace ams::kern::arch::arm64 { this->MergePages(orig_virt_addr + (num_pages - 1) * PageSize, page_list); } + /* Wait for pending stores to complete. */ + cpu::DataSynchronizationBarrierInnerShareableStore(); + /* Open references to the pages, if we should. */ if (IsHeapPhysicalAddress(orig_phys_addr)) { Kernel::GetMemoryManager().Open(orig_phys_addr, num_pages); @@ -878,6 +904,9 @@ namespace ams::kern::arch::arm64 { this->MergePages(orig_virt_addr + (num_pages - 1) * PageSize, page_list); } + /* Wait for pending stores to complete. */ + cpu::DataSynchronizationBarrierInnerShareableStore(); + /* We succeeded! We want to persist the reference to the pages. */ spg.CancelClose(); R_SUCCEED(); @@ -967,7 +996,6 @@ namespace ams::kern::arch::arm64 { auto sw_reserved_bits = PageTableEntry::EncodeSoftwareReservedBits(head_entry->IsHeadMergeDisabled(), head_entry->IsHeadAndBodyMergeDisabled(), tail_entry->IsTailMergeDisabled()); /* Merge! */ - PteDataSynchronizationBarrier(); *l2_entry = L2PageTableEntry(PageTableEntry::BlockTag{}, phys_addr, PageTableEntry(entry_template), sw_reserved_bits, false); /* Note that we updated. */ @@ -1049,7 +1077,6 @@ namespace ams::kern::arch::arm64 { auto sw_reserved_bits = PageTableEntry::EncodeSoftwareReservedBits(head_entry->IsHeadMergeDisabled(), head_entry->IsHeadAndBodyMergeDisabled(), tail_entry->IsTailMergeDisabled()); /* Merge! */ - /* NOTE: As of 13.1.0, Nintendo does not do: PteDataSynchronizationBarrier(); */ *l1_entry = L1PageTableEntry(PageTableEntry::BlockTag{}, phys_addr, PageTableEntry(entry_template), sw_reserved_bits, false); /* Note that we updated. */ @@ -1097,7 +1124,7 @@ namespace ams::kern::arch::arm64 { this->GetPageTableManager().Open(l2_table, L1BlockSize / L2BlockSize); /* Replace the L1 entry with one to the new table. */ - PteDataSynchronizationBarrier(); + PteDataMemoryBarrier(); *l1_entry = L1PageTableEntry(PageTableEntry::TableTag{}, l2_phys, this->IsKernel(), true); this->NoteUpdated(); } @@ -1147,7 +1174,7 @@ namespace ams::kern::arch::arm64 { this->GetPageTableManager().Open(l3_table, L2BlockSize / L3BlockSize); /* Replace the L2 entry with one to the new table. 
*/ - PteDataSynchronizationBarrier(); + PteDataMemoryBarrier(); *l2_entry = L2PageTableEntry(PageTableEntry::TableTag{}, l3_phys, this->IsKernel(), true); this->NoteUpdated(); } diff --git a/libmesosphere/source/arch/arm64/kern_userspace_memory_access_asm.s b/libmesosphere/source/arch/arm64/kern_userspace_memory_access_asm.s index 8a64750a..660d5a4e 100644 --- a/libmesosphere/source/arch/arm64/kern_userspace_memory_access_asm.s +++ b/libmesosphere/source/arch/arm64/kern_userspace_memory_access_asm.s @@ -577,26 +577,6 @@ _ZN3ams4kern4arch5arm6415UserspaceAccess19InvalidateDataCacheEmm: mov x0, #1 ret -/* ams::kern::arch::arm64::UserspaceAccess::InvalidateInstructionCache(uintptr_t start, uintptr_t end) */ -.section .text._ZN3ams4kern4arch5arm6415UserspaceAccess26InvalidateInstructionCacheEmm, "ax", %progbits -.global _ZN3ams4kern4arch5arm6415UserspaceAccess26InvalidateInstructionCacheEmm -.type _ZN3ams4kern4arch5arm6415UserspaceAccess26InvalidateInstructionCacheEmm, %function -.balign 0x10 -_ZN3ams4kern4arch5arm6415UserspaceAccess26InvalidateInstructionCacheEmm: - /* Check if we have any work to do. */ - cmp x1, x0 - b.eq 2f - -1: /* Loop, invalidating each cache line. */ - ic ivau, x0 - add x0, x0, #0x40 - cmp x1, x0 - b.ne 1b - -2: /* We're done! */ - mov x0, #1 - ret - /* ams::kern::arch::arm64::UserspaceAccess::ReadIoMemory32Bit(void *dst, const void *src, size_t size) */ .section .text._ZN3ams4kern4arch5arm6415UserspaceAccess17ReadIoMemory32BitEPvPKvm, "ax", %progbits .global _ZN3ams4kern4arch5arm6415UserspaceAccess17ReadIoMemory32BitEPvPKvm diff --git a/libmesosphere/source/board/nintendo/nx/kern_k_sleep_manager_asm.s b/libmesosphere/source/board/nintendo/nx/kern_k_sleep_manager_asm.s index 9864c1c1..fca5455c 100644 --- a/libmesosphere/source/board/nintendo/nx/kern_k_sleep_manager_asm.s +++ b/libmesosphere/source/board/nintendo/nx/kern_k_sleep_manager_asm.s @@ -278,6 +278,9 @@ _ZN3ams4kern5board8nintendo2nx13KSleepManager11ResumeEntryEm: .global _ZN3ams4kern5board8nintendo2nx13KSleepManager33InvalidateDataCacheForResumeEntryEm .type _ZN3ams4kern5board8nintendo2nx13KSleepManager33InvalidateDataCacheForResumeEntryEm, %function _ZN3ams4kern5board8nintendo2nx13KSleepManager33InvalidateDataCacheForResumeEntryEm: + /* cpu::DataSynchronizationBarrier(); */ + dsb sy + /* const u64 level_sel_value = level << 1; */ lsl x8, x0, #1 diff --git a/libmesosphere/source/kern_k_initial_process_reader.cpp b/libmesosphere/source/kern_k_initial_process_reader.cpp index a7c272dc..ff5f21a6 100644 --- a/libmesosphere/source/kern_k_initial_process_reader.cpp +++ b/libmesosphere/source/kern_k_initial_process_reader.cpp @@ -179,13 +179,7 @@ namespace ams::kern { } } - /* Flush caches. */ - /* NOTE: This seems incorrect according to arm spec, which says not to flush via set/way after boot. */ - /* However, Nintendo flushes the entire cache here and not doing so has caused reports of abort with ESR_EL1 */ - /* as 0x02000000 (unknown abort) to occur. */ MESOSPHERE_UNUSED(params); - cpu::FlushEntireDataCache(); - cpu::InvalidateEntireInstructionCache(); R_SUCCEED(); } diff --git a/libmesosphere/source/kern_k_thread.cpp b/libmesosphere/source/kern_k_thread.cpp index 38f7f8a8..c4d69249 100644 --- a/libmesosphere/source/kern_k_thread.cpp +++ b/libmesosphere/source/kern_k_thread.cpp @@ -1315,7 +1315,7 @@ namespace ams::kern { /* If the thread is runnable, send a termination interrupt to other cores. 
*/ if (this->GetState() == ThreadState_Runnable) { if (const u64 core_mask = m_physical_affinity_mask.GetAffinityMask() & ~(1ul << GetCurrentCoreId()); core_mask != 0) { - cpu::DataSynchronizationBarrier(); + cpu::DataSynchronizationBarrierInnerShareable(); Kernel::GetInterruptManager().SendInterProcessorInterrupt(KInterruptName_ThreadTerminate, core_mask); } } diff --git a/libstratosphere/source/os/impl/os_cache_impl.os.horizon.hpp b/libstratosphere/source/os/impl/os_cache_impl.os.horizon.hpp index 46562b20..a6fbb197 100644 --- a/libstratosphere/source/os/impl/os_cache_impl.os.horizon.hpp +++ b/libstratosphere/source/os/impl/os_cache_impl.os.horizon.hpp @@ -32,6 +32,13 @@ namespace ams::os::impl { /* Calculate cache line size. */ cache_line_size = 4 << ((cache_type_register >> 16) & 0xF); + /* Get the thread local region. */ + auto * const tlr = svc::GetThreadLocalRegion(); + + /* Note to the kernel that we're performing cache maintenance, in case we get interrupted while touching cache lines. */ + tlr->cache_maintenance_flag = 1; + ON_SCOPE_EXIT { tlr->cache_maintenance_flag = 0; }; + /* Iterate, flushing cache lines. */ for (uintptr_t cur = reinterpret_cast<uintptr_t>(addr) & ~(cache_line_size - 1); cur < end_addr; cur += cache_line_size) { __asm__ __volatile__ ("dc civac, %[cur]" :: [cur]"r"(cur)); diff --git a/libvapours/source/dd/impl/dd_cache_impl.os.horizon.hpp b/libvapours/source/dd/impl/dd_cache_impl.os.horizon.hpp index 447cd91e..5c94c2a8 100644 --- a/libvapours/source/dd/impl/dd_cache_impl.os.horizon.hpp +++ b/libvapours/source/dd/impl/dd_cache_impl.os.horizon.hpp @@ -31,6 +31,15 @@ namespace ams::dd::impl { __asm__ __volatile__("mrs %[ctr_el0], ctr_el0" : [ctr_el0]"=r"(ctr_el0)); const uintptr_t cache_line_size = 4 << ((ctr_el0 >> 16) & 0xF); + #if defined(ATMOSPHERE_IS_STRATOSPHERE) + /* Get the thread local region. */ + auto * const tlr = svc::GetThreadLocalRegion(); + + /* Note to the kernel that we're performing cache maintenance, in case we get interrupted while touching cache lines. */ + tlr->cache_maintenance_flag = 1; + ON_SCOPE_EXIT { tlr->cache_maintenance_flag = 0; }; + #endif + /* Invalidate the cache. */ const uintptr_t start_addr = reinterpret_cast<uintptr_t>(addr) & ~(cache_line_size - 1); const uintptr_t end_addr = reinterpret_cast<uintptr_t>(addr) + size; @@ -62,6 +71,15 @@ namespace ams::dd::impl { __asm__ __volatile__("mrs %[ctr_el0], ctr_el0" : [ctr_el0]"=r"(ctr_el0)); const uintptr_t cache_line_size = 4 << ((ctr_el0 >> 16) & 0xF); + #if defined(ATMOSPHERE_IS_STRATOSPHERE) + /* Get the thread local region. */ + auto * const tlr = svc::GetThreadLocalRegion(); + + /* Note to the kernel that we're performing cache maintenance, in case we get interrupted while touching cache lines. */ + tlr->cache_maintenance_flag = 1; + ON_SCOPE_EXIT { tlr->cache_maintenance_flag = 0; }; + #endif + /* Invalidate the cache. */ const uintptr_t start_addr = reinterpret_cast<uintptr_t>(addr) & ~(cache_line_size - 1); const uintptr_t end_addr = reinterpret_cast<uintptr_t>(addr) + size;
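
For reference, the os_cache_impl and dd_cache_impl hunks above all follow the same userland pattern: derive the cache line size from CTR_EL0, raise the thread-local cache_maintenance_flag so the kernel can tell that the thread was preempted mid-maintenance, walk the range one cache line at a time, then barrier. The snippet below is a minimal consolidated sketch of that pattern and is not part of the change itself; it assumes the cache_maintenance_flag field on the thread local region (added elsewhere in this change) and ams::svc::GetThreadLocalRegion() as used in the diff, and the helper name FlushDataCacheExample is made up for illustration.

/* Illustrative sketch only (not part of the diff): the userland cache-flush pattern used above. */
/* Assumes the thread local region exposes cache_maintenance_flag, as introduced by this change. */
#include <stratosphere.hpp>

namespace example {

    void FlushDataCacheExample(const void *addr, size_t size) {
        /* Derive the minimum data cache line size from CTR_EL0 (4 << DminLine, in 4-byte words). */
        u64 ctr_el0;
        __asm__ __volatile__("mrs %[ctr_el0], ctr_el0" : [ctr_el0]"=r"(ctr_el0));
        const uintptr_t cache_line_size = 4 << ((ctr_el0 >> 16) & 0xF);

        /* Tell the kernel we're doing cache maintenance, in case we're interrupted mid-loop. */
        auto * const tlr = ams::svc::GetThreadLocalRegion();
        tlr->cache_maintenance_flag = 1;
        ON_SCOPE_EXIT { tlr->cache_maintenance_flag = 0; };

        /* Clean and invalidate every line covering [addr, addr + size). */
        const uintptr_t start = reinterpret_cast<uintptr_t>(addr) & ~(cache_line_size - 1);
        const uintptr_t end   = reinterpret_cast<uintptr_t>(addr) + size;
        for (uintptr_t cur = start; cur < end; cur += cache_line_size) {
            __asm__ __volatile__("dc civac, %[cur]" :: [cur]"r"(cur) : "memory");
        }

        /* Ensure all maintenance completes before returning. */
        __asm__ __volatile__("dsb sy" ::: "memory");
    }

}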