From 5d51dfdd6cc0264b3ed5e85254091e1135908478 Mon Sep 17 00:00:00 2001 From: Michael Scire Date: Sun, 2 Aug 2020 14:30:06 -0700 Subject: [PATCH] kernel_ldr: use unoptimized memcpy before mmu bringup (closes #1102) Before the MMU is up, all reads/writes must be aligned; the optimized memcpy implementation does not guarantee all reads/writes it performs are aligned. This commit splits the libc impl to be separate for kernel/kernel_ldr, and so now only kernel will use the optimized impl. This is safe, as the MMU is brought up before kernel begins executing. --- .../source/libc/arch/arm64/asmdefs.h | 31 - .../libc/arch/arm64/memcmp.arch.arm64.s | 133 ---- .../libc/arch/arm64/memcpy.arch.arm64.s | 239 ------- .../libc/arch/arm64/memset.arch.arm64.s | 172 ----- .../source/libc/kern_libc_config.arch.arm64.h | 24 - libmesosphere/source/libc/kern_libc_config.h | 26 - libmesosphere/source/libc/kern_libc_generic.c | 673 ------------------ 7 files changed, 1298 deletions(-) delete mode 100644 libmesosphere/source/libc/arch/arm64/asmdefs.h delete mode 100644 libmesosphere/source/libc/arch/arm64/memcmp.arch.arm64.s delete mode 100644 libmesosphere/source/libc/arch/arm64/memcpy.arch.arm64.s delete mode 100644 libmesosphere/source/libc/arch/arm64/memset.arch.arm64.s delete mode 100644 libmesosphere/source/libc/kern_libc_config.arch.arm64.h delete mode 100644 libmesosphere/source/libc/kern_libc_config.h delete mode 100644 libmesosphere/source/libc/kern_libc_generic.c diff --git a/libmesosphere/source/libc/arch/arm64/asmdefs.h b/libmesosphere/source/libc/arch/arm64/asmdefs.h deleted file mode 100644 index edc4e66c..00000000 --- a/libmesosphere/source/libc/arch/arm64/asmdefs.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Macros for asm code. - * - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT - */ - -#ifndef _ASMDEFS_H -#define _ASMDEFS_H - -#define ENTRY_ALIGN(name, alignment) \ - .global name; \ - .type name,%function; \ - .align alignment; \ - name: \ - .cfi_startproc; - -#define ENTRY(name) ENTRY_ALIGN(name, 6) - -#define ENTRY_ALIAS(name) \ - .global name; \ - .type name,%function; \ - name: - -#define END(name) \ - .cfi_endproc; \ - .size name, .-name; - -#define L(l) .L ## l - -#endif diff --git a/libmesosphere/source/libc/arch/arm64/memcmp.arch.arm64.s b/libmesosphere/source/libc/arch/arm64/memcmp.arch.arm64.s deleted file mode 100644 index 609f7e20..00000000 --- a/libmesosphere/source/libc/arch/arm64/memcmp.arch.arm64.s +++ /dev/null @@ -1,133 +0,0 @@ -/* memcmp - compare memory - * - * Copyright (c) 2013, Arm Limited. - * SPDX-License-Identifier: MIT - */ - -/* Assumptions: - * - * ARMv8-a, AArch64, unaligned accesses. - */ - -#include "asmdefs.h" - -/* Parameters and result. */ -#define src1 x0 -#define src2 x1 -#define limit x2 -#define result w0 - -/* Internal variables. */ -#define data1 x3 -#define data1w w3 -#define data1h x4 -#define data2 x5 -#define data2w w5 -#define data2h x6 -#define tmp1 x7 -#define tmp2 x8 - -ENTRY (memcmp) - subs limit, limit, 8 - b.lo L(less8) - - ldr data1, [src1], 8 - ldr data2, [src2], 8 - cmp data1, data2 - b.ne L(return) - - subs limit, limit, 8 - b.gt L(more16) - - ldr data1, [src1, limit] - ldr data2, [src2, limit] - b L(return) - -L(more16): - ldr data1, [src1], 8 - ldr data2, [src2], 8 - cmp data1, data2 - bne L(return) - - /* Jump directly to comparing the last 16 bytes for 32 byte (or less) - strings. 
*/ - subs limit, limit, 16 - b.ls L(last_bytes) - - /* We overlap loads between 0-32 bytes at either side of SRC1 when we - try to align, so limit it only to strings larger than 128 bytes. */ - cmp limit, 96 - b.ls L(loop16) - - /* Align src1 and adjust src2 with bytes not yet done. */ - and tmp1, src1, 15 - add limit, limit, tmp1 - sub src1, src1, tmp1 - sub src2, src2, tmp1 - - /* Loop performing 16 bytes per iteration using aligned src1. - Limit is pre-decremented by 16 and must be larger than zero. - Exit if <= 16 bytes left to do or if the data is not equal. */ - .p2align 4 -L(loop16): - ldp data1, data1h, [src1], 16 - ldp data2, data2h, [src2], 16 - subs limit, limit, 16 - ccmp data1, data2, 0, hi - ccmp data1h, data2h, 0, eq - b.eq L(loop16) - - cmp data1, data2 - bne L(return) - mov data1, data1h - mov data2, data2h - cmp data1, data2 - bne L(return) - - /* Compare last 1-16 bytes using unaligned access. */ -L(last_bytes): - add src1, src1, limit - add src2, src2, limit - ldp data1, data1h, [src1] - ldp data2, data2h, [src2] - cmp data1, data2 - bne L(return) - mov data1, data1h - mov data2, data2h - cmp data1, data2 - - /* Compare data bytes and set return value to 0, -1 or 1. */ -L(return): -#ifndef __AARCH64EB__ - rev data1, data1 - rev data2, data2 -#endif - cmp data1, data2 -L(ret_eq): - cset result, ne - cneg result, result, lo - ret - - .p2align 4 - /* Compare up to 8 bytes. Limit is [-8..-1]. */ -L(less8): - adds limit, limit, 4 - b.lo L(less4) - ldr data1w, [src1], 4 - ldr data2w, [src2], 4 - cmp data1w, data2w - b.ne L(return) - sub limit, limit, 4 -L(less4): - adds limit, limit, 4 - beq L(ret_eq) -L(byte_loop): - ldrb data1w, [src1], 1 - ldrb data2w, [src2], 1 - subs limit, limit, 1 - ccmp data1w, data2w, 0, ne /* NZCV = 0b0000. */ - b.eq L(byte_loop) - sub result, data1w, data2w - ret - -END (memcmp) diff --git a/libmesosphere/source/libc/arch/arm64/memcpy.arch.arm64.s b/libmesosphere/source/libc/arch/arm64/memcpy.arch.arm64.s deleted file mode 100644 index 02ed1dd8..00000000 --- a/libmesosphere/source/libc/arch/arm64/memcpy.arch.arm64.s +++ /dev/null @@ -1,239 +0,0 @@ -/* - * memcpy - copy memory area - * - * Copyright (c) 2012-2020, Arm Limited. - * SPDX-License-Identifier: MIT - */ - -/* Assumptions: - * - * ARMv8-a, AArch64, unaligned accesses. - * - */ - -#include "asmdefs.h" - -#define dstin x0 -#define src x1 -#define count x2 -#define dst x3 -#define srcend x4 -#define dstend x5 -#define A_l x6 -#define A_lw w6 -#define A_h x7 -#define B_l x8 -#define B_lw w8 -#define B_h x9 -#define C_l x10 -#define C_lw w10 -#define C_h x11 -#define D_l x12 -#define D_h x13 -#define E_l x14 -#define E_h x15 -#define F_l x16 -#define F_h x17 -#define G_l count -#define G_h dst -#define H_l src -#define H_h srcend -#define tmp1 x14 - -/* This implementation handles overlaps and supports both memcpy and memmove - from a single entry point. It uses unaligned accesses and branchless - sequences to keep the code small, simple and improve performance. - - Copies are split into 3 main cases: small copies of up to 32 bytes, medium - copies of up to 128 bytes, and large copies. The overhead of the overlap - check is negligible since it is only required for large copies. - - Large copies use a software pipelined loop processing 64 bytes per iteration. - The destination pointer is 16-byte aligned to minimize unaligned accesses. - The loop tail is handled by always copying 64 bytes from the end. 
-*/ - -ENTRY (memcpy) -ENTRY_ALIAS (memmove) - add srcend, src, count - add dstend, dstin, count - cmp count, 128 - b.hi L(copy_long) - cmp count, 32 - b.hi L(copy32_128) - - /* Small copies: 0..32 bytes. */ - cmp count, 16 - b.lo L(copy16) - ldp A_l, A_h, [src] - ldp D_l, D_h, [srcend, -16] - stp A_l, A_h, [dstin] - stp D_l, D_h, [dstend, -16] - ret - - /* Copy 8-15 bytes. */ -L(copy16): - tbz count, 3, L(copy8) - ldr A_l, [src] - ldr A_h, [srcend, -8] - str A_l, [dstin] - str A_h, [dstend, -8] - ret - - .p2align 3 - /* Copy 4-7 bytes. */ -L(copy8): - tbz count, 2, L(copy4) - ldr A_lw, [src] - ldr B_lw, [srcend, -4] - str A_lw, [dstin] - str B_lw, [dstend, -4] - ret - - /* Copy 0..3 bytes using a branchless sequence. */ -L(copy4): - cbz count, L(copy0) - lsr tmp1, count, 1 - ldrb A_lw, [src] - ldrb C_lw, [srcend, -1] - ldrb B_lw, [src, tmp1] - strb A_lw, [dstin] - strb B_lw, [dstin, tmp1] - strb C_lw, [dstend, -1] -L(copy0): - ret - - .p2align 4 - /* Medium copies: 33..128 bytes. */ -L(copy32_128): - ldp A_l, A_h, [src] - ldp B_l, B_h, [src, 16] - ldp C_l, C_h, [srcend, -32] - ldp D_l, D_h, [srcend, -16] - cmp count, 64 - b.hi L(copy128) - stp A_l, A_h, [dstin] - stp B_l, B_h, [dstin, 16] - stp C_l, C_h, [dstend, -32] - stp D_l, D_h, [dstend, -16] - ret - - .p2align 4 - /* Copy 65..128 bytes. */ -L(copy128): - ldp E_l, E_h, [src, 32] - ldp F_l, F_h, [src, 48] - cmp count, 96 - b.ls L(copy96) - ldp G_l, G_h, [srcend, -64] - ldp H_l, H_h, [srcend, -48] - stp G_l, G_h, [dstend, -64] - stp H_l, H_h, [dstend, -48] -L(copy96): - stp A_l, A_h, [dstin] - stp B_l, B_h, [dstin, 16] - stp E_l, E_h, [dstin, 32] - stp F_l, F_h, [dstin, 48] - stp C_l, C_h, [dstend, -32] - stp D_l, D_h, [dstend, -16] - ret - - .p2align 4 - /* Copy more than 128 bytes. */ -L(copy_long): - /* Use backwards copy if there is an overlap. */ - sub tmp1, dstin, src - cbz tmp1, L(copy0) - cmp tmp1, count - b.lo L(copy_long_backwards) - - /* Copy 16 bytes and then align dst to 16-byte alignment. */ - - ldp D_l, D_h, [src] - and tmp1, dstin, 15 - bic dst, dstin, 15 - sub src, src, tmp1 - add count, count, tmp1 /* Count is now 16 too large. */ - ldp A_l, A_h, [src, 16] - stp D_l, D_h, [dstin] - ldp B_l, B_h, [src, 32] - ldp C_l, C_h, [src, 48] - ldp D_l, D_h, [src, 64]! - subs count, count, 128 + 16 /* Test and readjust count. */ - b.ls L(copy64_from_end) - -L(loop64): - stp A_l, A_h, [dst, 16] - ldp A_l, A_h, [src, 16] - stp B_l, B_h, [dst, 32] - ldp B_l, B_h, [src, 32] - stp C_l, C_h, [dst, 48] - ldp C_l, C_h, [src, 48] - stp D_l, D_h, [dst, 64]! - ldp D_l, D_h, [src, 64]! - subs count, count, 64 - b.hi L(loop64) - - /* Write the last iteration and copy 64 bytes from the end. */ -L(copy64_from_end): - ldp E_l, E_h, [srcend, -64] - stp A_l, A_h, [dst, 16] - ldp A_l, A_h, [srcend, -48] - stp B_l, B_h, [dst, 32] - ldp B_l, B_h, [srcend, -32] - stp C_l, C_h, [dst, 48] - ldp C_l, C_h, [srcend, -16] - stp D_l, D_h, [dst, 64] - stp E_l, E_h, [dstend, -64] - stp A_l, A_h, [dstend, -48] - stp B_l, B_h, [dstend, -32] - stp C_l, C_h, [dstend, -16] - ret - - .p2align 4 - - /* Large backwards copy for overlapping copies. - Copy 16 bytes and then align dst to 16-byte alignment. */ -L(copy_long_backwards): - ldp D_l, D_h, [srcend, -16] - and tmp1, dstend, 15 - sub srcend, srcend, tmp1 - sub count, count, tmp1 - ldp A_l, A_h, [srcend, -16] - stp D_l, D_h, [dstend, -16] - ldp B_l, B_h, [srcend, -32] - ldp C_l, C_h, [srcend, -48] - ldp D_l, D_h, [srcend, -64]! 
- sub dstend, dstend, tmp1
- subs count, count, 128
- b.ls L(copy64_from_start)
-
-L(loop64_backwards):
- stp A_l, A_h, [dstend, -16]
- ldp A_l, A_h, [srcend, -16]
- stp B_l, B_h, [dstend, -32]
- ldp B_l, B_h, [srcend, -32]
- stp C_l, C_h, [dstend, -48]
- ldp C_l, C_h, [srcend, -48]
- stp D_l, D_h, [dstend, -64]!
- ldp D_l, D_h, [srcend, -64]!
- subs count, count, 64
- b.hi L(loop64_backwards)
-
- /* Write the last iteration and copy 64 bytes from the start. */
-L(copy64_from_start):
- ldp G_l, G_h, [src, 48]
- stp A_l, A_h, [dstend, -16]
- ldp A_l, A_h, [src, 32]
- stp B_l, B_h, [dstend, -32]
- ldp B_l, B_h, [src, 16]
- stp C_l, C_h, [dstend, -48]
- ldp C_l, C_h, [src]
- stp D_l, D_h, [dstend, -64]
- stp G_l, G_h, [dstin, 48]
- stp A_l, A_h, [dstin, 32]
- stp B_l, B_h, [dstin, 16]
- stp C_l, C_h, [dstin]
- ret
-
-END (memcpy)
diff --git a/libmesosphere/source/libc/arch/arm64/memset.arch.arm64.s b/libmesosphere/source/libc/arch/arm64/memset.arch.arm64.s
deleted file mode 100644
index 700f0e84..00000000
--- a/libmesosphere/source/libc/arch/arm64/memset.arch.arm64.s
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * memset - fill memory with a constant byte
- *
- * Copyright (c) 2012-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
- *
- */
-
-#include "asmdefs.h"
-
-#define DC_ZVA_THRESHOLD 512
-
-#define dstin x0
-#define val x1
-#define valw w1
-#define count x2
-#define dst x3
-#define dstend x4
-#define zva_val x5
-
-ENTRY (memset)
-
- bfi valw, valw, 8, 8
- bfi valw, valw, 16, 16
- bfi val, val, 32, 32
-
- add dstend, dstin, count
-
- cmp count, 96
- b.hi L(set_long)
- cmp count, 16
- b.hs L(set_medium)
-
- /* Set 0..15 bytes. */
- tbz count, 3, 1f
- str val, [dstin]
- str val, [dstend, -8]
- ret
-1: tbz count, 2, 2f
- str valw, [dstin]
- str valw, [dstend, -4]
- ret
-2: cbz count, 3f
- strb valw, [dstin]
- tbz count, 1, 3f
- strh valw, [dstend, -2]
-3: ret
-
- /* Set 16..96 bytes. */
- .p2align 4
-L(set_medium):
- stp val, val, [dstin]
- tbnz count, 6, L(set96)
- stp val, val, [dstend, -16]
- tbz count, 5, 1f
- stp val, val, [dstin, 16]
- stp val, val, [dstend, -32]
-1: ret
-
- .p2align 4
- /* Set 64..96 bytes. Write 64 bytes from the start and
- 32 bytes from the end. */
-L(set96):
- stp val, val, [dstin, 16]
- stp val, val, [dstin, 32]
- stp val, val, [dstin, 48]
- stp val, val, [dstend, -32]
- stp val, val, [dstend, -16]
- ret
-
- .p2align 4
-L(set_long):
- stp val, val, [dstin]
-#if DC_ZVA_THRESHOLD
- cmp count, DC_ZVA_THRESHOLD
- ccmp val, 0, 0, cs
- bic dst, dstin, 15
- b.eq L(zva_64)
-#else
- bic dst, dstin, 15
-#endif
- /* Small-size or non-zero memset does not use DC ZVA. */
- sub count, dstend, dst
-
- /*
- * Adjust count and bias for loop. By subtracting an extra 1 from count,
- * it is easy to use the tbz instruction to check whether the loop tail
- * count is less than 33 bytes, so as to bypass 2 unnecessary stps.
- */
- sub count, count, 64+16+1
-
-#if DC_ZVA_THRESHOLD
- /* Align the loop on a 16-byte boundary; this may be friendlier to the i-cache. */
- nop
-#endif
-
-1: stp val, val, [dst, 16]
- stp val, val, [dst, 32]
- stp val, val, [dst, 48]
- stp val, val, [dst, 64]!
- subs count, count, 64
- b.hs 1b
-
- tbz count, 5, 1f /* Remaining count is less than 33 bytes? */
- stp val, val, [dst, 16]
- stp val, val, [dst, 32]
-1: stp val, val, [dstend, -32]
- stp val, val, [dstend, -16]
- ret
-
-#if DC_ZVA_THRESHOLD
- .p2align 4
-L(zva_64):
- stp val, val, [dst, 16]
- stp val, val, [dst, 32]
- stp val, val, [dst, 48]
- bic dst, dst, 63
-
- /*
- * Previous memory writes might cross a cache line boundary and leave a
- * cache line partially dirty. Zeroing this kind of cache line using
- * DC ZVA will incur extra cost, for it requires loading the untouched
- * part of the line from memory before zeroing.
- *
- * So, write the first 64-byte aligned block using stp to force a
- * fully dirty cache line.
- */
- stp val, val, [dst, 64]
- stp val, val, [dst, 80]
- stp val, val, [dst, 96]
- stp val, val, [dst, 112]
-
- sub count, dstend, dst
- /*
- * Adjust count and bias for loop. By subtracting an extra 1 from count,
- * it is easy to use the tbz instruction to check whether the loop tail
- * count is less than 33 bytes, so as to bypass 2 unnecessary stps.
- */
- sub count, count, 128+64+64+1
- add dst, dst, 128
- nop
-
- /* DC ZVA sets 64 bytes each time. */
-1: dc zva, dst
- add dst, dst, 64
- subs count, count, 64
- b.hs 1b
-
- /*
- * Write the last 64-byte aligned block using stp to force a fully
- * dirty cache line.
- */
- stp val, val, [dst, 0]
- stp val, val, [dst, 16]
- stp val, val, [dst, 32]
- stp val, val, [dst, 48]
-
- tbz count, 5, 1f /* Remaining count is less than 33 bytes? */
- stp val, val, [dst, 64]
- stp val, val, [dst, 80]
-1: stp val, val, [dstend, -32]
- stp val, val, [dstend, -16]
- ret
-#endif
-
-
-END (memset)
diff --git a/libmesosphere/source/libc/kern_libc_config.arch.arm64.h b/libmesosphere/source/libc/kern_libc_config.arch.arm64.h
deleted file mode 100644
index 5d01bc77..00000000
--- a/libmesosphere/source/libc/kern_libc_config.arch.arm64.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Atmosphère-NX
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#pragma once
-
-/* Definitions for libc genericity. */
-#define MESOSPHERE_LIBC_MEMCPY_GENERIC 0
-#define MESOSPHERE_LIBC_MEMCMP_GENERIC 0
-#define MESOSPHERE_LIBC_MEMMOVE_GENERIC 0
-#define MESOSPHERE_LIBC_MEMSET_GENERIC 0
-#define MESOSPHERE_LIBC_STRNCPY_GENERIC 1
-#define MESOSPHERE_LIBC_STRNCMP_GENERIC 1
diff --git a/libmesosphere/source/libc/kern_libc_config.h b/libmesosphere/source/libc/kern_libc_config.h
deleted file mode 100644
index f79fb750..00000000
--- a/libmesosphere/source/libc/kern_libc_config.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Atmosphère-NX
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#pragma once
-
-#if defined(ATMOSPHERE_ARCH_ARM64)
-
- #include "kern_libc_config.arch.arm64.h"
-
-#else
-
- #error "Unknown architecture for libc"
-
-#endif
diff --git a/libmesosphere/source/libc/kern_libc_generic.c b/libmesosphere/source/libc/kern_libc_generic.c
deleted file mode 100644
index d41caa55..00000000
--- a/libmesosphere/source/libc/kern_libc_generic.c
+++ /dev/null
@@ -1,673 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Atmosphère-NX
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#include <string.h>
-#include <stdint.h>
-#include <limits.h>
-#include "kern_libc_config.h"
-
-/* Note: copied from newlib */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
-FUNCTION
- <<memmove>>---move possibly overlapping memory
-INDEX
- memmove
-SYNOPSIS
- #include <string.h>
- void *memmove(void *<[dst]>, const void *<[src]>, size_t <[length]>);
-DESCRIPTION
- This function moves <[length]> characters from the block of
- memory starting at <<*<[src]>>> to the memory starting at
- <<*<[dst]>>>. <<memmove>> reproduces the characters correctly
- at <<*<[dst]>>> even if the two areas overlap.
-RETURNS
- The function returns <[dst]> as passed.
-PORTABILITY
-<<memmove>> is ANSI C.
-<<memmove>> requires no supporting OS subroutines.
-QUICKREF
- memmove ansi pure
-*/
-
-/* Nonzero if either X or Y is not aligned on a "long" boundary. */
-#define UNALIGNED(X, Y) \
- (((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
-
-/* How many bytes are copied each iteration of the 4X unrolled loop. */
-#define BIGBLOCKSIZE (sizeof (long) << 2)
-
-/* How many bytes are copied each iteration of the word copy loop. */
-#define LITTLEBLOCKSIZE (sizeof (long))
-
-/* Threshold for punting to the byte copier. */
-#undef TOO_SMALL
-#define TOO_SMALL(LEN) ((LEN) < BIGBLOCKSIZE)
-
-#if MESOSPHERE_LIBC_MEMMOVE_GENERIC
-
-/*SUPPRESS 20*/
-void *
-//__inhibit_loop_to_libcall
-__attribute__((weak))
-memmove (void *dst_void,
- const void *src_void,
- size_t length)
-{
-#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
- char *dst = dst_void;
- const char *src = src_void;
-
- if (src < dst && dst < src + length)
- {
- /* Have to copy backwards */
- src += length;
- dst += length;
- while (length--)
- {
- *--dst = *--src;
- }
- }
- else
- {
- while (length--)
- {
- *dst++ = *src++;
- }
- }
-
- return dst_void;
-#else
- char *dst = dst_void;
- const char *src = src_void;
- long *aligned_dst;
- const long *aligned_src;
-
- if (src < dst && dst < src + length)
- {
- /* Destructive overlap...have to copy backwards */
- src += length;
- dst += length;
- while (length--)
- {
- *--dst = *--src;
- }
- }
- else
- {
- /* Use an optimized algorithm for a non-destructive copy to closely
- match memcpy. If the size is small or either SRC or DST is unaligned,
- then punt into the byte copy loop. This should be rare. */
- if (!TOO_SMALL(length) && !UNALIGNED (src, dst))
- {
- aligned_dst = (long*)dst;
- aligned_src = (long*)src;
-
- /* Copy 4X long words at a time if possible. */
- while (length >= BIGBLOCKSIZE)
- {
- *aligned_dst++ = *aligned_src++;
- *aligned_dst++ = *aligned_src++;
- *aligned_dst++ = *aligned_src++;
- *aligned_dst++ = *aligned_src++;
- length -= BIGBLOCKSIZE;
- }
-
- /* Copy one long word at a time if possible. */
- while (length >= LITTLEBLOCKSIZE)
- {
- *aligned_dst++ = *aligned_src++;
- length -= LITTLEBLOCKSIZE;
- }
-
- /* Pick up any residual with a byte copier. */
- dst = (char*)aligned_dst;
- src = (char*)aligned_src;
- }
-
- while (length--)
- {
- *dst++ = *src++;
- }
- }
-
- return dst_void;
-#endif /* not PREFER_SIZE_OVER_SPEED */
-}
-
-#endif /* MESOSPHERE_LIBC_MEMMOVE_GENERIC */
-
-/*
-FUNCTION
- <<memcpy>>---copy memory regions
-SYNOPSIS
- #include <string.h>
- void* memcpy(void *restrict <[out]>, const void *restrict <[in]>,
- size_t <[n]>);
-DESCRIPTION
- This function copies <[n]> bytes from the memory region
- pointed to by <[in]> to the memory region pointed to by
- <[out]>.
- If the regions overlap, the behavior is undefined.
-RETURNS
- <<memcpy>> returns a pointer to the first byte of the <[out]>
- region.
-PORTABILITY
-<<memcpy>> is ANSI C.
-<<memcpy>> requires no supporting OS subroutines.
-QUICKREF
- memcpy ansi pure
- */
-
-#if MESOSPHERE_LIBC_MEMCPY_GENERIC
-
-void *
-__attribute__((weak))
-memcpy (void * dst0,
- const void * __restrict src0,
- size_t len0)
-{
-#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
- char *dst = (char *) dst0;
- char *src = (char *) src0;
-
- void *save = dst0;
-
- while (len0--)
- {
- *dst++ = *src++;
- }
-
- return save;
-#else
- char *dst = dst0;
- const char *src = src0;
- long *aligned_dst;
- const long *aligned_src;
-
- /* If the size is small, or either SRC or DST is unaligned,
- then punt into the byte copy loop. This should be rare. */
- if (!TOO_SMALL(len0) && !UNALIGNED (src, dst))
- {
- aligned_dst = (long*)dst;
- aligned_src = (long*)src;
-
- /* Copy 4X long words at a time if possible. */
- while (len0 >= BIGBLOCKSIZE)
- {
- *aligned_dst++ = *aligned_src++;
- *aligned_dst++ = *aligned_src++;
- *aligned_dst++ = *aligned_src++;
- *aligned_dst++ = *aligned_src++;
- len0 -= BIGBLOCKSIZE;
- }
-
- /* Copy one long word at a time if possible. */
- while (len0 >= LITTLEBLOCKSIZE)
- {
- *aligned_dst++ = *aligned_src++;
- len0 -= LITTLEBLOCKSIZE;
- }
-
- /* Pick up any residual with a byte copier. */
- dst = (char*)aligned_dst;
- src = (char*)aligned_src;
- }
-
- while (len0--)
- *dst++ = *src++;
-
- return dst0;
-#endif /* not PREFER_SIZE_OVER_SPEED */
-}
-
-#endif /* MESOSPHERE_LIBC_MEMCPY_GENERIC */
-
-/*
-FUNCTION
- <<memset>>---set an area of memory
-INDEX
- memset
-SYNOPSIS
- #include <string.h>
- void *memset(void *<[dst]>, int <[c]>, size_t <[length]>);
-DESCRIPTION
- This function converts the argument <[c]> into an unsigned
- char and fills the first <[length]> characters of the array
- pointed to by <[dst]> with that value.
-RETURNS
- <<memset>> returns the value of <[dst]>.
-PORTABILITY
-<<memset>> is ANSI C.
-<<memset>> requires no supporting OS subroutines.
-QUICKREF
- memset ansi pure
-*/
-
-#include <string.h>
-
-#undef LBLOCKSIZE
-#undef UNALIGNED
-#undef TOO_SMALL
-
-#define LBLOCKSIZE (sizeof(long))
-#define UNALIGNED(X) ((long)X & (LBLOCKSIZE - 1))
-#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)
-
-#if MESOSPHERE_LIBC_MEMSET_GENERIC
-
-void *
-__attribute__((weak))
-memset (void *m,
- int c,
- size_t n)
-{
- char *s = (char *) m;
-
-#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
- unsigned int i;
- unsigned long buffer;
- unsigned long *aligned_addr;
- unsigned int d = c & 0xff; /* To avoid sign extension, copy C to an
- unsigned variable. */
-
- while (UNALIGNED (s))
- {
- if (n--)
- *s++ = (char) c;
- else
- return m;
- }
-
- if (!TOO_SMALL (n))
- {
- /* If we get this far, we know that n is large and s is word-aligned. */
- aligned_addr = (unsigned long *) s;
-
- /* Store D into each char-sized location in BUFFER so that
- we can set large blocks quickly. */
- buffer = (d << 8) | d;
- buffer |= (buffer << 16);
- for (i = 32; i < LBLOCKSIZE * 8; i <<= 1)
- buffer = (buffer << i) | buffer;
-
- /* Unroll the loop. */
- while (n >= LBLOCKSIZE*4)
- {
- *aligned_addr++ = buffer;
- *aligned_addr++ = buffer;
- *aligned_addr++ = buffer;
- *aligned_addr++ = buffer;
- n -= 4*LBLOCKSIZE;
- }
-
- while (n >= LBLOCKSIZE)
- {
- *aligned_addr++ = buffer;
- n -= LBLOCKSIZE;
- }
- /* Pick up the remainder with a bytewise loop. */
- s = (char*)aligned_addr;
- }
-
-#endif /* not PREFER_SIZE_OVER_SPEED */
-
- while (n--)
- *s++ = (char) c;
-
- return m;
-}
-
-#endif /* MESOSPHERE_LIBC_MEMSET_GENERIC */
-
-/*
-FUNCTION
- <<memcmp>>---compare two memory areas
-INDEX
- memcmp
-SYNOPSIS
- #include <string.h>
- int memcmp(const void *<[s1]>, const void *<[s2]>, size_t <[n]>);
-DESCRIPTION
- This function compares not more than <[n]> characters of the
- object pointed to by <[s1]> with the object pointed to by <[s2]>.
-RETURNS
- The function returns an integer greater than, equal to or
- less than zero according to whether the object pointed to by
- <[s1]> is greater than, equal to or less than the object
- pointed to by <[s2]>.
-PORTABILITY
-<<memcmp>> is ANSI C.
-<<memcmp>> requires no supporting OS subroutines.
-QUICKREF
- memcmp ansi pure
-*/
-
-#undef LBLOCKSIZE
-#undef UNALIGNED
-#undef TOO_SMALL
-
-/* Nonzero if either X or Y is not aligned on a "long" boundary. */
-#define UNALIGNED(X, Y) \
- (((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
-
-/* How many bytes are compared each iteration of the word compare loop. */
-#define LBLOCKSIZE (sizeof (long))
-
-/* Threshold for punting to the byte comparer. */
-#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)
-
-#if MESOSPHERE_LIBC_MEMCMP_GENERIC
-
-int
-__attribute__((weak))
-memcmp (const void *m1,
- const void *m2,
- size_t n)
-{
-#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
- unsigned char *s1 = (unsigned char *) m1;
- unsigned char *s2 = (unsigned char *) m2;
-
- while (n--)
- {
- if (*s1 != *s2)
- {
- return *s1 - *s2;
- }
- s1++;
- s2++;
- }
- return 0;
-#else
- unsigned char *s1 = (unsigned char *) m1;
- unsigned char *s2 = (unsigned char *) m2;
- unsigned long *a1;
- unsigned long *a2;
-
- /* If the size is too small, or either pointer is unaligned,
- then we punt to the byte compare loop. Hopefully this will
- not turn up in inner loops. */
- if (!TOO_SMALL(n) && !UNALIGNED(s1,s2))
- {
- /* Otherwise, load and compare the blocks of memory one
- word at a time. */
- a1 = (unsigned long*) s1;
- a2 = (unsigned long*) s2;
- while (n >= LBLOCKSIZE)
- {
- if (*a1 != *a2)
- break;
- a1++;
- a2++;
- n -= LBLOCKSIZE;
- }
-
- /* Check the n mod LBLOCKSIZE remaining characters. */
-
- s1 = (unsigned char*)a1;
- s2 = (unsigned char*)a2;
- }
-
- while (n--)
- {
- if (*s1 != *s2)
- return *s1 - *s2;
- s1++;
- s2++;
- }
-
- return 0;
-#endif /* not PREFER_SIZE_OVER_SPEED */
-}
-
-#endif /* MESOSPHERE_LIBC_MEMCMP_GENERIC */
-
-/*
-FUNCTION
- <<strncpy>>---counted copy string
-INDEX
- strncpy
-SYNOPSIS
- #include <string.h>
- char *strncpy(char *restrict <[dst]>, const char *restrict <[src]>,
- size_t <[length]>);
-DESCRIPTION
- <<strncpy>> copies not more than <[length]> characters from the
- string pointed to by <[src]> (including the terminating
- null character) to the array pointed to by <[dst]>. If the
- string pointed to by <[src]> is shorter than <[length]>
- characters, null characters are appended to the destination
- array until a total of <[length]> characters have been
- written.
-RETURNS
- This function returns the initial value of <[dst]>.
-PORTABILITY
-<<strncpy>> is ANSI C.
-<<strncpy>> requires no supporting OS subroutines.
-QUICKREF
- strncpy ansi pure
-*/
-
-#include <string.h>
-#include <limits.h>
-
-/*SUPPRESS 560*/
-/*SUPPRESS 530*/
-
-/* Nonzero if either X or Y is not aligned on a "long" boundary. */
-#define UNALIGNED(X, Y) \
- (((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
-
-#if LONG_MAX == 2147483647L
-#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
-#else
-#if LONG_MAX == 9223372036854775807L
-/* Nonzero if X (a long int) contains a NULL byte. */
-#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
-#else
-#error long int is not a 32bit or 64bit type.
-#endif
-#endif
-
-#ifndef DETECTNULL
-#error long int is not a 32bit or 64bit type.
-#endif
-
-#undef TOO_SMALL
-#define TOO_SMALL(LEN) ((LEN) < sizeof (long))
-
-#if MESOSPHERE_LIBC_STRNCPY_GENERIC
-
-char *
-strncpy (char *__restrict dst0,
- const char *__restrict src0,
- size_t count)
-{
-#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
- char *dscan;
- const char *sscan;
-
- dscan = dst0;
- sscan = src0;
- while (count > 0)
- {
- --count;
- if ((*dscan++ = *sscan++) == '\0')
- break;
- }
- while (count-- > 0)
- *dscan++ = '\0';
-
- return dst0;
-#else
- char *dst = dst0;
- const char *src = src0;
- long *aligned_dst;
- const long *aligned_src;
-
- /* If SRC and DEST are aligned and count is large enough, then copy words. */
- if (!UNALIGNED (src, dst) && !TOO_SMALL (count))
- {
- aligned_dst = (long*)dst;
- aligned_src = (long*)src;
-
- /* SRC and DEST are both "long int" aligned, try to do "long int"
- sized copies. */
- while (count >= sizeof (long int) && !DETECTNULL(*aligned_src))
- {
- count -= sizeof (long int);
- *aligned_dst++ = *aligned_src++;
- }
-
- dst = (char*)aligned_dst;
- src = (char*)aligned_src;
- }
-
- while (count > 0)
- {
- --count;
- if ((*dst++ = *src++) == '\0')
- break;
- }
-
- while (count-- > 0)
- *dst++ = '\0';
-
- return dst0;
-#endif /* not PREFER_SIZE_OVER_SPEED */
-}
-
-#endif /* MESOSPHERE_LIBC_STRNCPY_GENERIC */
-
-/*
-FUNCTION
- <<strncmp>>---character string compare
-
-INDEX
- strncmp
-SYNOPSIS
- #include <string.h>
- int strncmp(const char *<[a]>, const char * <[b]>, size_t <[length]>);
-DESCRIPTION
- <<strncmp>> compares up to <[length]> characters
- from the string at <[a]> to the string at <[b]>.
-RETURNS
- If <<*<[a]>>> sorts lexicographically after <<*<[b]>>>,
- <<strncmp>> returns a number greater than zero. If the two
- strings are equivalent, <<strncmp>> returns zero. If <<*<[a]>>>
- sorts lexicographically before <<*<[b]>>>, <<strncmp>> returns a
- number less than zero.
-PORTABILITY
-<<strncmp>> is ANSI C.
-<<strncmp>> requires no supporting OS subroutines.
-QUICKREF
- strncmp ansi pure
-*/
-
-#include <string.h>
-#include <limits.h>
-
-/* Nonzero if either X or Y is not aligned on a "long" boundary. */
-#define UNALIGNED(X, Y) \
- (((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
-
-/* DETECTNULL returns nonzero if (long)X contains a NULL byte. */
-#if LONG_MAX == 2147483647L
-#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
-#else
-#if LONG_MAX == 9223372036854775807L
-#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
-#else
-#error long int is not a 32bit or 64bit type.
-#endif
-#endif
-
-#ifndef DETECTNULL
-#error long int is not a 32bit or 64bit type.
-#endif
-
-#if MESOSPHERE_LIBC_STRNCMP_GENERIC
-
-int
-strncmp (const char *s1,
- const char *s2,
- size_t n)
-{
-#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
- if (n == 0)
- return 0;
-
- while (n-- != 0 && *s1 == *s2)
- {
- if (n == 0 || *s1 == '\0')
- break;
- s1++;
- s2++;
- }
-
- return (*(unsigned char *) s1) - (*(unsigned char *) s2);
-#else
- unsigned long *a1;
- unsigned long *a2;
-
- if (n == 0)
- return 0;
-
- /* If s1 or s2 is unaligned, then compare bytes. */
- if (!UNALIGNED (s1, s2))
- {
- /* If s1 and s2 are word-aligned, compare them a word at a time. */
- a1 = (unsigned long*)s1;
- a2 = (unsigned long*)s2;
- while (n >= sizeof (long) && *a1 == *a2)
- {
- n -= sizeof (long);
-
- /* If we've run out of bytes or hit a null, return zero
- since we already know *a1 == *a2. */
- if (n == 0 || DETECTNULL (*a1))
- return 0;
-
- a1++;
- a2++;
- }
-
- /* A difference was detected in the last few bytes of s1, so search bytewise. */
- s1 = (char*)a1;
- s2 = (char*)a2;
- }
-
- while (n-- > 0 && *s1 == *s2)
- {
- /* If we've run out of bytes or hit a null, return zero
- since we already know *s1 == *s2. */
- if (n == 0 || *s1 == '\0')
- return 0;
- s1++;
- s2++;
- }
- return (*(unsigned char *) s1) - (*(unsigned char *) s2);
-#endif /* not PREFER_SIZE_OVER_SPEED */
-}
-
-#endif /* MESOSPHERE_LIBC_STRNCMP_GENERIC */
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
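
For context on why the generic routines above are the safe choice for kernel_ldr: they dereference long-sized pointers only after checking that source and destination are mutually aligned, and fall back to byte accesses otherwise, so every load and store they issue is naturally aligned. A minimal sketch of that discipline, in the spirit of the generic memcpy above (illustrative only; the name memcpy_aligned_only is hypothetical, and this is not code from this patch):

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch: a copy routine that issues only naturally aligned accesses,
     * which is what pre-MMU code needs (with the MMU off, AArch64 treats
     * data accesses as Device memory, and unaligned accesses fault). */
    void *memcpy_aligned_only(void *dst0, const void *src0, size_t n)
    {
        unsigned char *dst = dst0;
        const unsigned char *src = src0;
        const uintptr_t mask = sizeof(unsigned long) - 1;

        /* Word-at-a-time copying is possible only when src and dst are
         * mutually aligned; otherwise any wide access would be unaligned
         * on at least one side. */
        if (((uintptr_t)dst & mask) == ((uintptr_t)src & mask)) {
            /* Copy bytes until dst (and therefore src) is word-aligned. */
            while (n > 0 && ((uintptr_t)dst & mask) != 0) {
                *dst++ = *src++;
                --n;
            }
            /* Aligned word-at-a-time copies. */
            unsigned long *ad = (unsigned long *)dst;
            const unsigned long *as = (const unsigned long *)src;
            while (n >= sizeof(unsigned long)) {
                *ad++ = *as++;
                n -= sizeof(unsigned long);
            }
            dst = (unsigned char *)ad;
            src = (const unsigned char *)as;
        }

        /* Residual tail, or a fully bytewise copy when the pointers are
         * mutually misaligned. */
        while (n--)
            *dst++ = *src++;

        return dst0;
    }

By contrast, the optimized memcpy.arch.arm64.s deleted above deliberately issues unaligned ldp/stp pairs (for example, loading from both [src] and [srcend, -16] for small copies regardless of pointer alignment). That is fine once the MMU maps memory as Normal, which is why only the kernel proper keeps the optimized implementation. Note also that a compiler is in principle free to turn a loop like the sketch back into a memcpy call or unaligned vector code, so real pre-MMU code additionally depends on how it is built; the sketch shows the access pattern, not a drop-in implementation.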