From 21a704f0b6301da89b9f23bd766d71abb749c89f Mon Sep 17 00:00:00 2001 From: Michael Scire Date: Wed, 3 Apr 2019 20:34:16 -0700 Subject: [PATCH] Implement accelerated sha1 + hmac-sha1 --- nx/include/switch/crypto.h | 1 + nx/include/switch/crypto/hmac.h | 19 ++++ nx/include/switch/crypto/sha1.h | 29 ++++++ nx/source/crypto/hmac.c | 126 ++++++++++++++--------- nx/source/crypto/sha1.c | 174 ++++++++++++++++++++++++++++++++ 5 files changed, 301 insertions(+), 48 deletions(-) create mode 100644 nx/include/switch/crypto/sha1.h create mode 100644 nx/source/crypto/sha1.c diff --git a/nx/include/switch/crypto.h b/nx/include/switch/crypto.h index e6ba5eb9..b8fa7993 100644 --- a/nx/include/switch/crypto.h +++ b/nx/include/switch/crypto.h @@ -12,4 +12,5 @@ #include "crypto/aes_xts.h" #include "crypto/sha256.h" +#include "crypto/sha1.h" #include "crypto/hmac.h" \ No newline at end of file diff --git a/nx/include/switch/crypto/hmac.h b/nx/include/switch/crypto/hmac.h index 2f9caa3d..634a32b6 100644 --- a/nx/include/switch/crypto/hmac.h +++ b/nx/include/switch/crypto/hmac.h @@ -4,8 +4,17 @@ * @copyright libnx Authors */ #pragma once +#include "sha1.h" #include "sha256.h" +/// Context for HMAC-SHA1 operations. +typedef struct { + Sha1Context sha_ctx; + u32 key[SHA1_BLOCK_SIZE / sizeof(u32)]; + u32 mac[SHA1_HASH_SIZE / sizeof(u32)]; + bool finalized; +} HmacSha1Context; + /// Context for HMAC-SHA256 operations. typedef struct { Sha256Context sha_ctx; @@ -23,3 +32,13 @@ void hmacSha256ContextGetMac(HmacSha256Context *ctx, void *dst); /// Simple all-in-one HMAC-SHA256 calculator. void hmacSha256CalculateMac(void *dst, const void *key, size_t key_size, const void *src, size_t size); + +/// Initialize a HMAC-SHA1 context. +void hmacSha1ContextCreate(HmacSha1Context *out, const void *key, size_t key_size); +/// Updates HMAC-SHA1 context with data to hash +void hmacSha1ContextUpdate(HmacSha1Context *ctx, const void *src, size_t size); +/// Gets the context's output mac, finalizes the context. +void hmacSha1ContextGetMac(HmacSha1Context *ctx, void *dst); + +/// Simple all-in-one HMAC-SHA1 calculator. +void hmacSha1CalculateMac(void *dst, const void *key, size_t key_size, const void *src, size_t size); diff --git a/nx/include/switch/crypto/sha1.h b/nx/include/switch/crypto/sha1.h new file mode 100644 index 00000000..f9075729 --- /dev/null +++ b/nx/include/switch/crypto/sha1.h @@ -0,0 +1,29 @@ +/** + * @file sha1.h + * @brief Hardware accelerated SHA1 implementation. + * @copyright libnx Authors + */ +#pragma once +#include "../types.h" + +#define SHA1_HASH_SIZE 0x14 +#define SHA1_BLOCK_SIZE 0x40 + +/// Context for SHA1 operations. +typedef struct { + u32 intermediate_hash[SHA1_HASH_SIZE / sizeof(u32)]; + u8 buffer[SHA1_BLOCK_SIZE]; + u64 bits_consumed; + size_t num_buffered; + bool finalized; +} Sha1Context; + +/// Initialize a SHA1 context. +void sha1ContextCreate(Sha1Context *out); +/// Updates SHA1 context with data to hash +void sha1ContextUpdate(Sha1Context *ctx, const void *src, size_t size); +/// Gets the context's output hash, finalizes the context. +void sha1ContextGetHash(Sha1Context *ctx, void *dst); + +/// Simple all-in-one SHA1 calculator. +void sha1CalculateHash(void *dst, const void *src, size_t size); diff --git a/nx/source/crypto/hmac.c b/nx/source/crypto/hmac.c index 4fcd03fd..2cd7a2e2 100644 --- a/nx/source/crypto/hmac.c +++ b/nx/source/crypto/hmac.c @@ -7,62 +7,92 @@ #define HMAC_OPAD_VAL 0x5c5c5c5c #define HMAC_IPAD_XOR_OPAD_VAL (HMAC_IPAD_VAL ^ HMAC_OPAD_VAL) +/* Function bodies. */ + +#define HMAC_CONTEXT_CREATE(cipher) \ + /* Clear key. */ \ + memset(out->key, 0, sizeof(out->key)); \ +\ + /* Either hash the key into the context, or copy it directly if possible. */ \ + if (key_size <= sizeof(out->key)) { \ + memcpy(out->key, key, key_size); \ + } else { \ + cipher##ContextCreate(&out->sha_ctx); \ + cipher##ContextUpdate(&out->sha_ctx, key, key_size); \ + cipher##ContextGetHash(&out->sha_ctx, out->key); \ + } \ +\ + /* XOR key with IPAD. */ \ + for (size_t i = 0; i < sizeof(out->key) / sizeof(u32); i++) { \ + out->key[i] ^= HMAC_IPAD_VAL; \ + } \ +\ + /* Update hash context with key ^ ipad. */ \ + cipher##ContextCreate(&out->sha_ctx); \ + cipher##ContextUpdate(&out->sha_ctx, out->key, sizeof(out->key)) + +#define HMAC_CONTEXT_UPDATE(cipher) \ + /* Just update, since we want H((key ^ ipad) || data). */ \ + cipher##ContextUpdate(&ctx->sha_ctx, src, size) + +#define HMAC_CONTEXT_GET_MAC(cipher) \ + if (!ctx->finalized) { \ + /* Save H((key ^ ipad) || data) into mac. */ \ + cipher##ContextGetHash(&ctx->sha_ctx, ctx->mac); \ +\ + /* We want key ^ opad, so we xor (key ^ ipad) with (ipad ^ opad). */ \ + for (size_t i = 0; i < sizeof(ctx->key) / sizeof(u32); i++) { \ + ctx->key[i] ^= HMAC_IPAD_XOR_OPAD_VAL; \ + } \ +\ + /* Calculate H((key ^ opad) || H((key ^ ipad) || data)). */ \ + cipher##ContextCreate(&ctx->sha_ctx); \ + cipher##ContextUpdate(&ctx->sha_ctx, ctx->key, sizeof(ctx->key)); \ + cipher##ContextUpdate(&ctx->sha_ctx, ctx->mac, sizeof(ctx->mac)); \ + cipher##ContextGetHash(&ctx->sha_ctx, ctx->mac); \ +\ + /* We're done. */ \ + ctx->finalized = true; \ + } \ +\ + memcpy(dst, ctx->mac, sizeof(ctx->mac)) + +#define HMAC_CALCULATE_MAC(cipher) \ + /* Make a new context, calculate hash, store to output, clear memory. */ \ + Hmac##cipher##Context ctx; \ + hmac##cipher##ContextCreate(&ctx, key, key_size); \ + hmac##cipher##ContextUpdate(&ctx, src, size); \ + hmac##cipher##ContextGetMac(&ctx, dst); \ + memset(&ctx, 0, sizeof(ctx)) + void hmacSha256ContextCreate(HmacSha256Context *out, const void *key, size_t key_size) { - /* Clear key. */ - memset(out->key, 0, sizeof(out->key)); - - /* Either hash the key into the context, or copy it directly if possible. */ - if (key_size <= sizeof(out->key)) { - memcpy(out->key, key, key_size); - } else { - sha256ContextCreate(&out->sha_ctx); - sha256ContextUpdate(&out->sha_ctx, key, key_size); - sha256ContextGetHash(&out->sha_ctx, out->key); - } - - /* XOR key with IPAD. */ - for (size_t i = 0; i < sizeof(out->key) / sizeof(u32); i++) { - out->key[i] ^= HMAC_IPAD_VAL; - } - - /* Update hash context with key ^ ipad. */ - sha256ContextCreate(&out->sha_ctx); - sha256ContextUpdate(&out->sha_ctx, out->key, sizeof(out->key)); + HMAC_CONTEXT_CREATE(sha256); } void hmacSha256ContextUpdate(HmacSha256Context *ctx, const void *src, size_t size) { - /* Just update, since we want H((key ^ ipad) || data). */ - sha256ContextUpdate(&ctx->sha_ctx, src, size); + HMAC_CONTEXT_UPDATE(sha256); } void hmacSha256ContextGetMac(HmacSha256Context *ctx, void *dst) { - if (!ctx->finalized) { - /* Save H((key ^ ipad) || data) into mac. */ - sha256ContextGetHash(&ctx->sha_ctx, ctx->mac); - - /* We want key ^ opad, so we xor (key ^ ipad) with (ipad ^ opad). */ - for (size_t i = 0; i < sizeof(ctx->key) / sizeof(u32); i++) { - ctx->key[i] ^= HMAC_IPAD_XOR_OPAD_VAL; - } - - /* Calculate H((key ^ opad) || H((key ^ ipad) || data)). */ - sha256ContextCreate(&ctx->sha_ctx); - sha256ContextUpdate(&ctx->sha_ctx, ctx->key, sizeof(ctx->key)); - sha256ContextUpdate(&ctx->sha_ctx, ctx->mac, sizeof(ctx->mac)); - sha256ContextGetHash(&ctx->sha_ctx, ctx->mac); - - /* We're done. */ - ctx->finalized = true; - } - - memcpy(dst, ctx->mac, sizeof(ctx->mac)); + HMAC_CONTEXT_GET_MAC(sha256); } void hmacSha256CalculateMac(void *dst, const void *key, size_t key_size, const void *src, size_t size) { - /* Make a new context, calculate hash, store to output, clear memory. */ - HmacSha256Context ctx; - hmacSha256ContextCreate(&ctx, key, key_size); - hmacSha256ContextUpdate(&ctx, src, size); - hmacSha256ContextGetMac(&ctx, dst); - memset(&ctx, 0, sizeof(ctx)); + HMAC_CALCULATE_MAC(Sha256); +} + +void hmacSha1ContextCreate(HmacSha1Context *out, const void *key, size_t key_size) { + HMAC_CONTEXT_CREATE(sha1); +} + +void hmacSha1ContextUpdate(HmacSha1Context *ctx, const void *src, size_t size) { + HMAC_CONTEXT_UPDATE(sha1); +} + +void hmacSha1ContextGetMac(HmacSha1Context *ctx, void *dst) { + HMAC_CONTEXT_GET_MAC(sha1); +} + +void hmacSha1CalculateMac(void *dst, const void *key, size_t key_size, const void *src, size_t size) { + HMAC_CALCULATE_MAC(Sha1); } diff --git a/nx/source/crypto/sha1.c b/nx/source/crypto/sha1.c new file mode 100644 index 00000000..687d1b51 --- /dev/null +++ b/nx/source/crypto/sha1.c @@ -0,0 +1,174 @@ +#include +#include +#include + +#include "crypto/sha1.h" + +/* Define for doing four rounds of SHA1. */ +#define SHA1_DO_ROUND(insn, constant) \ +const u32 a = vgetq_lane_u32(cur_abcd, 0); \ +cur_abcd = v##insn##q_u32(cur_abcd, cur_e, vaddq_u32(w[i], constant)); \ +cur_e = vsha1h_u32(a) + +static const u32 s_roundConstants[4] = { + 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 +}; + +void sha1ContextCreate(Sha1Context *out) { + static const u32 H_0[SHA1_HASH_SIZE / sizeof(u32)] = { + 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0 + }; + + memcpy(out->intermediate_hash, H_0, sizeof(out->intermediate_hash)); + memset(out->buffer, 0, sizeof(out->buffer)); + out->bits_consumed = 0; + out->num_buffered = 0; + out->finalized = false; +} + +static void _sha1ProcessBlocks(Sha1Context *ctx, const u8 *src_u8, size_t num_blocks) { + /* Setup round constants. */ + const uint32x4_t k0 = vdupq_n_u32(s_roundConstants[0]); + const uint32x4_t k1 = vdupq_n_u32(s_roundConstants[1]); + const uint32x4_t k2 = vdupq_n_u32(s_roundConstants[2]); + const uint32x4_t k3 = vdupq_n_u32(s_roundConstants[3]); + + /* Load hash variables with intermediate state. */ + uint32x4_t cur_abcd = vld1q_u32(ctx->intermediate_hash + 0); + u32 cur_e = ctx->intermediate_hash[4]; + + /* Actually do hash processing blocks. */ + while (num_blocks > 0) { + /* Save current state. */ + const uint32x4_t prev_abcd = cur_abcd; + const u32 prev_e = cur_e; + + uint32x4_t w[20]; + + /* Setup w[0-3] with message. */ + for (size_t i = 0; i < 4; i++) { + w[i] = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(src_u8))); + src_u8 += 0x10; + } + + /* Calculate w[4-19], w[i] = sha1su1(sha1su0(w[i-4], w[i-3], w[i-2]), w[i-1]); */ + for (size_t i = 4; i < 20; i++) { + w[i] = vsha1su1q_u32(vsha1su0q_u32(w[i-4], w[i-3], w[i-2]), w[i-1]); + } + + /* Do round calculations 0-20. Uses sha1c, k0. */ + size_t i = 0; + while (i < 5) { + SHA1_DO_ROUND(sha1c, k0); + i++; + } + + /* Do round calculations 20-40. Uses sha1p, k1. */ + while (i < 10) { + SHA1_DO_ROUND(sha1p, k1); + i++; + } + + /* Do round calculations 40-60. Uses sha1m, k2. */ + while (i < 15) { + SHA1_DO_ROUND(sha1m, k2); + i++; + } + + /* Do round calculations 60-80. Uses sha1p, k3. */ + while (i < 20) { + SHA1_DO_ROUND(sha1p, k3); + i++; + } + + /* Add to previous. */ + cur_abcd = vaddq_u32(cur_abcd, prev_abcd); + cur_e = cur_e + prev_e; + + num_blocks--; + } + + /* Save result to intermediate hash. */ + vst1q_u32(ctx->intermediate_hash, cur_abcd); + ctx->intermediate_hash[4] = cur_e; +} + +void sha1ContextUpdate(Sha1Context *ctx, const void *src, size_t size) { + /* Convert src to u8* for utility. */ + const u8 *cur_src = (const u8 *)src; + + /* Update bits consumed. */ + ctx->bits_consumed += (((ctx->num_buffered + size) / SHA1_BLOCK_SIZE) * SHA1_BLOCK_SIZE) * 8; + + /* Handle pre-buffered data. */ + if (ctx->num_buffered > 0) { + const size_t needed = SHA1_BLOCK_SIZE - ctx->num_buffered; + const size_t copyable = (size > needed ? needed : size); + memcpy(&ctx->buffer[ctx->num_buffered], cur_src, copyable); + cur_src += copyable; + ctx->num_buffered += copyable; + size -= copyable; + + if (ctx->num_buffered == SHA1_BLOCK_SIZE) { + _sha1ProcessBlocks(ctx, ctx->buffer, 1); + ctx->num_buffered = 0; + } + } + + /* Handle complete blocks. */ + if (size >= SHA1_BLOCK_SIZE) { + const size_t num_blocks = size / SHA1_BLOCK_SIZE; + _sha1ProcessBlocks(ctx, cur_src, num_blocks); + size -= SHA1_BLOCK_SIZE * num_blocks; + cur_src += SHA1_BLOCK_SIZE * num_blocks; + } + + /* Buffer remaining data. */ + if (size > 0) { + memcpy(ctx->buffer, cur_src, size); + ctx->num_buffered = size; + } +} + +void sha1ContextGetHash(Sha1Context *ctx, void *dst) { + if (!ctx->finalized) { + /* Process last block, if necessary. */ + { + ctx->bits_consumed += 8 * ctx->num_buffered; + ctx->buffer[ctx->num_buffered++] = 0x80; + + const size_t last_block_max_size = SHA1_BLOCK_SIZE - sizeof(u64); + /* If we've got space for the bits consumed field, just set to zero. */ + if (ctx->num_buffered <= last_block_max_size) { + memset(ctx->buffer + ctx->num_buffered, 0, last_block_max_size - ctx->num_buffered); + } else { + /* Pad with zeroes, and process. */ + memset(ctx->buffer + ctx->num_buffered, 0, SHA1_BLOCK_SIZE - ctx->num_buffered); + _sha1ProcessBlocks(ctx, ctx->buffer, 1); + + /* Clear the rest of the buffer with zeroes. */ + memset(ctx->buffer, 0, last_block_max_size); + } + + /* Copy in bits consumed field, then process last block. */ + u64 big_endian_bits_consumed = __builtin_bswap64(ctx->bits_consumed); + memcpy(ctx->buffer + last_block_max_size, &big_endian_bits_consumed, sizeof(big_endian_bits_consumed)); + _sha1ProcessBlocks(ctx, ctx->buffer, 1); + } + ctx->finalized = true; + } + + /* Copy endian-swapped intermediate hash out. */ + u32 *dst_u32 = (u32 *)dst; + for (size_t i = 0; i < sizeof(ctx->intermediate_hash) / sizeof(u32); i++) { + dst_u32[i] = __builtin_bswap32(ctx->intermediate_hash[i]); + } +} + +void sha1CalculateHash(void *dst, const void *src, size_t size) { + /* Make a new context, calculate hash, store to output. */ + Sha1Context ctx; + sha1ContextCreate(&ctx); + sha1ContextUpdate(&ctx, src, size); + sha1ContextGetHash(&ctx, dst); +}