Implement accelerated sha1 + hmac-sha1

This commit is contained in:
Michael Scire 2019-04-03 20:34:16 -07:00 committed by fincs
parent 0dde100135
commit 21a704f0b6
5 changed files with 301 additions and 48 deletions

View File

@ -12,4 +12,5 @@
#include "crypto/aes_xts.h"
#include "crypto/sha256.h"
#include "crypto/sha1.h"
#include "crypto/hmac.h"

View File

@ -4,8 +4,17 @@
* @copyright libnx Authors
*/
#pragma once
#include "sha1.h"
#include "sha256.h"
/// Context for HMAC-SHA1 operations.
typedef struct {
Sha1Context sha_ctx;
u32 key[SHA1_BLOCK_SIZE / sizeof(u32)];
u32 mac[SHA1_HASH_SIZE / sizeof(u32)];
bool finalized;
} HmacSha1Context;
/// Context for HMAC-SHA256 operations.
typedef struct {
Sha256Context sha_ctx;
@ -23,3 +32,13 @@ void hmacSha256ContextGetMac(HmacSha256Context *ctx, void *dst);
/// Simple all-in-one HMAC-SHA256 calculator.
void hmacSha256CalculateMac(void *dst, const void *key, size_t key_size, const void *src, size_t size);
/// Initialize a HMAC-SHA1 context.
void hmacSha1ContextCreate(HmacSha1Context *out, const void *key, size_t key_size);
/// Updates HMAC-SHA1 context with data to hash
void hmacSha1ContextUpdate(HmacSha1Context *ctx, const void *src, size_t size);
/// Gets the context's output mac, finalizes the context.
void hmacSha1ContextGetMac(HmacSha1Context *ctx, void *dst);
/// Simple all-in-one HMAC-SHA1 calculator.
void hmacSha1CalculateMac(void *dst, const void *key, size_t key_size, const void *src, size_t size);

View File

@ -0,0 +1,29 @@
/**
* @file sha1.h
* @brief Hardware accelerated SHA1 implementation.
* @copyright libnx Authors
*/
#pragma once
#include "../types.h"
#define SHA1_HASH_SIZE 0x14
#define SHA1_BLOCK_SIZE 0x40
/// Context for SHA1 operations.
typedef struct {
u32 intermediate_hash[SHA1_HASH_SIZE / sizeof(u32)];
u8 buffer[SHA1_BLOCK_SIZE];
u64 bits_consumed;
size_t num_buffered;
bool finalized;
} Sha1Context;
/// Initialize a SHA1 context.
void sha1ContextCreate(Sha1Context *out);
/// Updates SHA1 context with data to hash
void sha1ContextUpdate(Sha1Context *ctx, const void *src, size_t size);
/// Gets the context's output hash, finalizes the context.
void sha1ContextGetHash(Sha1Context *ctx, void *dst);
/// Simple all-in-one SHA1 calculator.
void sha1CalculateHash(void *dst, const void *src, size_t size);

View File

@ -7,62 +7,92 @@
#define HMAC_OPAD_VAL 0x5c5c5c5c
#define HMAC_IPAD_XOR_OPAD_VAL (HMAC_IPAD_VAL ^ HMAC_OPAD_VAL)
/* Function bodies. */
#define HMAC_CONTEXT_CREATE(cipher) \
/* Clear key. */ \
memset(out->key, 0, sizeof(out->key)); \
\
/* Either hash the key into the context, or copy it directly if possible. */ \
if (key_size <= sizeof(out->key)) { \
memcpy(out->key, key, key_size); \
} else { \
cipher##ContextCreate(&out->sha_ctx); \
cipher##ContextUpdate(&out->sha_ctx, key, key_size); \
cipher##ContextGetHash(&out->sha_ctx, out->key); \
} \
\
/* XOR key with IPAD. */ \
for (size_t i = 0; i < sizeof(out->key) / sizeof(u32); i++) { \
out->key[i] ^= HMAC_IPAD_VAL; \
} \
\
/* Update hash context with key ^ ipad. */ \
cipher##ContextCreate(&out->sha_ctx); \
cipher##ContextUpdate(&out->sha_ctx, out->key, sizeof(out->key))
#define HMAC_CONTEXT_UPDATE(cipher) \
/* Just update, since we want H((key ^ ipad) || data). */ \
cipher##ContextUpdate(&ctx->sha_ctx, src, size)
#define HMAC_CONTEXT_GET_MAC(cipher) \
if (!ctx->finalized) { \
/* Save H((key ^ ipad) || data) into mac. */ \
cipher##ContextGetHash(&ctx->sha_ctx, ctx->mac); \
\
/* We want key ^ opad, so we xor (key ^ ipad) with (ipad ^ opad). */ \
for (size_t i = 0; i < sizeof(ctx->key) / sizeof(u32); i++) { \
ctx->key[i] ^= HMAC_IPAD_XOR_OPAD_VAL; \
} \
\
/* Calculate H((key ^ opad) || H((key ^ ipad) || data)). */ \
cipher##ContextCreate(&ctx->sha_ctx); \
cipher##ContextUpdate(&ctx->sha_ctx, ctx->key, sizeof(ctx->key)); \
cipher##ContextUpdate(&ctx->sha_ctx, ctx->mac, sizeof(ctx->mac)); \
cipher##ContextGetHash(&ctx->sha_ctx, ctx->mac); \
\
/* We're done. */ \
ctx->finalized = true; \
} \
\
memcpy(dst, ctx->mac, sizeof(ctx->mac))
#define HMAC_CALCULATE_MAC(cipher) \
/* Make a new context, calculate hash, store to output, clear memory. */ \
Hmac##cipher##Context ctx; \
hmac##cipher##ContextCreate(&ctx, key, key_size); \
hmac##cipher##ContextUpdate(&ctx, src, size); \
hmac##cipher##ContextGetMac(&ctx, dst); \
memset(&ctx, 0, sizeof(ctx))
void hmacSha256ContextCreate(HmacSha256Context *out, const void *key, size_t key_size) {
/* Clear key. */
memset(out->key, 0, sizeof(out->key));
/* Either hash the key into the context, or copy it directly if possible. */
if (key_size <= sizeof(out->key)) {
memcpy(out->key, key, key_size);
} else {
sha256ContextCreate(&out->sha_ctx);
sha256ContextUpdate(&out->sha_ctx, key, key_size);
sha256ContextGetHash(&out->sha_ctx, out->key);
}
/* XOR key with IPAD. */
for (size_t i = 0; i < sizeof(out->key) / sizeof(u32); i++) {
out->key[i] ^= HMAC_IPAD_VAL;
}
/* Update hash context with key ^ ipad. */
sha256ContextCreate(&out->sha_ctx);
sha256ContextUpdate(&out->sha_ctx, out->key, sizeof(out->key));
HMAC_CONTEXT_CREATE(sha256);
}
void hmacSha256ContextUpdate(HmacSha256Context *ctx, const void *src, size_t size) {
/* Just update, since we want H((key ^ ipad) || data). */
sha256ContextUpdate(&ctx->sha_ctx, src, size);
HMAC_CONTEXT_UPDATE(sha256);
}
void hmacSha256ContextGetMac(HmacSha256Context *ctx, void *dst) {
if (!ctx->finalized) {
/* Save H((key ^ ipad) || data) into mac. */
sha256ContextGetHash(&ctx->sha_ctx, ctx->mac);
/* We want key ^ opad, so we xor (key ^ ipad) with (ipad ^ opad). */
for (size_t i = 0; i < sizeof(ctx->key) / sizeof(u32); i++) {
ctx->key[i] ^= HMAC_IPAD_XOR_OPAD_VAL;
}
/* Calculate H((key ^ opad) || H((key ^ ipad) || data)). */
sha256ContextCreate(&ctx->sha_ctx);
sha256ContextUpdate(&ctx->sha_ctx, ctx->key, sizeof(ctx->key));
sha256ContextUpdate(&ctx->sha_ctx, ctx->mac, sizeof(ctx->mac));
sha256ContextGetHash(&ctx->sha_ctx, ctx->mac);
/* We're done. */
ctx->finalized = true;
}
memcpy(dst, ctx->mac, sizeof(ctx->mac));
HMAC_CONTEXT_GET_MAC(sha256);
}
void hmacSha256CalculateMac(void *dst, const void *key, size_t key_size, const void *src, size_t size) {
/* Make a new context, calculate hash, store to output, clear memory. */
HmacSha256Context ctx;
hmacSha256ContextCreate(&ctx, key, key_size);
hmacSha256ContextUpdate(&ctx, src, size);
hmacSha256ContextGetMac(&ctx, dst);
memset(&ctx, 0, sizeof(ctx));
HMAC_CALCULATE_MAC(Sha256);
}
void hmacSha1ContextCreate(HmacSha1Context *out, const void *key, size_t key_size) {
HMAC_CONTEXT_CREATE(sha1);
}
void hmacSha1ContextUpdate(HmacSha1Context *ctx, const void *src, size_t size) {
HMAC_CONTEXT_UPDATE(sha1);
}
void hmacSha1ContextGetMac(HmacSha1Context *ctx, void *dst) {
HMAC_CONTEXT_GET_MAC(sha1);
}
void hmacSha1CalculateMac(void *dst, const void *key, size_t key_size, const void *src, size_t size) {
HMAC_CALCULATE_MAC(Sha1);
}

174
nx/source/crypto/sha1.c Normal file
View File

@ -0,0 +1,174 @@
#include <string.h>
#include <stdlib.h>
#include <arm_neon.h>
#include "crypto/sha1.h"
/* Define for doing four rounds of SHA1. */
#define SHA1_DO_ROUND(insn, constant) \
const u32 a = vgetq_lane_u32(cur_abcd, 0); \
cur_abcd = v##insn##q_u32(cur_abcd, cur_e, vaddq_u32(w[i], constant)); \
cur_e = vsha1h_u32(a)
static const u32 s_roundConstants[4] = {
0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
};
void sha1ContextCreate(Sha1Context *out) {
static const u32 H_0[SHA1_HASH_SIZE / sizeof(u32)] = {
0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0
};
memcpy(out->intermediate_hash, H_0, sizeof(out->intermediate_hash));
memset(out->buffer, 0, sizeof(out->buffer));
out->bits_consumed = 0;
out->num_buffered = 0;
out->finalized = false;
}
static void _sha1ProcessBlocks(Sha1Context *ctx, const u8 *src_u8, size_t num_blocks) {
/* Setup round constants. */
const uint32x4_t k0 = vdupq_n_u32(s_roundConstants[0]);
const uint32x4_t k1 = vdupq_n_u32(s_roundConstants[1]);
const uint32x4_t k2 = vdupq_n_u32(s_roundConstants[2]);
const uint32x4_t k3 = vdupq_n_u32(s_roundConstants[3]);
/* Load hash variables with intermediate state. */
uint32x4_t cur_abcd = vld1q_u32(ctx->intermediate_hash + 0);
u32 cur_e = ctx->intermediate_hash[4];
/* Actually do hash processing blocks. */
while (num_blocks > 0) {
/* Save current state. */
const uint32x4_t prev_abcd = cur_abcd;
const u32 prev_e = cur_e;
uint32x4_t w[20];
/* Setup w[0-3] with message. */
for (size_t i = 0; i < 4; i++) {
w[i] = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(src_u8)));
src_u8 += 0x10;
}
/* Calculate w[4-19], w[i] = sha1su1(sha1su0(w[i-4], w[i-3], w[i-2]), w[i-1]); */
for (size_t i = 4; i < 20; i++) {
w[i] = vsha1su1q_u32(vsha1su0q_u32(w[i-4], w[i-3], w[i-2]), w[i-1]);
}
/* Do round calculations 0-20. Uses sha1c, k0. */
size_t i = 0;
while (i < 5) {
SHA1_DO_ROUND(sha1c, k0);
i++;
}
/* Do round calculations 20-40. Uses sha1p, k1. */
while (i < 10) {
SHA1_DO_ROUND(sha1p, k1);
i++;
}
/* Do round calculations 40-60. Uses sha1m, k2. */
while (i < 15) {
SHA1_DO_ROUND(sha1m, k2);
i++;
}
/* Do round calculations 60-80. Uses sha1p, k3. */
while (i < 20) {
SHA1_DO_ROUND(sha1p, k3);
i++;
}
/* Add to previous. */
cur_abcd = vaddq_u32(cur_abcd, prev_abcd);
cur_e = cur_e + prev_e;
num_blocks--;
}
/* Save result to intermediate hash. */
vst1q_u32(ctx->intermediate_hash, cur_abcd);
ctx->intermediate_hash[4] = cur_e;
}
void sha1ContextUpdate(Sha1Context *ctx, const void *src, size_t size) {
/* Convert src to u8* for utility. */
const u8 *cur_src = (const u8 *)src;
/* Update bits consumed. */
ctx->bits_consumed += (((ctx->num_buffered + size) / SHA1_BLOCK_SIZE) * SHA1_BLOCK_SIZE) * 8;
/* Handle pre-buffered data. */
if (ctx->num_buffered > 0) {
const size_t needed = SHA1_BLOCK_SIZE - ctx->num_buffered;
const size_t copyable = (size > needed ? needed : size);
memcpy(&ctx->buffer[ctx->num_buffered], cur_src, copyable);
cur_src += copyable;
ctx->num_buffered += copyable;
size -= copyable;
if (ctx->num_buffered == SHA1_BLOCK_SIZE) {
_sha1ProcessBlocks(ctx, ctx->buffer, 1);
ctx->num_buffered = 0;
}
}
/* Handle complete blocks. */
if (size >= SHA1_BLOCK_SIZE) {
const size_t num_blocks = size / SHA1_BLOCK_SIZE;
_sha1ProcessBlocks(ctx, cur_src, num_blocks);
size -= SHA1_BLOCK_SIZE * num_blocks;
cur_src += SHA1_BLOCK_SIZE * num_blocks;
}
/* Buffer remaining data. */
if (size > 0) {
memcpy(ctx->buffer, cur_src, size);
ctx->num_buffered = size;
}
}
void sha1ContextGetHash(Sha1Context *ctx, void *dst) {
if (!ctx->finalized) {
/* Process last block, if necessary. */
{
ctx->bits_consumed += 8 * ctx->num_buffered;
ctx->buffer[ctx->num_buffered++] = 0x80;
const size_t last_block_max_size = SHA1_BLOCK_SIZE - sizeof(u64);
/* If we've got space for the bits consumed field, just set to zero. */
if (ctx->num_buffered <= last_block_max_size) {
memset(ctx->buffer + ctx->num_buffered, 0, last_block_max_size - ctx->num_buffered);
} else {
/* Pad with zeroes, and process. */
memset(ctx->buffer + ctx->num_buffered, 0, SHA1_BLOCK_SIZE - ctx->num_buffered);
_sha1ProcessBlocks(ctx, ctx->buffer, 1);
/* Clear the rest of the buffer with zeroes. */
memset(ctx->buffer, 0, last_block_max_size);
}
/* Copy in bits consumed field, then process last block. */
u64 big_endian_bits_consumed = __builtin_bswap64(ctx->bits_consumed);
memcpy(ctx->buffer + last_block_max_size, &big_endian_bits_consumed, sizeof(big_endian_bits_consumed));
_sha1ProcessBlocks(ctx, ctx->buffer, 1);
}
ctx->finalized = true;
}
/* Copy endian-swapped intermediate hash out. */
u32 *dst_u32 = (u32 *)dst;
for (size_t i = 0; i < sizeof(ctx->intermediate_hash) / sizeof(u32); i++) {
dst_u32[i] = __builtin_bswap32(ctx->intermediate_hash[i]);
}
}
void sha1CalculateHash(void *dst, const void *src, size_t size) {
/* Make a new context, calculate hash, store to output. */
Sha1Context ctx;
sha1ContextCreate(&ctx);
sha1ContextUpdate(&ctx, src, size);
sha1ContextGetHash(&ctx, dst);
}