202 lines
6.8 KiB
C
202 lines
6.8 KiB
C
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#ifdef FREEBL_NO_DEPEND
|
|
#include "stubs.h"
|
|
#endif
|
|
#include "blapii.h"
|
|
#include "blapit.h"
|
|
#include "gcm.h"
|
|
#include "secerr.h"
|
|
#include "prtypes.h"
|
|
|
|
#if defined(IS_LITTLE_ENDIAN)
|
|
|
|
#include <arm_neon.h>
|
|
|
|
/*
 * Serialize the current hash value X into outbuf (16 bytes are written).
 *
 * x_high is emitted first, then x_low; vrev64_u8 reverses the byte order of
 * each 64-bit half, so every half is stored most-significant byte first.
 * Always returns SECSuccess.
 */
SECStatus
gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf)
{
    uint8x8_t half;

    half = vrev64_u8(vcreate_u8(ghash->x_high));
    vst1_u8(outbuf, half);

    half = vrev64_u8(vcreate_u8(ghash->x_low));
    vst1_u8(outbuf + 8, half);

    return SECSuccess;
}
|
|
|
|
/*
 * Lane-wise polynomial multiply: each 8-bit lane of x is carry-less
 * multiplied by the matching lane of y, giving eight 16-bit products that
 * are returned reinterpreted as sixteen bytes.
 */
static inline uint8x16_t
clmul_lanes(const uint8x8_t x, const uint8x8_t y)
{
    return vreinterpretq_u8_p16(vmull_p8(vreinterpret_p8_u8(x),
                                         vreinterpret_p8_u8(y)));
}

/*
 * Fold a partial product before it is rotated into place.
 * With hi/lo the 64-bit halves of v, the result is
 *   { lo ^ hi ^ (hi & keep), hi & keep }.
 */
static inline uint8x16_t
clmul_fold(const uint8x16_t v, const uint8x8_t keep)
{
    const uint8x8_t upper = vget_high_u8(v);
    const uint8x8_t kept = vand_u8(upper, keep);
    const uint8x8_t lower = veor_u8(veor_u8(vget_low_u8(v), upper), kept);

    return vcombine_u8(lower, kept);
}

/*
 * Carry-less multiplication of two 64-bit polynomials: returns a * b as a
 * 128-bit value.
 *
 * The product is assembled from 8x8-bit polynomial multiplies (vmull_p8)
 * of `a` and `b` against byte-rotated copies of each other; the partial
 * products are folded, rotated to their byte offset and XORed together.
 */
static inline uint8x16_t
clmul(const uint8x8_t a, const uint8x8_t b)
{
    /* Masks selecting the low 6 / 4 / 2 / 0 bytes of a 64-bit half. */
    const uint8x8_t keep6 = vcreate_u8(0xffffffffffff);
    const uint8x8_t keep4 = vcreate_u8(0xffffffff);
    const uint8x8_t keep2 = vcreate_u8(0xffff);
    const uint8x8_t keep0 = vdup_n_u8(0);

    /* Straight product A * B. */
    const uint8x16_t base = clmul_lanes(a, b);

    /* (A * B1) + (A1 * B): operands rotated by one byte. */
    const uint8x16_t rot1 = veorq_u8(clmul_lanes(a, vext_u8(b, b, 1)),
                                     clmul_lanes(vext_u8(a, a, 1), b));
    /* (A * B2) + (A2 * B): operands rotated by two bytes. */
    const uint8x16_t rot2 = veorq_u8(clmul_lanes(a, vext_u8(b, b, 2)),
                                     clmul_lanes(vext_u8(a, a, 2), b));
    /* (A * B3) + (A3 * B): operands rotated by three bytes. */
    const uint8x16_t rot3 = veorq_u8(clmul_lanes(a, vext_u8(b, b, 3)),
                                     clmul_lanes(vext_u8(a, a, 3), b));
    /* A * B4: rotation by four bytes needs only one product. */
    const uint8x16_t rot4 = clmul_lanes(a, vext_u8(b, b, 4));

    /* Fold each partial product, then rotate it by 1, 2, 3 and 4 byte
     * lanes respectively (the << 8, << 16, << 24, << 32 terms). */
    uint8x16_t part1 = clmul_fold(rot1, keep6);
    uint8x16_t part2 = clmul_fold(rot2, keep4);
    uint8x16_t part3 = clmul_fold(rot3, keep2);
    uint8x16_t part4 = clmul_fold(rot4, keep0);

    part1 = vextq_u8(part1, part1, 15);
    part2 = vextq_u8(part2, part2, 14);
    part3 = vextq_u8(part3, part3, 13);
    part4 = vextq_u8(part4, part4, 12);

    /* Addition of polynomials over GF(2) is XOR. */
    return veorq_u8(veorq_u8(base, veorq_u8(part1, part2)),
                    veorq_u8(part3, part4));
}
|
|
|
|
/*
 * Absorb `count` 16-byte blocks from `buf` into the hash state.
 *
 * For every block the state X is XORed with the block (each 8-byte half
 * byte-reversed on load), multiplied by H with a three-multiply Karatsuba
 * carry-less product, shifted left by one bit and reduced back to 128 bits.
 * The final value is written to ghash->x_high / ghash->x_low.
 * Always returns SECSuccess.
 */
SECStatus
gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf,
                unsigned int count)
{
    const uint8x8_t key_lo = vcreate_u8(ghash->h_low);
    const uint8x8_t key_hi = vcreate_u8(ghash->h_high);
    /* Karatsuba middle-term operand for H; identical for every block. */
    const uint8x8_t key_mix = veor_u8(key_hi, key_lo);
    const unsigned char *block = buf;
    unsigned int done;

    /* Running state: x_low in the low half, x_high in the high half. */
    uint8x16_t acc = vcombine_u8(vcreate_u8(ghash->x_low),
                                 vcreate_u8(ghash->x_high));

    for (done = 0; done < count; done++) {
        /* Bytes 8..15 of the block feed the low half, bytes 0..7 the high
         * half, each half byte-reversed. */
        const uint8x16_t input = vcombine_u8(vrev64_u8(vld1_u8(block + 8)),
                                             vrev64_u8(vld1_u8(block)));
        const uint8x16_t c = veorq_u8(acc, input);
        const uint8x8_t c_hi = vget_high_u8(c);
        const uint8x8_t c_lo = vget_low_u8(c);

        /* Karatsuba: C * H from three 64x64 carry-less products. */
        const uint8x16_t prod_lo = clmul(c_lo, key_lo);
        const uint8x16_t prod_hi = clmul(c_hi, key_hi);
        const uint8x16_t prod_mid =
            veorq_u8(prod_hi,
                     veorq_u8(prod_lo, clmul(veor_u8(c_hi, c_lo), key_mix)));

        /* 256-bit product as two 128-bit halves; the middle term straddles
         * the boundary between them. */
        uint8x16_t wide_hi =
            vcombine_u8(veor_u8(vget_low_u8(prod_hi), vget_high_u8(prod_mid)),
                        vget_high_u8(prod_hi));
        uint8x16_t wide_lo =
            vcombine_u8(vget_low_u8(prod_lo),
                        veor_u8(vget_high_u8(prod_lo), vget_low_u8(prod_mid)));

        /* Shift the 256-bit product left by one bit (multiply by x): the
         * top bit of each 64-bit lane is carried into the next lane up. */
        const uint64x2_t lo_shl = vshlq_n_u64(vreinterpretq_u64_u8(wide_lo), 1);
        const uint64x2_t lo_carry = vshrq_n_u64(vreinterpretq_u64_u8(wide_lo), 63);
        const uint64x2_t hi_shl = vshlq_n_u64(vreinterpretq_u64_u8(wide_hi), 1);
        const uint64x2_t hi_carry = vshrq_n_u64(vreinterpretq_u64_u8(wide_hi), 63);

        wide_lo = vreinterpretq_u8_u64(
            vcombine_u64(vget_low_u64(lo_shl),
                         vorr_u64(vget_high_u64(lo_shl),
                                  vget_low_u64(lo_carry))));
        wide_hi = vreinterpretq_u8_u64(
            vcombine_u64(vorr_u64(vget_low_u64(hi_shl),
                                  vget_high_u64(lo_carry)),
                         vorr_u64(vget_high_u64(hi_shl),
                                  vget_low_u64(hi_carry))));

        /* Reduction, first phase: XOR of the low half shifted left by 57,
         * 62 and 63 bits (per 64-bit lane) is folded into the middle
         * 128 bits of the product. */
        const int64x2_t shl57 = vshlq_n_s64(vreinterpretq_s64_u8(wide_lo), 57);
        const int64x2_t shl62 = vshlq_n_s64(vreinterpretq_s64_u8(wide_lo), 62);
        const int64x2_t shl63 = vshlq_n_s64(vreinterpretq_s64_u8(wide_lo), 63);
        uint8x16_t fold =
            vreinterpretq_u8_s64(veorq_s64(shl57, veorq_s64(shl62, shl63)));

        wide_lo = vcombine_u8(vget_low_u8(wide_lo),
                              veor_u8(vget_high_u8(wide_lo), vget_low_u8(fold)));
        wide_hi = vcombine_u8(veor_u8(vget_low_u8(wide_hi), vget_high_u8(fold)),
                              vget_high_u8(wide_hi));

        /* Reduction, second phase: combine the low half with its right
         * shifts by 1, 2 and 7 bits and add the result to the high half. */
        fold = vreinterpretq_u8_u64(
            vshrq_n_u64(vreinterpretq_u64_u8(wide_lo), 1));
        wide_hi = veorq_u8(wide_hi, wide_lo);
        wide_lo = veorq_u8(wide_lo, fold);
        fold = vreinterpretq_u8_u64(
            vshrq_n_u64(vreinterpretq_u64_u8(fold), 6));
        wide_lo = vreinterpretq_u8_u64(
            vshrq_n_u64(vreinterpretq_u64_u8(wide_lo), 1));
        wide_lo = veorq_u8(wide_lo, wide_hi);

        acc = veorq_u8(wide_lo, fold);
        block += 16;
    }

    vst1_u8((uint8_t *)&ghash->x_high, vget_high_u8(acc));
    vst1_u8((uint8_t *)&ghash->x_low, vget_low_u8(acc));
    return SECSuccess;
}
|
|
|
|
SECStatus
|
|
gcm_HashInit_hw(gcmHashContext *ghash)
|
|
{
|
|
ghash->ghash_mul = gcm_HashMult_hw;
|
|
ghash->x_low = 0;
|
|
ghash->x_high = 0;
|
|
ghash->hw = PR_TRUE;
|
|
return SECSuccess;
|
|
}
|
|
|
|
SECStatus
|
|
gcm_HashZeroX_hw(gcmHashContext *ghash)
|
|
{
|
|
ghash->x_low = 0;
|
|
ghash->x_high = 0;
|
|
return SECSuccess;
|
|
}
|
|
|
|
#endif /* IS_LITTLE_ENDIAN */
|