use crate::convert::*;
#[allow(unused)]
use zerocopy::transmute;
/// Crate-visible 64-bit multiplier constant (not referenced by the functions
/// defined alongside it here).
///
/// NOTE(review): the value matches the 64-bit LCG multiplier commonly
/// attributed to Knuth's MMIX generator; confirm the intended provenance.
pub(crate) const MULTIPLE: u64 = 6364136223846793005;
/// Control mask handed to `_mm_shuffle_epi8` by `shuffle`.
///
/// Only defined when SSSE3 is a compile-time target feature and the code is
/// not running under Miri, mirroring the cfg on the code path that uses it.
#[cfg(all(target_feature = "ssse3", not(miri)))]
const SHUFFLE_MASK: u128 = 0x020a0700_0c01030e_050f0d08_06090b04_u128;
/// Multiplies `s` by `by` as a full 128-bit product and folds the result back
/// down to 64 bits by XOR-ing the upper half into the lower half.
///
/// This variant is compiled only when the `folded_multiply` feature is enabled.
#[inline(always)]
#[cfg(feature = "folded_multiply")]
pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
    // The product of two u64 values always fits in a u128.
    let product = (s as u128).wrapping_mul(by as u128);
    // The truncating cast keeps exactly bits 0..64.
    let low_half = product as u64;
    let high_half = (product >> 64) as u64;
    low_half ^ high_half
}
/// Mixes `s` and `by` into a single 64-bit value using only 64-bit wrapping
/// multiplications.
///
/// This variant is compiled when the `folded_multiply` feature is disabled.
/// It combines two products: `s` times the byte-swapped `by`, and the
/// byte-swapped `s` times the bitwise complement of `by`; the second product
/// is byte-swapped again before the two are XOR-ed together.
#[inline(always)]
#[cfg(not(feature = "folded_multiply"))]
pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
    let forward = by.swap_bytes().wrapping_mul(s);
    let reverse = (!by).wrapping_mul(s.swap_bytes());
    forward ^ reverse.swap_bytes()
}
/// Reads a slice of at most 8 bytes into two `u64` values.
///
/// * empty slice: `[0, 0]`
/// * 1 byte: that byte in both positions
/// * 2-3 bytes: the leading `u16` and the final byte
/// * 4 or more bytes: the leading `u32` and the trailing `u32`
///
/// In debug builds a slice longer than 8 bytes trips the `debug_assert!`.
#[inline(always)]
pub(crate) fn read_small(data: &[u8]) -> [u64; 2] {
    debug_assert!(data.len() <= 8);
    match data.len() {
        0 => [0, 0],
        1 => {
            let only = data[0] as u64;
            [only, only]
        }
        // The two reads may overlap in the middle byte when the length is 2.
        2 | 3 => [data.read_u16().0 as u64, data[data.len() - 1] as u64],
        // The leading and trailing reads overlap when the length is below 8.
        _ => [data.read_u32().0 as u64, data.read_last_u32() as u64],
    }
}
/// Permutes the 16 bytes of `a`.
///
/// With SSSE3 available at compile time (and not under Miri) the permutation
/// is performed by `_mm_shuffle_epi8` with `SHUFFLE_MASK` as the control
/// vector. Otherwise the byte order of `a` is reversed.
#[inline(always)]
pub(crate) fn shuffle(a: u128) -> u128 {
    #[cfg(all(target_feature = "ssse3", not(miri)))]
    {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;
        // SAFETY: this block is only compiled when SSSE3 is an enabled target
        // feature, which is what `_mm_shuffle_epi8` requires.
        unsafe {
            let permuted = _mm_shuffle_epi8(transmute!(a), transmute!(SHUFFLE_MASK));
            transmute!(permuted)
        }
    }
    #[cfg(not(all(target_feature = "ssse3", not(miri))))]
    {
        u128::swap_bytes(a)
    }
}
/// Adds `a` and `b` as two independent 64-bit lanes via `add_by_64s`, then
/// runs the sum through `shuffle`.
#[allow(unused)]
#[inline(always)]
pub(crate) fn add_and_shuffle(a: u128, b: u128) -> u128 {
    let lhs: [u64; 2] = a.convert();
    let rhs: [u64; 2] = b.convert();
    let lane_sum: u128 = add_by_64s(lhs, rhs).convert();
    shuffle(lane_sum)
}
/// Runs `base` through `shuffle`, then adds `to_add` to the result as two
/// independent 64-bit lanes via `add_by_64s`.
#[allow(unused)]
#[inline(always)]
pub(crate) fn shuffle_and_add(base: u128, to_add: u128) -> u128 {
    let permuted = shuffle(base);
    let lhs: [u64; 2] = permuted.convert();
    let rhs: [u64; 2] = to_add.convert();
    add_by_64s(lhs, rhs).convert()
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri)))]
#[inline(always)]
pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
unsafe {
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
transmute!(_mm_add_epi64(transmute!(a), transmute!(b)))
}
}
/// Adds `a` and `b` element-wise with wrapping arithmetic; no carry moves
/// between the two elements.
///
/// Portable version, compiled whenever the SSE2 variant is not.
#[cfg(not(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri))))]
#[inline(always)]
pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
    let [a0, a1] = a;
    let [b0, b1] = b;
    [a0.wrapping_add(b0), a1.wrapping_add(b1)]
}
/// Applies one AES encryption round to `value` with `xor` as the round key,
/// using the x86 `_mm_aesenc_si128` instruction.
///
/// Compiled only for x86 / x86_64 targets with the `aes` target feature
/// enabled and not under Miri.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;
    // SAFETY: the enclosing cfg guarantees the `aes` target feature is
    // enabled, which is what `_mm_aesenc_si128` requires.
    unsafe {
        let state: __m128i = transmute!(value);
        let round_key: __m128i = transmute!(xor);
        let mixed: __m128i = _mm_aesenc_si128(state, round_key);
        transmute!(mixed)
    }
}
/// ARM counterpart of `aesenc`: runs `vaeseq_u8` with an all-zero key followed
/// by `vaesmcq_u8` on `value`, then XORs `xor` into the result.
///
/// Compiled only with the `nightly-arm-aes` crate feature on `aarch64` / `arm`
/// targets that have the `aes` target feature, and not under Miri.
#[cfg(any(
    all(feature = "nightly-arm-aes", target_arch = "aarch64", target_feature = "aes", not(miri)),
    all(feature = "nightly-arm-aes", target_arch = "arm", target_feature = "aes", not(miri)),
))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "aarch64")]
    use core::arch::aarch64::*;
    #[cfg(target_arch = "arm")]
    use core::arch::arm::*;
    let state: uint8x16_t = transmute!(value);
    let zero_key: uint8x16_t = transmute!(0u128);
    // SAFETY: the enclosing cfg guarantees the `aes` target feature is
    // enabled, which is what the two intrinsics require.
    let mixed = unsafe { vaesmcq_u8(vaeseq_u8(state, zero_key)) };
    let mixed: u128 = transmute!(mixed);
    // The key is applied after the round rather than passed to the intrinsic.
    mixed ^ xor
}
/// Applies one AES decryption round to `value` with `xor` as the round key,
/// using the x86 `_mm_aesdec_si128` instruction.
///
/// Compiled only for x86 / x86_64 targets with the `aes` target feature
/// enabled and not under Miri.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesdec(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;
    // SAFETY: the enclosing cfg guarantees the `aes` target feature is
    // enabled, which is what `_mm_aesdec_si128` requires.
    unsafe {
        let state: __m128i = transmute!(value);
        let round_key: __m128i = transmute!(xor);
        let mixed: __m128i = _mm_aesdec_si128(state, round_key);
        transmute!(mixed)
    }
}
/// ARM counterpart of `aesdec`: runs `vaesdq_u8` with an all-zero key followed
/// by `vaesimcq_u8` on `value`, then XORs `xor` into the result.
///
/// Compiled only with the `nightly-arm-aes` crate feature on `aarch64` / `arm`
/// targets that have the `aes` target feature, and not under Miri.
#[cfg(any(
    all(feature = "nightly-arm-aes", target_arch = "aarch64", target_feature = "aes", not(miri)),
    all(feature = "nightly-arm-aes", target_arch = "arm", target_feature = "aes", not(miri)),
))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesdec(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "aarch64")]
    use core::arch::aarch64::*;
    #[cfg(target_arch = "arm")]
    use core::arch::arm::*;
    let state: uint8x16_t = transmute!(value);
    let zero_key: uint8x16_t = transmute!(0u128);
    // SAFETY: the enclosing cfg guarantees the `aes` target feature is
    // enabled, which is what the two intrinsics require.
    let mixed = unsafe { vaesimcq_u8(vaesdq_u8(state, zero_key)) };
    let mixed: u128 = transmute!(mixed);
    // The key is applied after the round rather than passed to the intrinsic.
    mixed ^ xor
}
/// Adds `len` to the low 64 bits of `*enc` with wrapping arithmetic.
///
/// The upper 64 bits of `*enc` are left untouched: an overflow of the low
/// half does not carry into the high half.
///
/// On x86_64 with SSE2 (and not under Miri) this is done with a single packed
/// 64-bit add. Every other target uses plain integer arithmetic on the `u128`,
/// so the result does not depend on the target's byte order and always agrees
/// with the SSE2 path.
#[allow(unused)]
#[inline(always)]
pub(crate) fn add_in_length(enc: &mut u128, len: u64) {
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
    {
        use core::arch::x86_64::*;
        // SAFETY: SSE2 is guaranteed by the enclosing cfg. `ptr` comes from a
        // live `&mut u128`, so it is valid for 16 bytes of reads and writes,
        // and the unaligned load/store intrinsics impose no alignment
        // requirement.
        unsafe {
            let ptr = enc as *mut u128;
            // `len as i64` only reinterprets the bits; the value lands in the
            // low lane and the high lane is zero.
            let len = _mm_cvtsi64_si128(len as i64);
            let data = _mm_loadu_si128(ptr.cast());
            let sum = _mm_add_epi64(data, len);
            _mm_storeu_si128(ptr.cast(), sum);
        }
    }
    #[cfg(not(all(target_arch = "x86_64", target_feature = "sse2", not(miri))))]
    {
        // Split by value rather than by memory layout so that the "low half"
        // means the same thing on little- and big-endian targets.
        let high_half = *enc & !(u64::MAX as u128);
        let low_half = (*enc as u64).wrapping_add(len);
        *enc = high_half | (low_half as u128);
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// For every input byte position, the position `shuffle` moves that byte
    /// to must be one that neither `aesenc` nor `aesdec` writes to for the
    /// same single-byte input (after cancelling the all-zero-input output).
    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "ssse3",
        target_feature = "aes",
        not(miri)
    ))]
    #[test]
    fn test_shuffle_does_not_collide_with_aes() {
        // Output of each round on an all-zero input; passing it back in as
        // the key cancels it out of later results.
        let enc_of_zero = aesenc(0, 0);
        let dec_of_zero = aesdec(0, 0);
        let mut probe = [0_u8; 16];
        for index in 0..16 {
            probe[index] = 1;
            let enc_touched: [u8; 16] = aesenc(probe.convert(), enc_of_zero).convert();
            let dec_touched: [u8; 16] = aesdec(probe.convert(), dec_of_zero).convert();
            let landed: [u8; 16] = shuffle(probe.convert()).convert();
            for pos in (0..16_usize).filter(|&pos| landed[pos] != 0) {
                assert_eq!(
                    0, enc_touched[pos],
                    "Forward Overlap between {:?} and {:?} at {}",
                    enc_touched, landed, index
                );
                assert_eq!(
                    0, dec_touched[pos],
                    "Reverse Overlap between {:?} and {:?} at {}",
                    dec_touched, landed, index
                );
            }
            probe[index] = 0;
        }
    }

    /// Shuffling sixteen distinct byte values must keep all sixteen of them.
    #[test]
    fn test_shuffle_contains_each_value() {
        let numbered_bytes: [u8; 16] = 0x00010203_04050607_08090A0B_0C0D0E0F_u128.convert();
        let shuffled: [u8; 16] = shuffle(numbered_bytes.convert()).convert();
        (0..16_u8).for_each(|index| {
            assert!(shuffled.contains(&index), "Value is missing {}", index);
        });
    }

    /// No byte may stay in its original position.
    #[test]
    fn test_shuffle_moves_every_value() {
        let mut probe = [0_u8; 16];
        for index in 0..16 {
            probe[index] = 1;
            let shuffled: [u8; 16] = shuffle(probe.convert()).convert();
            assert_eq!(0, shuffled[index], "Value is not moved {}", index);
            probe[index] = 0;
        }
    }

    /// Bits from one 64-bit half must end up in the other half, and bits at
    /// the extremes must not stay at the same extreme.
    #[test]
    fn test_shuffle_moves_high_bits() {
        let from_byte_0 = shuffle(1);
        assert!(
            from_byte_0 > (1_u128 << 80),
            "Low bits must be moved to other half {:?} -> {:?}",
            0,
            from_byte_0
        );

        let from_byte_7 = shuffle(1_u128 << 58);
        assert!(
            from_byte_7 >= (1_u128 << 64),
            "High bits must be moved to other half {:?} -> {:?}",
            7,
            from_byte_7
        );
        assert!(
            from_byte_7 < (1_u128 << 112),
            "High bits must not remain high {:?} -> {:?}",
            7,
            from_byte_7
        );

        let from_byte_8 = shuffle(1_u128 << 64);
        assert!(
            from_byte_8 < (1_u128 << 64),
            "Low bits must be moved to other half {:?} -> {:?}",
            8,
            from_byte_8
        );
        assert!(
            from_byte_8 >= (1_u128 << 16),
            "Low bits must not remain low {:?} -> {:?}",
            8,
            from_byte_8
        );

        let from_byte_15 = shuffle(1_u128 << 120);
        assert!(
            from_byte_15 < (1_u128 << 50),
            "High bits must be moved to low half {:?} -> {:?}",
            15,
            from_byte_15
        );
    }

    /// Repeatedly applying the SSSE3 shuffle must not return to the starting
    /// value within 100 further applications.
    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "ssse3",
        not(miri)
    ))]
    #[test]
    fn test_shuffle_does_not_loop() {
        let start = 0x00112233_44556677_8899AABB_CCDDEEFF;
        let mut current = shuffle(start);
        for count in 0..100 {
            assert_ne!(start, current, "Equal after {} vs {:x}", count, current);
            current = shuffle(current);
        }
    }

    /// Adding the length must wrap within the low 64 bits and leave the high
    /// 64 bits untouched.
    #[test]
    fn test_add_length() {
        let mut state = (u64::MAX as u128) << 64 | 50;
        add_in_length(&mut state, u64::MAX);
        assert_eq!(state >> 64, u64::MAX as u128);
        assert_eq!(state as u64, 49);
    }
}