use crate::core_arch::x86::__m256i;
use crate::core_arch::x86::__m512i;
#[cfg(test)]
use stdarch_test::assert_instr;
// Raw bindings to the LLVM x86 AES round intrinsics operating on 256-bit and
// 512-bit vectors. Every binding takes a state vector `a` and a `round_key`
// vector of the same width and returns a vector of that width. The public
// `_mm256_*_epi128` / `_mm512_*_epi128` functions in this file are thin
// wrappers around these declarations.
//
// NOTE(review): `improper_ctypes` is presumably allowed because the SIMD vector
// types are flagged by that lint when used in foreign signatures — confirm.
#[allow(improper_ctypes)]
extern "C" {
// 256-bit bindings (wrapped by the `_mm256_*_epi128` functions).
#[link_name = "llvm.x86.aesni.aesenc.256"]
fn aesenc_256(a: __m256i, round_key: __m256i) -> __m256i;
#[link_name = "llvm.x86.aesni.aesenclast.256"]
fn aesenclast_256(a: __m256i, round_key: __m256i) -> __m256i;
#[link_name = "llvm.x86.aesni.aesdec.256"]
fn aesdec_256(a: __m256i, round_key: __m256i) -> __m256i;
#[link_name = "llvm.x86.aesni.aesdeclast.256"]
fn aesdeclast_256(a: __m256i, round_key: __m256i) -> __m256i;
// 512-bit bindings (wrapped by the `_mm512_*_epi128` functions).
#[link_name = "llvm.x86.aesni.aesenc.512"]
fn aesenc_512(a: __m512i, round_key: __m512i) -> __m512i;
#[link_name = "llvm.x86.aesni.aesenclast.512"]
fn aesenclast_512(a: __m512i, round_key: __m512i) -> __m512i;
#[link_name = "llvm.x86.aesni.aesdec.512"]
fn aesdec_512(a: __m512i, round_key: __m512i) -> __m512i;
#[link_name = "llvm.x86.aesni.aesdeclast.512"]
fn aesdeclast_512(a: __m512i, round_key: __m512i) -> __m512i;
}
/// Performs one round of AES encryption on each 128-bit lane (state) of `a`,
/// using the corresponding 128-bit lane of `round_key` as the round key.
///
/// This is the 256-bit counterpart of `_mm_aesenc_si128`: each of the two
/// lanes is processed independently. Test builds assert that the function
/// lowers to the `vaesenc` instruction.
///
/// # Safety
///
/// The caller must ensure that the executing CPU supports the `vaes` target
/// feature.
#[inline]
#[target_feature(enable = "vaes")]
#[cfg_attr(test, assert_instr(vaesenc))]
pub unsafe fn _mm256_aesenc_epi128(a: __m256i, round_key: __m256i) -> __m256i {
aesenc_256(a, round_key)
}
/// Performs the last round of AES encryption on each 128-bit lane (state) of
/// `a`, using the corresponding 128-bit lane of `round_key` as the round key.
///
/// This is the 256-bit counterpart of `_mm_aesenclast_si128`: each of the two
/// lanes is processed independently. Test builds assert that the function
/// lowers to the `vaesenclast` instruction.
///
/// # Safety
///
/// The caller must ensure that the executing CPU supports the `vaes` target
/// feature.
#[inline]
#[target_feature(enable = "vaes")]
#[cfg_attr(test, assert_instr(vaesenclast))]
pub unsafe fn _mm256_aesenclast_epi128(a: __m256i, round_key: __m256i) -> __m256i {
aesenclast_256(a, round_key)
}
/// Performs one round of AES decryption on each 128-bit lane (state) of `a`,
/// using the corresponding 128-bit lane of `round_key` as the round key.
///
/// This is the 256-bit counterpart of `_mm_aesdec_si128`: each of the two
/// lanes is processed independently. Test builds assert that the function
/// lowers to the `vaesdec` instruction.
///
/// # Safety
///
/// The caller must ensure that the executing CPU supports the `vaes` target
/// feature.
#[inline]
#[target_feature(enable = "vaes")]
#[cfg_attr(test, assert_instr(vaesdec))]
pub unsafe fn _mm256_aesdec_epi128(a: __m256i, round_key: __m256i) -> __m256i {
aesdec_256(a, round_key)
}
/// Performs the last round of AES decryption on each 128-bit lane (state) of
/// `a`, using the corresponding 128-bit lane of `round_key` as the round key.
///
/// This is the 256-bit counterpart of `_mm_aesdeclast_si128`: each of the two
/// lanes is processed independently. Test builds assert that the function
/// lowers to the `vaesdeclast` instruction.
///
/// # Safety
///
/// The caller must ensure that the executing CPU supports the `vaes` target
/// feature.
#[inline]
#[target_feature(enable = "vaes")]
#[cfg_attr(test, assert_instr(vaesdeclast))]
pub unsafe fn _mm256_aesdeclast_epi128(a: __m256i, round_key: __m256i) -> __m256i {
aesdeclast_256(a, round_key)
}
/// Performs one round of AES encryption on each 128-bit lane (state) of `a`,
/// using the corresponding 128-bit lane of `round_key` as the round key.
///
/// This is the 512-bit counterpart of `_mm_aesenc_si128`: each of the four
/// lanes is processed independently. Test builds assert that the function
/// lowers to the `vaesenc` instruction.
///
/// # Safety
///
/// The caller must ensure that the executing CPU supports both the `vaes` and
/// `avx512f` target features.
#[inline]
#[target_feature(enable = "vaes,avx512f")]
#[cfg_attr(test, assert_instr(vaesenc))]
pub unsafe fn _mm512_aesenc_epi128(a: __m512i, round_key: __m512i) -> __m512i {
aesenc_512(a, round_key)
}
/// Performs the last round of AES encryption on each 128-bit lane (state) of
/// `a`, using the corresponding 128-bit lane of `round_key` as the round key.
///
/// This is the 512-bit counterpart of `_mm_aesenclast_si128`: each of the four
/// lanes is processed independently. Test builds assert that the function
/// lowers to the `vaesenclast` instruction.
///
/// # Safety
///
/// The caller must ensure that the executing CPU supports both the `vaes` and
/// `avx512f` target features.
#[inline]
#[target_feature(enable = "vaes,avx512f")]
#[cfg_attr(test, assert_instr(vaesenclast))]
pub unsafe fn _mm512_aesenclast_epi128(a: __m512i, round_key: __m512i) -> __m512i {
aesenclast_512(a, round_key)
}
/// Performs one round of AES decryption on each 128-bit lane (state) of `a`,
/// using the corresponding 128-bit lane of `round_key` as the round key.
///
/// This is the 512-bit counterpart of `_mm_aesdec_si128`: each of the four
/// lanes is processed independently. Test builds assert that the function
/// lowers to the `vaesdec` instruction.
///
/// # Safety
///
/// The caller must ensure that the executing CPU supports both the `vaes` and
/// `avx512f` target features.
#[inline]
#[target_feature(enable = "vaes,avx512f")]
#[cfg_attr(test, assert_instr(vaesdec))]
pub unsafe fn _mm512_aesdec_epi128(a: __m512i, round_key: __m512i) -> __m512i {
aesdec_512(a, round_key)
}
/// Performs the last round of AES decryption on each 128-bit lane (state) of
/// `a`, using the corresponding 128-bit lane of `round_key` as the round key.
///
/// This is the 512-bit counterpart of `_mm_aesdeclast_si128`: each of the four
/// lanes is processed independently. Test builds assert that the function
/// lowers to the `vaesdeclast` instruction.
///
/// # Safety
///
/// The caller must ensure that the executing CPU supports both the `vaes` and
/// `avx512f` target features.
#[inline]
#[target_feature(enable = "vaes,avx512f")]
#[cfg_attr(test, assert_instr(vaesdeclast))]
pub unsafe fn _mm512_aesdeclast_epi128(a: __m512i, round_key: __m512i) -> __m512i {
aesdeclast_512(a, round_key)
}
#[cfg(test)]
mod tests {
#![allow(overflowing_literals)]
use stdarch_test::simd_test;
use crate::core_arch::x86::*;
// Cross-checks a 256-bit AES round function against its 128-bit counterpart.
//
// `vectorized` is run once on a fixed 256-bit state/key pair; `linear` is run
// on each 128-bit lane of the same inputs. Every lane of the wide result must
// equal the matching narrow result.
#[target_feature(enable = "avx2")]
unsafe fn helper_for_256_vaes(
    linear: unsafe fn(__m128i, __m128i) -> __m128i,
    vectorized: unsafe fn(__m256i, __m256i) -> __m256i,
) {
    // Fixed test vectors; the two lanes differ so a lane mix-up is detected.
    let a = _mm256_set_epi64x(
        0xDCB4DB3657BF0B7D,
        0x18DB0601068EDD9F,
        0xB76B908233200DC5,
        0xE478235FA8E22D5E,
    );
    let k = _mm256_set_epi64x(
        0x672F6F105A94CEA7,
        0x8298B8FFCA5F829C,
        0xA3927047B3FB61D8,
        0x978093862CDE7187,
    );

    // Split the inputs into their 128-bit lanes.
    let state_lanes = [
        _mm256_extracti128_si256::<0>(a),
        _mm256_extracti128_si256::<1>(a),
    ];
    let key_lanes = [
        _mm256_extracti128_si256::<0>(k),
        _mm256_extracti128_si256::<1>(k),
    ];

    // Run the wide operation and split its result the same way.
    let r = vectorized(a, k);
    let result_lanes = [
        _mm256_extracti128_si256::<0>(r),
        _mm256_extracti128_si256::<1>(r),
    ];

    // Each wide lane must match the narrow operation applied to that lane.
    let lane_inputs = state_lanes.iter().zip(key_lanes.iter());
    for ((&state, &key), &actual) in lane_inputs.zip(result_lanes.iter()) {
        assert_eq_m128i(actual, linear(state, key));
    }
}
// Builds the shared (state, round key) test inputs.
//
// A fixed 128-bit state and a fixed 128-bit key are each passed through
// `broadcast`, which turns a 128-bit vector into the wider vector type `T`.
// Returns `(state, key)` in that order.
#[target_feature(enable = "sse2")]
unsafe fn setup_state_key<T>(broadcast: unsafe fn(__m128i) -> T) -> (T, T) {
    let state = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
    let round_key = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
    let wide_state = broadcast(state);
    let wide_key = broadcast(round_key);
    (wide_state, wide_key)
}
// Returns the shared test (state, key) pair, each broadcast to 256 bits via
// `_mm256_broadcastsi128_si256`.
#[target_feature(enable = "avx2")]
unsafe fn setup_state_key_256() -> (__m256i, __m256i) {
    setup_state_key::<__m256i>(_mm256_broadcastsi128_si256)
}
// Returns the shared test (state, key) pair, each broadcast to 512 bits via
// `_mm512_broadcast_i32x4`.
#[target_feature(enable = "avx512f")]
unsafe fn setup_state_key_512() -> (__m512i, __m512i) {
    setup_state_key::<__m512i>(_mm512_broadcast_i32x4)
}
// Checks `_mm256_aesdec_epi128` against a known answer replicated across both
// lanes, then against `_mm_aesdec_si128` lane by lane.
#[simd_test(enable = "vaes,avx512vl")]
unsafe fn test_mm256_aesdec_epi128() {
    let (state, round_key) = setup_state_key_256();
    // Known 128-bit result for the shared inputs, replicated to every lane.
    let expected =
        _mm256_broadcastsi128_si256(_mm_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee));
    assert_eq_m256i(_mm256_aesdec_epi128(state, round_key), expected);

    helper_for_256_vaes(_mm_aesdec_si128, _mm256_aesdec_epi128);
}
// Checks `_mm256_aesdeclast_epi128` against a known answer replicated across
// both lanes, then against `_mm_aesdeclast_si128` lane by lane.
#[simd_test(enable = "vaes,avx512vl")]
unsafe fn test_mm256_aesdeclast_epi128() {
    let (state, round_key) = setup_state_key_256();
    // Known 128-bit result for the shared inputs, replicated to every lane.
    let expected =
        _mm256_broadcastsi128_si256(_mm_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493));
    assert_eq_m256i(_mm256_aesdeclast_epi128(state, round_key), expected);

    helper_for_256_vaes(_mm_aesdeclast_si128, _mm256_aesdeclast_epi128);
}
// Checks `_mm256_aesenc_epi128` against a known answer replicated across both
// lanes, then against `_mm_aesenc_si128` lane by lane.
#[simd_test(enable = "vaes,avx512vl")]
unsafe fn test_mm256_aesenc_epi128() {
    let (state, round_key) = setup_state_key_256();
    // Known 128-bit result for the shared inputs, replicated to every lane.
    let expected =
        _mm256_broadcastsi128_si256(_mm_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333));
    assert_eq_m256i(_mm256_aesenc_epi128(state, round_key), expected);

    helper_for_256_vaes(_mm_aesenc_si128, _mm256_aesenc_epi128);
}
// Checks `_mm256_aesenclast_epi128` against a known answer replicated across
// both lanes, then against `_mm_aesenclast_si128` lane by lane.
#[simd_test(enable = "vaes,avx512vl")]
unsafe fn test_mm256_aesenclast_epi128() {
    let (state, round_key) = setup_state_key_256();
    // Known 128-bit result for the shared inputs, replicated to every lane.
    let expected =
        _mm256_broadcastsi128_si256(_mm_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8));
    assert_eq_m256i(_mm256_aesenclast_epi128(state, round_key), expected);

    helper_for_256_vaes(_mm_aesenclast_si128, _mm256_aesenclast_epi128);
}
#[target_feature(enable = "avx512f")]
unsafe fn helper_for_512_vaes(
linear: unsafe fn(__m128i, __m128i) -> __m128i,
vectorized: unsafe fn(__m512i, __m512i) -> __m512i,
) {
let a = _mm512_set_epi64(
0xDCB4DB3657BF0B7D,
0x18DB0601068EDD9F,
0xB76B908233200DC5,
0xE478235FA8E22D5E,
0xAB05CFFA2621154C,
0x1171B47A186174C9,
0x8C6B6C0E7595CEC9,
0xBE3E7D4934E961BD,
);
let k = _mm512_set_epi64(
0x672F6F105A94CEA7,
0x8298B8FFCA5F829C,
0xA3927047B3FB61D8,
0x978093862CDE7187,
0xB1927AB22F31D0EC,
0xA9A5DA619BE4D7AF,
0xCA2590F56884FDC6,
0x19BE9F660038BDB5,
);
let mut a_decomp = [_mm_setzero_si128(); 4];
a_decomp[0] = _mm512_extracti32x4_epi32::<0>(a);
a_decomp[1] = _mm512_extracti32x4_epi32::<1>(a);
a_decomp[2] = _mm512_extracti32x4_epi32::<2>(a);
a_decomp[3] = _mm512_extracti32x4_epi32::<3>(a);
let mut k_decomp = [_mm_setzero_si128(); 4];
k_decomp[0] = _mm512_extracti32x4_epi32::<0>(k);
k_decomp[1] = _mm512_extracti32x4_epi32::<1>(k);
k_decomp[2] = _mm512_extracti32x4_epi32::<2>(k);
k_decomp[3] = _mm512_extracti32x4_epi32::<3>(k);
let r = vectorized(a, k);
let mut e_decomp = [_mm_setzero_si128(); 4];
for i in 0..4 {
e_decomp[i] = linear(a_decomp[i], k_decomp[i]);
}
assert_eq_m128i(_mm512_extracti32x4_epi32::<0>(r), e_decomp[0]);
assert_eq_m128i(_mm512_extracti32x4_epi32::<1>(r), e_decomp[1]);
assert_eq_m128i(_mm512_extracti32x4_epi32::<2>(r), e_decomp[2]);
assert_eq_m128i(_mm512_extracti32x4_epi32::<3>(r), e_decomp[3]);
}
// Checks `_mm512_aesdec_epi128` against a known answer replicated across all
// four lanes, then against `_mm_aesdec_si128` lane by lane.
#[simd_test(enable = "vaes,avx512f")]
unsafe fn test_mm512_aesdec_epi128() {
    let (state, round_key) = setup_state_key_512();
    // Known 128-bit result for the shared inputs, replicated to every lane.
    let expected = _mm512_broadcast_i32x4(_mm_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee));
    assert_eq_m512i(_mm512_aesdec_epi128(state, round_key), expected);

    helper_for_512_vaes(_mm_aesdec_si128, _mm512_aesdec_epi128);
}
// Checks `_mm512_aesdeclast_epi128` against a known answer replicated across
// all four lanes, then against `_mm_aesdeclast_si128` lane by lane.
#[simd_test(enable = "vaes,avx512f")]
unsafe fn test_mm512_aesdeclast_epi128() {
    let (state, round_key) = setup_state_key_512();
    // Known 128-bit result for the shared inputs, replicated to every lane.
    let expected = _mm512_broadcast_i32x4(_mm_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493));
    assert_eq_m512i(_mm512_aesdeclast_epi128(state, round_key), expected);

    helper_for_512_vaes(_mm_aesdeclast_si128, _mm512_aesdeclast_epi128);
}
// Checks `_mm512_aesenc_epi128` against a known answer replicated across all
// four lanes, then against `_mm_aesenc_si128` lane by lane.
#[simd_test(enable = "vaes,avx512f")]
unsafe fn test_mm512_aesenc_epi128() {
    let (state, round_key) = setup_state_key_512();
    // Known 128-bit result for the shared inputs, replicated to every lane.
    let expected = _mm512_broadcast_i32x4(_mm_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333));
    assert_eq_m512i(_mm512_aesenc_epi128(state, round_key), expected);

    helper_for_512_vaes(_mm_aesenc_si128, _mm512_aesenc_epi128);
}
// Checks `_mm512_aesenclast_epi128` against a known answer replicated across
// all four lanes, then against `_mm_aesenclast_si128` lane by lane.
#[simd_test(enable = "vaes,avx512f")]
unsafe fn test_mm512_aesenclast_epi128() {
    let (state, round_key) = setup_state_key_512();
    // Known 128-bit result for the shared inputs, replicated to every lane.
    let expected = _mm512_broadcast_i32x4(_mm_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8));
    assert_eq_m512i(_mm512_aesenclast_epi128(state, round_key), expected);

    helper_for_512_vaes(_mm_aesenclast_si128, _mm512_aesenclast_epi128);
}
}