use alloc::vec::Vec;
use core::mem;
use core::ops::Shl;
use num_traits::One;
use crate::big_digit::{self, BigDigit, DoubleBigDigit};
use crate::biguint::BigUint;
struct MontyReducer {
n0inv: BigDigit,
}
fn inv_mod_alt(b: BigDigit) -> BigDigit {
assert_ne!(b & 1, 0);
let mut k0 = BigDigit::wrapping_sub(2, b);
let mut t = b - 1;
let mut i = 1;
while i < big_digit::BITS {
t = t.wrapping_mul(t);
k0 = k0.wrapping_mul(t + 1);
i <<= 1;
}
debug_assert_eq!(k0.wrapping_mul(b), 1);
k0.wrapping_neg()
}
impl MontyReducer {
fn new(n: &BigUint) -> Self {
let n0inv = inv_mod_alt(n.data[0]);
MontyReducer { n0inv }
}
}
#[allow(clippy::many_single_char_names)]
fn montgomery(x: &BigUint, y: &BigUint, m: &BigUint, k: BigDigit, n: usize) -> BigUint {
assert!(
x.data.len() == n && y.data.len() == n && m.data.len() == n,
"{:?} {:?} {:?} {}",
x,
y,
m,
n
);
let mut z = BigUint::ZERO;
z.data.resize(n * 2, 0);
let mut c: BigDigit = 0;
for i in 0..n {
let c2 = add_mul_vvw(&mut z.data[i..n + i], &x.data, y.data[i]);
let t = z.data[i].wrapping_mul(k);
let c3 = add_mul_vvw(&mut z.data[i..n + i], &m.data, t);
let cx = c.wrapping_add(c2);
let cy = cx.wrapping_add(c3);
z.data[n + i] = cy;
if cx < c2 || cy < c3 {
c = 1;
} else {
c = 0;
}
}
if c == 0 {
z.data = z.data[n..].to_vec();
} else {
{
let (first, second) = z.data.split_at_mut(n);
sub_vv(first, second, &m.data);
}
z.data = z.data[..n].to_vec();
}
z
}
#[inline(always)]
fn add_mul_vvw(z: &mut [BigDigit], x: &[BigDigit], y: BigDigit) -> BigDigit {
let mut c = 0;
for (zi, xi) in z.iter_mut().zip(x.iter()) {
let (z1, z0) = mul_add_www(*xi, y, *zi);
let (c_, zi_) = add_ww(z0, c, 0);
*zi = zi_;
c = c_ + z1;
}
c
}
#[inline(always)]
fn sub_vv(z: &mut [BigDigit], x: &[BigDigit], y: &[BigDigit]) -> BigDigit {
let mut c = 0;
for (i, (xi, yi)) in x.iter().zip(y.iter()).enumerate().take(z.len()) {
let zi = xi.wrapping_sub(*yi).wrapping_sub(c);
z[i] = zi;
c = ((yi & !xi) | ((yi | !xi) & zi)) >> (big_digit::BITS - 1)
}
c
}
#[inline(always)]
fn add_ww(x: BigDigit, y: BigDigit, c: BigDigit) -> (BigDigit, BigDigit) {
let yc = y.wrapping_add(c);
let z0 = x.wrapping_add(yc);
let z1 = if z0 < x || yc < y { 1 } else { 0 };
(z1, z0)
}
#[inline(always)]
fn mul_add_www(x: BigDigit, y: BigDigit, c: BigDigit) -> (BigDigit, BigDigit) {
let z = x as DoubleBigDigit * y as DoubleBigDigit + c as DoubleBigDigit;
((z >> big_digit::BITS) as BigDigit, z as BigDigit)
}
#[allow(clippy::many_single_char_names)]
pub(super) fn monty_modpow(x: &BigUint, y: &BigUint, m: &BigUint) -> BigUint {
assert!(m.data[0] & 1 == 1);
let mr = MontyReducer::new(m);
let num_words = m.data.len();
let mut x = x.clone();
if x.data.len() > num_words {
x %= m;
}
if x.data.len() < num_words {
x.data.resize(num_words, 0);
}
let mut rr = BigUint::one();
rr = (rr.shl(2 * num_words as u64 * u64::from(big_digit::BITS))) % m;
if rr.data.len() < num_words {
rr.data.resize(num_words, 0);
}
let mut one = BigUint::one();
one.data.resize(num_words, 0);
let n = 4;
let mut powers = Vec::with_capacity(1 << n);
powers.push(montgomery(&one, &rr, m, mr.n0inv, num_words));
powers.push(montgomery(&x, &rr, m, mr.n0inv, num_words));
for i in 2..1 << n {
let r = montgomery(&powers[i - 1], &powers[1], m, mr.n0inv, num_words);
powers.push(r);
}
let mut z = powers[0].clone();
z.data.resize(num_words, 0);
let mut zz = BigUint::ZERO;
zz.data.resize(num_words, 0);
for i in (0..y.data.len()).rev() {
let mut yi = y.data[i];
let mut j = 0;
while j < big_digit::BITS {
if i != y.data.len() - 1 || j != 0 {
zz = montgomery(&z, &z, m, mr.n0inv, num_words);
z = montgomery(&zz, &zz, m, mr.n0inv, num_words);
zz = montgomery(&z, &z, m, mr.n0inv, num_words);
z = montgomery(&zz, &zz, m, mr.n0inv, num_words);
}
zz = montgomery(
&z,
&powers[(yi >> (big_digit::BITS - n)) as usize],
m,
mr.n0inv,
num_words,
);
mem::swap(&mut z, &mut zz);
yi <<= n;
j += n;
}
}
zz = montgomery(&z, &one, m, mr.n0inv, num_words);
zz.normalize();
if zz >= *m {
zz -= m;
if zz >= *m {
zz %= m;
}
}
zz.normalize();
zz
}