/* mpfr_exp_2 -- exponential of a floating-point number
using algorithms in O(n^(1/2)*M(n)) and O(n^(1/3)*M(n))
Copyright 1999-2023 Free Software Foundation, Inc.
Contributed by the AriC and Caramba projects, INRIA.
This file is part of the GNU MPFR Library.
The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.
The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
#define MPFR_NEED_LONGLONG_H /* MPFR_INT_CEIL_LOG2 */
#include "mpfr-impl.h"
static unsigned long
mpfr_exp2_aux (mpz_t, mpfr_srcptr, mpfr_prec_t, mpfr_exp_t *);
static unsigned long
mpfr_exp2_aux2 (mpz_t, mpfr_srcptr, mpfr_prec_t, mpfr_exp_t *);
static mpfr_exp_t
mpz_normalize (mpz_t, mpz_t, mpfr_exp_t);
static mpfr_exp_t
mpz_normalize2 (mpz_t, mpz_t, mpfr_exp_t, mpfr_exp_t);
/* if k = the number of bits of z > q, divides z by 2^(k-q) and returns k-q.
Otherwise do nothing and return 0.
*/
static mpfr_exp_t
mpz_normalize (mpz_t rop, mpz_t z, mpfr_exp_t q)
{
size_t k;
MPFR_MPZ_SIZEINBASE2 (k, z);
MPFR_ASSERTD (k == (mpfr_uexp_t) k);
if (MPFR_LIKELY(q < 0 || (mpfr_uexp_t) k > (mpfr_uexp_t) q))
{
mpz_fdiv_q_2exp (rop, z, (unsigned long) ((mpfr_uexp_t) k - q));
return (mpfr_exp_t) k - q;
}
mpz_set (rop, z);
return 0;
}
/* if expz > target, shift z by (expz-target) bits to the left.
if expz < target, shift z by (target-expz) bits to the right.
Returns target.
*/
static mpfr_exp_t
mpz_normalize2 (mpz_t rop, mpz_t z, mpfr_exp_t expz, mpfr_exp_t target)
{
if (MPFR_LIKELY(target > expz))
mpz_fdiv_q_2exp (rop, z, target - expz);
else
mpz_mul_2exp (rop, z, expz - target);
return target;
}
/* use Brent's formula exp(x) = (1+r+r^2/2!+r^3/3!+...)^(2^K)*2^n
where x = n*log(2)+(2^K)*r
together with the Paterson-Stockmeyer O(t^(1/2)) algorithm for the
evaluation of power series. The resulting complexity is O(n^(1/3)*M(n)).
This function returns with the exact flags due to exp.
*/
int
mpfr_exp_2 (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
{
long n;
unsigned long K, k, l, err; /* FIXME: Which type ? */
int error_r;
mpfr_exp_t exps, expx;
mpfr_prec_t q, precy;
int inexact;
mpfr_t s, r;
mpz_t ss;
MPFR_GROUP_DECL(group);
MPFR_ZIV_DECL (loop);
MPFR_LOG_FUNC
(("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec(x), mpfr_log_prec, x, rnd_mode),
("y[%Pu]=%.*Rg inexact=%d", mpfr_get_prec(y), mpfr_log_prec, y,
inexact));
expx = MPFR_GET_EXP (x);
precy = MPFR_PREC(y);
/* First perform argument reduction: if x' = x - n*log(2) we have
exp(x) = exp(x)*2^n. We should take n near from x/log(2) but it does not
need to be exact.
Warning: we cannot use the 'double' type here, since on 64-bit machines
x may be as large as 2^62*log(2) without overflow, and then x/log(2)
is about 2^62: not every integer of that size can be represented as a
'double', thus the argument reduction would fail. */
if (MPFR_UNLIKELY(expx <= -2))
/* |x| <= 0.25, thus n = round(x/log(2)) = 0 */
n = 0;
else
{
mp_limb_t r_limb[(sizeof (long) -1) / sizeof(mp_limb_t) + 1];
/* Note: we use precision sizeof (long) * CHAR_BIT - 1 here since it is
more efficient that full limb precision.
The value of n will depend on whether MPFR_LONG_WITHIN_LIMB is
defined or not. For instance, for r = 0.111E0, one gets n = 0
in the former case and n = 1 in the latter case. */
MPFR_TMP_INIT1(r_limb, r, sizeof (long) * CHAR_BIT - 1);
mpfr_div (r, x, __gmpfr_const_log2_RNDD, MPFR_RNDN);
#ifdef MPFR_LONG_WITHIN_LIMB
/* The following code assume an unsigned long can fit in a mp_limb_t */
{
mp_limb_t a;
mpfr_exp_t exp;
/* Read the long directly (faster than using mpfr_get_si
since it fits, it is not singular, it can't be zero
and there is no conversion to do) */
MPFR_ASSERTD (MPFR_NOTZERO (r));
exp = MPFR_GET_EXP (r);
MPFR_ASSERTD (exp <= GMP_NUMB_BITS);
if (exp >= 1)
{
a = MPFR_MANT(r)[0] >> (GMP_NUMB_BITS - exp);
n = MPFR_IS_POS (r) ? a : a <= LONG_MAX ? - (long) a : LONG_MIN;
}
else
n = 0;
}
#else
/* Use generic way to get the long */
n = mpfr_get_si (r, MPFR_RNDN);
#endif
}
/* we have |x| <= (|n|+1)*log(2) */
MPFR_LOG_MSG (("d(x)=%1.30e n=%ld\n", mpfr_get_d1(x), n));
/* error_r bounds the cancelled bits in x - n*log(2) */
if (MPFR_UNLIKELY (n == 0))
error_r = 0;
else
{
error_r = mpfr_nbits_ulong (SAFE_ABS (unsigned long, n) + 1);
/* we have |x| <= 2^error_r * log(2) */
}
/* for the O(n^(1/2)*M(n)) method, the Taylor series computation of
n/K terms costs about n/(2K) multiplications when computed in fixed
point */
K = (precy < MPFR_EXP_2_THRESHOLD) ? __gmpfr_isqrt ((precy + 1) / 2) + 3
: __gmpfr_cuberoot (4*precy);
l = (precy - 1) / K + 1;
err = K + MPFR_INT_CEIL_LOG2 (2 * l + 18);
/* add K extra bits, i.e. failure probability <= 1/2^K = O(1/precy) */
q = precy + err + K + 10;
/* if |x| >> 1, take into account the cancelled bits */
if (expx > 0)
q += expx;
/* Even with to the mpfr_prec_round below, it is possible to use
the MPFR_GROUP_* macros here because mpfr_prec_round is only
called in a special case. */
MPFR_GROUP_INIT_2(group, q + error_r, r, s);
mpz_init (ss);
/* the algorithm consists in computing an upper bound of exp(x) using
a precision of q bits, and see if we can round to MPFR_PREC(y) taking
into account the maximal error. Otherwise we increase q. */
MPFR_ZIV_INIT (loop, q);
for (;;)
{
MPFR_LOG_MSG (("n=%ld K=%lu l=%lu q=%lu error_r=%d\n",
n, K, l, (unsigned long) q, error_r));
/* First reduce the argument to r = x - n * log(2),
so that r is small in absolute value. We want an upper
bound on r to get an upper bound on exp(x). */
/* if n<0, we have to get an upper bound of log(2)
in order to get an upper bound of r = x-n*log(2) */
mpfr_const_log2 (s, (n >= 0) ? MPFR_RNDZ : MPFR_RNDU);
/* s is within 1 ulp(s) of log(2) */
mpfr_mul_ui (r, s, (n < 0) ? -n : n, (n >= 0) ? MPFR_RNDZ : MPFR_RNDU);
/* r is within 3 ulps of |n|*log(2) */
if (n < 0)
MPFR_CHANGE_SIGN (r);
/* r <= n*log(2), within 3 ulps */
MPFR_LOG_VAR (x);
MPFR_LOG_VAR (r);
mpfr_sub (r, x, r, MPFR_RNDU);
while (MPFR_IS_PURE_FP(r) && MPFR_IS_NEG (r))
{ /* initial approximation n was too large */
n--;
mpfr_add (r, r, s, MPFR_RNDU);
}
/* if r is 0, we cannot round correctly */
if (MPFR_LIKELY(MPFR_IS_PURE_FP (r)))
{
/* since there was a cancellation in x - n*log(2), the low error_r
bits from r are zero and thus non significant, thus we can reduce
the working precision */
if (MPFR_LIKELY(error_r > 0))
{
MPFR_ASSERTD( MPFR_PREC(r) > q);
/* since MPFR_PREC(r) > q, there is no reallocation to do,
and it is safe to use it with MPFR_GROUP functions */
mpfr_prec_round (r, q, MPFR_RNDU);
}
/* the error on r is at most 3 ulps (3 ulps if error_r = 0,
and 1 + 3/2 if error_r > 0) */
MPFR_LOG_VAR (r);
MPFR_ASSERTD (MPFR_IS_POS (r));
mpfr_div_2ui (r, r, K, MPFR_RNDU); /* r = (x-n*log(2))/2^K, exact */
/* s <- 1 + r/1! + r^2/2! + ... + r^l/l! */
MPFR_ASSERTD (MPFR_IS_PURE_FP (r) && MPFR_EXP (r) < 0);
l = (precy < MPFR_EXP_2_THRESHOLD)
? mpfr_exp2_aux (ss, r, q, &exps) /* naive method */
: mpfr_exp2_aux2 (ss, r, q, &exps); /* Paterson/Stockmeyer meth */
MPFR_LOG_MSG (("l=%lu q=%lu (K+l)*q^2=%1.3e\n",
l, (unsigned long) q, (K + l) * (double) q * q));
for (k = 0; k < K; k++)
{
mpz_mul (ss, ss, ss);
exps *= 2;
exps += mpz_normalize (ss, ss, q);
}
mpfr_set_z_2exp (s, ss, exps, MPFR_RNDN);
/* error is at most 2^K*l, plus 2 to take into account of
the error of 3 ulps on r */
err = K + MPFR_INT_CEIL_LOG2 (l) + 2;
MPFR_LOG_MSG (("before mult. by 2^n:\n", 0));
MPFR_LOG_VAR (s);
MPFR_LOG_MSG (("err=%lu bits\n", K));
if (MPFR_LIKELY (MPFR_CAN_ROUND (s, q - err, precy, rnd_mode)))
{
MPFR_CLEAR_FLAGS ();
inexact = mpfr_mul_2si (y, s, n, rnd_mode);
break;
}
}
MPFR_ZIV_NEXT (loop, q);
MPFR_GROUP_REPREC_2(group, q+error_r, r, s);
}
MPFR_ZIV_FREE (loop);
mpz_clear (ss);
MPFR_GROUP_CLEAR (group);
return inexact;
}
/* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
using naive method with O(l) multiplications.
Return the number of iterations l.
The absolute error on s is less than 3*l*(l+1)*2^(-q).
Version using fixed-point arithmetic with mpz instead
of mpfr for internal computations.
*/
static unsigned long
mpfr_exp2_aux (mpz_t s, mpfr_srcptr r, mpfr_prec_t q, mpfr_exp_t *exps)
{
unsigned long l;
mpfr_exp_t dif, expt, expr;
mpz_t t, rr;
mp_size_t sbit, tbit;
MPFR_ASSERTD (MPFR_IS_PURE_FP (r));
expt = 0;
*exps = 1 - (mpfr_exp_t) q; /* s = 2^(q-1) */
mpz_init (t);
mpz_init (rr);
mpz_set_ui (t, 1);
mpz_set_ui (s, 1);
mpz_mul_2exp (s, s, q-1);
expr = mpfr_get_z_2exp (rr, r); /* no error here */
l = 0;
for (;;)
{
l++;
mpz_mul(t, t, rr);
expt += expr;
MPFR_MPZ_SIZEINBASE2 (sbit, s);
MPFR_MPZ_SIZEINBASE2 (tbit, t);
dif = *exps + sbit - expt - tbit;
/* truncates the bits of t which are < ulp(s) = 2^(1-q) */
expt += mpz_normalize (t, t, (mpfr_exp_t) q - dif);
/* error at most 2^(1-q) */
if (l > 1)
{
/* GMP doesn't optimize the case of power of 2 */
if (IS_POW2(l))
{
int bits = MPFR_INT_CEIL_LOG2(l);
mpz_fdiv_q_2exp (t, t, bits); /* error at most 2^(1-q) */
}
else
{
mpz_fdiv_q_ui (t, t, l); /* error at most 2^(1-q) */
}
/* the error wrt t^l/l! is here at most 3*l*ulp(s) */
MPFR_ASSERTD (expt == *exps);
}
if (mpz_sgn (t) == 0)
break;
mpz_add (s, s, t); /* no error here: exact */
/* ensures rr has the same size as t: after several shifts, the error
on rr is still at most ulp(t)=ulp(s) */
MPFR_MPZ_SIZEINBASE2 (tbit, t);
expr += mpz_normalize (rr, rr, tbit);
}
mpz_clear (t);
mpz_clear (rr);
return 3 * l * (l + 1);
}
/* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
using Paterson-Stockmeyer algorithm with O(sqrt(l)) multiplications.
Return l.
Uses m multiplications of full size and 2l/m of decreasing size,
i.e. a total equivalent to about m+l/m full multiplications,
i.e. 2*sqrt(l) for m=sqrt(l).
NOTE[VL]: The following sentence seems to be obsolete since MY_INIT_MPZ
is no longer used (r6919); sizer was the number of limbs of r.
Version using mpz. ss must have at least (sizer+1) limbs.
The error is bounded by (l^2+4*l) ulps where l is the return value.
*/
static unsigned long
mpfr_exp2_aux2 (mpz_t s, mpfr_srcptr r, mpfr_prec_t q, mpfr_exp_t *exps)
{
mpfr_exp_t expr, *expR, expt;
mpfr_prec_t ql;
unsigned long l, m, i;
mpz_t t, *R, rr, tmp;
mp_size_t sbit, rrbit;
MPFR_TMP_DECL(marker);
/* estimate value of l */
MPFR_ASSERTD (MPFR_GET_EXP (r) < 0);
l = q / (- MPFR_GET_EXP (r));
m = __gmpfr_isqrt (l);
/* we access R[2], thus we need m >= 2 */
if (m < 2)
m = 2;
MPFR_TMP_MARK(marker);
R = (mpz_t*) MPFR_TMP_ALLOC ((m + 1) * sizeof (mpz_t)); /* R[i] is r^i */
expR = (mpfr_exp_t*) MPFR_TMP_ALLOC((m + 1) * sizeof (mpfr_exp_t));
/* expR[i] is the exponent for R[i] */
mpz_init (tmp);
mpz_init (rr);
mpz_init (t);
mpz_set_ui (s, 0);
*exps = 1 - q; /* 1 ulp = 2^(1-q) */
for (i = 0 ; i <= m ; i++)
mpz_init (R[i]);
expR[1] = mpfr_get_z_2exp (R[1], r); /* exact operation: no error */
expR[1] = mpz_normalize2 (R[1], R[1], expR[1], 1 - q); /* error <= 1 ulp */
mpz_mul (t, R[1], R[1]); /* err(t) <= 2 ulps */
mpz_fdiv_q_2exp (R[2], t, q - 1); /* err(R[2]) <= 3 ulps */
expR[2] = 1 - q;
for (i = 3 ; i <= m ; i++)
{
if ((i & 1) == 1)
mpz_mul (t, R[i-1], R[1]); /* err(t) <= 2*i-2 */
else
mpz_mul (t, R[i/2], R[i/2]);
mpz_fdiv_q_2exp (R[i], t, q - 1); /* err(R[i]) <= 2*i-1 ulps */
expR[i] = 1 - q;
}
mpz_set_ui (R[0], 1);
mpz_mul_2exp (R[0], R[0], q-1);
expR[0] = 1-q; /* R[0]=1 */
mpz_set_ui (rr, 1);
expr = 0; /* rr contains r^l/l! */
/* by induction: err(rr) <= 2*l ulps */
l = 0;
ql = q; /* precision used for current giant step */
do
{
/* all R[i] must have exponent 1-ql */
if (l != 0)
for (i = 0 ; i < m ; i++)
expR[i] = mpz_normalize2 (R[i], R[i], expR[i], 1 - ql);
/* the absolute error on R[i]*rr is still 2*i-1 ulps */
expt = mpz_normalize2 (t, R[m-1], expR[m-1], 1 - ql);
/* err(t) <= 2*m-1 ulps */
/* computes t = 1 + r/(l+1) + ... + r^(m-1)*l!/(l+m-1)!
using Horner's scheme */
for (i = m-1 ; i-- != 0 ; )
{
mpz_fdiv_q_ui (t, t, l+i+1); /* err(t) += 1 ulp */
mpz_add (t, t, R[i]);
}
/* now err(t) <= (3m-2) ulps */
/* now multiplies t by r^l/l! and adds to s */
mpz_mul (t, t, rr);
expt += expr;
expt = mpz_normalize2 (t, t, expt, *exps);
/* err(t) <= (3m-1) + err_rr(l) <= (3m-2) + 2*l */
MPFR_ASSERTD (expt == *exps);
mpz_add (s, s, t); /* no error here */
/* updates rr, the multiplication of the factors l+i could be done
using binary splitting too, but it is not sure it would save much */
mpz_mul (t, rr, R[m]); /* err(t) <= err(rr) + 2m-1 */
expr += expR[m];
mpz_set_ui (tmp, 1);
for (i = 1 ; i <= m ; i++)
mpz_mul_ui (tmp, tmp, l + i);
mpz_fdiv_q (t, t, tmp); /* err(t) <= err(rr) + 2m */
l += m;
if (MPFR_UNLIKELY (mpz_sgn (t) == 0))
break;
expr += mpz_normalize (rr, t, ql); /* err_rr(l+1) <= err_rr(l) + 2m+1 */
if (MPFR_UNLIKELY (mpz_sgn (rr) == 0))
rrbit = 1;
else
MPFR_MPZ_SIZEINBASE2 (rrbit, rr);
MPFR_MPZ_SIZEINBASE2 (sbit, s);
ql = q - *exps - sbit + expr + rrbit;
/* TODO: Wrong cast. I don't want what is right, but this is
certainly wrong */
}
while ((size_t) expr + rrbit > (size_t) -q);
for (i = 0 ; i <= m ; i++)
mpz_clear (R[i]);
MPFR_TMP_FREE(marker);
mpz_clear (rr);
mpz_clear (t);
mpz_clear (tmp);
return l * (l + 4);
}