/* mpfr_log_ui -- compute natural logarithm of an unsigned long
Copyright 2014-2018 Free Software Foundation, Inc.
Contributed by the AriC and Caramba projects, INRIA.
This file is part of the GNU MPFR Library.
The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.
The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"
/* FIXME: mpfr_log_ui is much slower than mpfr_log on some values of n,
e.g. about 4 times as slow for n around ULONG_MAX/3 on an
x86_64 Linux machine, for 10^6 bits of precision. The reason is that
for say n=6148914691236517205 and prec=10^6, the value of T computed
has more than 50M bits, which is much more than needed. Indeed the
binary splitting algorithm for series with a finite radius of convergence
gives rationals of size n*log(n) for a target precision n. One might
truncate the rationals inside the algorithm, but then the error analysis
should be redone. */
/* Cf http://www.ginac.de/CLN/binsplit.pdf: the Taylor series of log(1+x)
up to order N for x=p/2^k is T/(B*Q).
P[0] <- (-p)^(n2-n1) [with opposite sign when n1=1]
q <- k*(n2-n1) [corresponding to Q[0] = 2^q]
B[0] <- n1 * (n1+1) * ... * (n2-1)
T[0] <- B[0]*Q[0] * S(n1,n2)
where S(n1,n2) = -sum((-x)^(i-n1+1)/i, i=n1..n2-1)
Assumes p is odd or zero, and -1/3 <= x = p/2^k <= 1/3.
*/
static void
S (mpz_t *P, unsigned long *q, mpz_t *B, mpz_t *T, unsigned long n1,
unsigned long n2, long p, unsigned long k, int need_P)
{
MPFR_ASSERTD (n1 < n2);
MPFR_ASSERTD (p == 0 || ((unsigned long) p & 1) != 0);
if (n2 == n1 + 1)
{
mpz_set_si (P[0], (n1 == 1) ? p : -p);
*q = k;
mpz_set_ui (B[0], n1);
/* T = B*Q*S where S = P/(B*Q) thus T = P */
mpz_set (T[0], P[0]);
/* since p is odd (or zero), there is no common factor 2 between
P and Q, or T and B */
}
else
{
unsigned long m = (n1 / 2) + (n2 / 2) + (n1 & 1UL & n2), q1;
/* m = floor((n1+n2)/2) */
MPFR_ASSERTD (n1 < m && m < n2);
S (P, q, B, T, n1, m, p, k, 1);
S (P + 1, &q1, B + 1, T + 1, m, n2, p, k, need_P);
/* T0 <- T0*B1*Q1 + P0*B0*T1 */
mpz_mul (T[1], T[1], P[0]);
mpz_mul (T[1], T[1], B[0]);
mpz_mul (T[0], T[0], B[1]);
/* Q[1] = 2^q1 */
mpz_mul_2exp (T[0], T[0], q1); /* mpz_mul (T[0], T[0], Q[1]) */
mpz_add (T[0], T[0], T[1]);
if (need_P)
mpz_mul (P[0], P[0], P[1]);
*q += q1; /* mpz_mul (Q[0], Q[0], Q[1]) */
mpz_mul (B[0], B[0], B[1]);
/* there should be no common factors 2 between P, Q and T,
since P is odd (or zero) */
}
}
int
mpfr_log_ui (mpfr_ptr x, unsigned long n, mpfr_rnd_t rnd_mode)
{
unsigned long k;
mpfr_prec_t w; /* working precision */
mpz_t three_n, *P, *B, *T;
mpfr_t t, q;
int inexact;
unsigned long N, lgN, i, kk;
long p;
MPFR_GROUP_DECL(group);
MPFR_TMP_DECL(marker);
MPFR_ZIV_DECL(loop);
MPFR_SAVE_EXPO_DECL (expo);
if (n <= 2)
{
if (n == 0)
{
MPFR_SET_INF (x);
MPFR_SET_NEG (x);
MPFR_SET_DIVBY0 ();
MPFR_RET (0); /* log(0) is an exact -infinity */
}
else if (n == 1)
{
MPFR_SET_ZERO (x);
MPFR_SET_POS (x);
MPFR_RET (0); /* only "normal" case where the result is exact */
}
/* now n=2 */
return mpfr_const_log2 (x, rnd_mode);
}
/* here n >= 3 */
/* Argument reduction: compute k such that 2/3 <= n/2^k < 4/3,
i.e., 2^(k+1) <= 3n < 2^(k+2).
FIXME: we could do better by considering n/(2^k*3^i*5^j),
which reduces the maximal distance to 1 from 1/3 to 1/8,
thus needing about 1.89 less terms in the Taylor expansion of
the reduced argument. Then log(2^k*3^i*5^j) can be computed
using a combination of log(16/15), log(25/24) and log(81/80),
see Section 6.5 of "A Fortran Multiple-Precision Arithmetic Package",
Richard P. Brent, ACM Transactions on Mathematical Software, 1978. */
mpz_init_set_ui (three_n, n);
mpz_mul_ui (three_n, three_n, 3);
k = mpz_sizeinbase (three_n, 2) - 2;
MPFR_ASSERTD (k >= 2);
mpz_clear (three_n);
/* The reduced argument is n/2^k - 1 = (n-2^k)/2^k.
Compute p = n-2^k. One has: |p| = |n-2^k| < 2^k/3 < n/2 <= LONG_MAX,
so that p and -p both fit in a long. */
if (k < sizeof (unsigned long) * CHAR_BIT)
n -= 1UL << k;
/* n is now the value of p mod ULONG_MAX+1 */
p = n > LONG_MAX ? - (long) - n : (long) n;
MPFR_TMP_MARK(marker);
w = MPFR_PREC(x) + MPFR_INT_CEIL_LOG2 (MPFR_PREC(x)) + 10;
MPFR_GROUP_INIT_2(group, w, t, q);
MPFR_SAVE_EXPO_MARK (expo);
kk = k;
if (p != 0)
while ((p % 2) == 0) /* replace p/2^kk by (p/2)/2^(kk-1) */
{
p /= 2;
kk --;
}
MPFR_ZIV_INIT (loop, w);
for (;;)
{
mpfr_t tmp;
unsigned int err;
unsigned long q0;
/* we need at most w/log2(2^kk/|p|) terms for an accuracy of w bits */
mpfr_init2 (tmp, 32);
mpfr_set_ui (tmp, (p > 0) ? p : -p, MPFR_RNDU);
mpfr_log2 (tmp, tmp, MPFR_RNDU);
mpfr_ui_sub (tmp, kk, tmp, MPFR_RNDD);
MPFR_ASSERTN (w <= ULONG_MAX);
mpfr_ui_div (tmp, w, tmp, MPFR_RNDU);
N = mpfr_get_ui (tmp, MPFR_RNDU);
if (N < 2)
N = 2;
lgN = MPFR_INT_CEIL_LOG2 (N) + 1;
mpfr_clear (tmp);
P = (mpz_t *) MPFR_TMP_ALLOC (3 * lgN * sizeof (mpz_t));
B = P + lgN;
T = B + lgN;
for (i = 0; i < lgN; i++)
{
mpz_init (P[i]);
mpz_init (B[i]);
mpz_init (T[i]);
}
S (P, &q0, B, T, 1, N, p, kk, 0);
/* mpz_mul (Q[0], B[0], Q[0]); */
/* mpz_mul_2exp (B[0], B[0], q0); */
mpfr_set_z (t, T[0], MPFR_RNDN); /* t = P[0] * (1 + theta_1) */
mpfr_set_z (q, B[0], MPFR_RNDN); /* q = B[0] * (1 + theta_2) */
mpfr_mul_2exp (q, q, q0, MPFR_RNDN); /* B[0]*Q[0] */
mpfr_div (t, t, q, MPFR_RNDN); /* t = T[0]/(B[0]*Q[0])*(1 + theta_3)^3
= log(n/2^k) * (1 + theta_4)^4
for |theta_i| < 2^(-w) */
/* argument reconstruction: add k*log(2) */
mpfr_const_log2 (q, MPFR_RNDN);
mpfr_mul_ui (q, q, k, MPFR_RNDN);
mpfr_add (t, t, q, MPFR_RNDN);
for (i = 0; i < lgN; i++)
{
mpz_clear (P[i]);
mpz_clear (B[i]);
mpz_clear (T[i]);
}
/* The maximal error is 5 ulps for P/Q, since |(1+/-u)^4 - 1| < 5*u
for u < 2^(-12), k ulps for k*log(2), and 1 ulp for the addition,
thus at most k+6 ulps.
Note that there might be some cancellation in the addition: the worst
case is when log(1 + p/2^kk) = log(2/3) ~ -0.405, and with n=3 which
gives k=2, thus we add 2*log(2) = 1.386. Thus in the worst case we
have an exponent decrease of 1, which accounts for +1 in the error. */
err = MPFR_INT_CEIL_LOG2 (k + 6) + 1;
if (MPFR_LIKELY (MPFR_CAN_ROUND (t, w - err, MPFR_PREC(x), rnd_mode)))
break;
MPFR_ZIV_NEXT (loop, w);
MPFR_GROUP_REPREC_2(group, w, t, q);
}
MPFR_ZIV_FREE (loop);
inexact = mpfr_set (x, t, rnd_mode);
MPFR_GROUP_CLEAR(group);
MPFR_TMP_FREE(marker);
MPFR_SAVE_EXPO_FREE (expo);
return mpfr_check_range (x, inexact, rnd_mode);
}