/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2019 Conrad Meyer <cem@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/domainset.h>
#include <sys/fail.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/random.h>
#include <sys/sdt.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <machine/atomic.h>
#include <machine/smp.h>
#include <dev/random/randomdev.h>
#include <dev/random/random_harvestq.h>
#include <dev/random/fenestrasX/fx_brng.h>
#include <dev/random/fenestrasX/fx_hash.h>
#include <dev/random/fenestrasX/fx_pool.h>
#include <dev/random/fenestrasX/fx_priv.h>
#include <dev/random/fenestrasX/fx_pub.h>
/*
* Timer-based reseed interval growth factor and limit in seconds. (§ 3.2)
*/
#define FXENT_RESEED_INTVL_GFACT 3
#define FXENT_RESEED_INTVL_MAX 3600
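/*
 * Illustrative schedule (a sketch, not normative): the first timer reseed
 * fires one second after initial seeding, and the interval then grows
 * geometrically by the factor above, capped at the maximum:
 *
 *     1, 3, 9, 27, 81, 243, 729, 2187, 3600, 3600, ...  (seconds)
 *
 * so the steady-state hourly cadence is reached roughly 55 minutes
 * ((3^8 - 1) / 2 = 3280 seconds) after initial seeding.
 */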
/*
* Pool reseed schedule. Initially, only pool 0 is active; until the timer
* interval reaches INTVL_MAX, it is the only pool used.
*
* After reaching INTVL_MAX, pool k is either activated (if inactive) or used
* (if active) every 3^k timer reseeds. (§ 3.3)
*
* (Entropy harvesting only round-robins across active pools.)
*/
#define FXENT_RESEED_BASE 3
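/*
 * Worked example (illustrative): once the interval is capped, timer reseed
 * number n draws from pool k iff 3^k divides n. Pool 0 is drained on every
 * reseed; pool 1 is activated at reseed #3 and first drained at #6; pool 2
 * is activated at #9 and first drained at #18; in general, pool k is
 * activated at reseed 3^k and drained every 3^k reseeds thereafter.
 */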
/*
* Number of bytes from high quality sources to allocate to pool 0 before
* normal round-robin allocation after each timer reseed. (§ 3.4)
*/
#define FXENT_HI_SRC_POOL0_BYTES 32
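/*
 * E.g. (illustrative): if a high quality source submits a 40-byte event
 * just after a timer reseed, the last 32 bytes are credited to pool 0 and
 * the leading 8 bytes, plus the event's timecounter, are round-robin'd
 * across the active pools like any other event.
 */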
/*
* § 3.1
*
* Low sources provide unconditioned entropy, such as mouse movements; high
* sources are assumed to provide high-quality random bytes. Pull sources are
* those which can be polled, i.e., anything randomdev calls a "random_source."
*
* In the whitepaper, low sources are pull. For us, at least in the existing
* design, low-quality sources push into some global ring buffer and then get
* forwarded into the RNG by a thread that continually polls. Presumably their
* design batches low-entropy signals in some way (SHA512?) and only requests
* them dynamically on reseed. I'm not sure what the benefit is vs. feeding
* into the pools directly.
*/
enum fxrng_ent_access_cls {
FXRNG_PUSH,
FXRNG_PULL,
};
enum fxrng_ent_source_cls {
FXRNG_HI,
FXRNG_LO,
FXRNG_GARBAGE,
};
struct fxrng_ent_cls {
enum fxrng_ent_access_cls entc_axx_cls;
enum fxrng_ent_source_cls entc_src_cls;
};
static const struct fxrng_ent_cls fxrng_hi_pull = {
.entc_axx_cls = FXRNG_PULL,
.entc_src_cls = FXRNG_HI,
};
static const struct fxrng_ent_cls fxrng_hi_push = {
.entc_axx_cls = FXRNG_PUSH,
.entc_src_cls = FXRNG_HI,
};
static const struct fxrng_ent_cls fxrng_lo_push = {
.entc_axx_cls = FXRNG_PUSH,
.entc_src_cls = FXRNG_LO,
};
static const struct fxrng_ent_cls fxrng_garbage = {
.entc_axx_cls = FXRNG_PUSH,
.entc_src_cls = FXRNG_GARBAGE,
};
/*
* This table is a mapping of randomdev's current source abstractions to the
* designations above; at some point, if the design seems reasonable, it would
* make more sense to pull this up into the abstraction layer instead.
*/
static const struct fxrng_ent_char {
const struct fxrng_ent_cls *entc_cls;
} fxrng_ent_char[ENTROPYSOURCE] = {
[RANDOM_CACHED] = {
.entc_cls = &fxrng_hi_push,
},
[RANDOM_ATTACH] = {
.entc_cls = &fxrng_lo_push,
},
[RANDOM_KEYBOARD] = {
.entc_cls = &fxrng_lo_push,
},
[RANDOM_MOUSE] = {
.entc_cls = &fxrng_lo_push,
},
[RANDOM_NET_TUN] = {
.entc_cls = &fxrng_lo_push,
},
[RANDOM_NET_ETHER] = {
.entc_cls = &fxrng_lo_push,
},
[RANDOM_NET_NG] = {
.entc_cls = &fxrng_lo_push,
},
[RANDOM_INTERRUPT] = {
.entc_cls = &fxrng_lo_push,
},
[RANDOM_SWI] = {
.entc_cls = &fxrng_lo_push,
},
[RANDOM_FS_ATIME] = {
.entc_cls = &fxrng_lo_push,
},
[RANDOM_UMA] = {
.entc_cls = &fxrng_lo_push,
},
[RANDOM_PURE_OCTEON] = {
.entc_cls = &fxrng_hi_push, /* Could be made pull. */
},
[RANDOM_PURE_SAFE] = {
.entc_cls = &fxrng_hi_push,
},
[RANDOM_PURE_GLXSB] = {
.entc_cls = &fxrng_hi_push,
},
[RANDOM_PURE_HIFN] = {
.entc_cls = &fxrng_hi_push,
},
[RANDOM_PURE_RDRAND] = {
.entc_cls = &fxrng_hi_pull,
},
[RANDOM_PURE_NEHEMIAH] = {
.entc_cls = &fxrng_hi_pull,
},
[RANDOM_PURE_RNDTEST] = {
.entc_cls = &fxrng_garbage,
},
[RANDOM_PURE_VIRTIO] = {
.entc_cls = &fxrng_hi_pull,
},
[RANDOM_PURE_BROADCOM] = {
.entc_cls = &fxrng_hi_push,
},
[RANDOM_PURE_CCP] = {
.entc_cls = &fxrng_hi_pull,
},
[RANDOM_PURE_DARN] = {
.entc_cls = &fxrng_hi_pull,
},
[RANDOM_PURE_TPM] = {
.entc_cls = &fxrng_hi_push,
},
[RANDOM_PURE_VMGENID] = {
.entc_cls = &fxrng_hi_push,
},
};
/* Useful for single-bit-per-source state. */
BITSET_DEFINE(fxrng_bits, ENTROPYSOURCE);
/* XXX Borrowed from not-yet-committed D22702. */
#ifndef BIT_TEST_SET_ATOMIC_ACQ
#define BIT_TEST_SET_ATOMIC_ACQ(_s, n, p) \
(atomic_testandset_acq_long( \
&(p)->__bits[__bitset_word((_s), (n))], (n)) != 0)
#endif
#define FXENT_TEST_SET_ATOMIC_ACQ(n, p) \
BIT_TEST_SET_ATOMIC_ACQ(ENTROPYSOURCE, n, p)
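/*
 * The test-and-set returns whether the bit was already set, so
 * "!FXENT_TEST_SET_ATOMIC_ACQ(src, &fxrng_seen)" (below) is true at most
 * once per source over the lifetime of the system.
 */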
/* For special behavior on first-time entropy sources. (§ 3.1) */
static struct fxrng_bits __read_mostly fxrng_seen;
/* For special behavior for high-entropy sources after a reseed. (§ 3.4) */
_Static_assert(FXENT_HI_SRC_POOL0_BYTES <= UINT8_MAX, "");
static uint8_t __read_mostly fxrng_reseed_seen[ENTROPYSOURCE];
/* Entropy pools. Lock order is ENT -> RNG(root) -> RNG(leaf). */
static struct mtx fxent_pool_lk;
MTX_SYSINIT(fx_pool, &fxent_pool_lk, "fx entropy pool lock", MTX_DEF);
#define FXENT_LOCK() mtx_lock(&fxent_pool_lk)
#define FXENT_UNLOCK() mtx_unlock(&fxent_pool_lk)
#define FXENT_ASSERT() mtx_assert(&fxent_pool_lk, MA_OWNED)
#define FXENT_ASSERT_NOT() mtx_assert(&fxent_pool_lk, MA_NOTOWNED)
static struct fxrng_hash fxent_pool[FXRNG_NPOOLS];
static unsigned __read_mostly fxent_nactpools = 1;
static struct timeout_task fxent_reseed_timer;
static int __read_mostly fxent_timer_ready;
/*
* Track number of bytes of entropy harvested from high-quality sources prior
* to initial keying. The idea is to collect more jitter entropy when fewer
* high-quality bytes were available and less if we had other good sources. We
* want to provide always-on availability but don't necessarily have *any*
* great sources on some platforms.
*
* Like fxrng_ent_char: at some point, if the design seems reasonable, it would
* make more sense to pull this up into the abstraction layer instead.
*
* Jitter entropy is unimplemented for now.
*/
static unsigned long fxrng_preseed_ent;
void
fxrng_pools_init(void)
{
size_t i;
for (i = 0; i < nitems(fxent_pool); i++)
fxrng_hash_init(&fxent_pool[i]);
}
static inline bool
fxrng_hi_source(enum random_entropy_source src)
{
return (fxrng_ent_char[src].entc_cls->entc_src_cls == FXRNG_HI);
}
/*
* A racy check that this high-entropy source's event should contribute to
* pool0 on the basis of per-source byte count. The check is deliberately
* racy:
* - Performance: The vast majority of the time, we've already taken 32 bytes
* from any present high quality source and the racy check lets us avoid
* dirtying the cache for the global array.
* - Correctness: It's fine that the check is racy. The failure modes are:
* • False positive: We will detect when we take the lock.
* • False negative: We still collect the entropy; it just won't be
* preferentially placed in pool0 in this case.
*/
static inline bool
fxrng_hi_pool0_eligible_racy(enum random_entropy_source src)
{
return (atomic_load_acq_8(&fxrng_reseed_seen[src]) <
FXENT_HI_SRC_POOL0_BYTES);
}
/*
* Top level entropy processing API from randomdev.
*
* Invoked by the core randomdev subsystem for preloaded entropy, "push"
* sources (interrupts, keyboard, etc.), and pull sources (RDRAND, etc.).
*/
void
fxrng_event_processor(struct harvest_event *event)
{
enum random_entropy_source src;
unsigned pool;
bool first_time, first_32;
src = event->he_source;
ASSERT_DEBUG(event->he_size <= sizeof(event->he_entropy),
"%s: he_size: %u > sizeof(he_entropy): %zu", __func__,
(unsigned)event->he_size, sizeof(event->he_entropy));
/*
* Zero bytes of source entropy don't count as observing this source
* for the first time. We still harvest the counter entropy.
*/
first_time = event->he_size > 0 &&
!FXENT_TEST_SET_ATOMIC_ACQ(src, &fxrng_seen);
if (__predict_false(first_time)) {
/*
* "The first time [any source] provides entropy, it is used to
* directly reseed the root PRNG. The entropy pools are
* bypassed." (§ 3.1)
*
* Unlike Windows, we cannot rely on loader(8) seed material
* being present, so we perform initial keying in the kernel.
* We use brng_generation 0 to represent an unkeyed state.
*
* Prior to initial keying, it doesn't make sense to try to mix
* the entropy directly with the root PRNG state, as the root
* PRNG is unkeyed. Instead, we collect pre-keying dynamic
* entropy in pool0 and do not bump the root PRNG seed version
* or set its key. Initial keying will incorporate pool0 and
* bump the brng_generation (seed version).
*
* After initial keying, we do directly mix in first-time
* entropy sources. We use the root BRNG to generate 32 bytes
* and use fxrng_hash to mix it with the new entropy source and
* re-key with the first 256 bits of hash output.
*/
FXENT_LOCK();
FXRNG_BRNG_LOCK(&fxrng_root);
if (__predict_true(fxrng_root.brng_generation > 0)) {
/* Bypass the pools: */
FXENT_UNLOCK();
fxrng_brng_src_reseed(event);
FXRNG_BRNG_ASSERT_NOT(&fxrng_root);
return;
}
/*
* Keying the root PRNG requires both FXENT_LOCK and the PRNG's
* lock, so we only need to hold on to the pool lock to prevent
* initial keying without this entropy.
*/
FXRNG_BRNG_UNLOCK(&fxrng_root);
/* Root PRNG hasn't been keyed yet, just accumulate event. */
fxrng_hash_update(&fxent_pool[0], &event->he_somecounter,
sizeof(event->he_somecounter));
fxrng_hash_update(&fxent_pool[0], event->he_entropy,
event->he_size);
if (fxrng_hi_source(src)) {
/* Prevent overflow. */
if (fxrng_preseed_ent <= ULONG_MAX - event->he_size)
fxrng_preseed_ent += event->he_size;
}
FXENT_UNLOCK();
return;
}
/* !first_time */
/*
* "The first 32 bytes produced by a high entropy source after a reseed
* from the pools is always put in pool 0." (§ 3.4)
*
* The first-32-byte tracking data in fxrng_reseed_seen is reset in
* fxent_timer_reseed_npools() below.
*/
first_32 = event->he_size > 0 &&
fxrng_hi_source(src) &&
atomic_load_acq_int(&fxent_nactpools) > 1 &&
fxrng_hi_pool0_eligible_racy(src);
if (__predict_false(first_32)) {
unsigned rem, seen;
FXENT_LOCK();
seen = fxrng_reseed_seen[src];
if (seen == FXENT_HI_SRC_POOL0_BYTES)
goto round_robin;
rem = FXENT_HI_SRC_POOL0_BYTES - seen;
rem = MIN(rem, event->he_size);
fxrng_reseed_seen[src] = seen + rem;
/*
* We put 'rem' bytes in pool0, and any remaining bytes are
* round-robin'd across other pools.
*/
fxrng_hash_update(&fxent_pool[0],
((uint8_t *)event->he_entropy) + event->he_size - rem,
rem);
if (rem == event->he_size) {
fxrng_hash_update(&fxent_pool[0], &event->he_somecounter,
sizeof(event->he_somecounter));
FXENT_UNLOCK();
return;
}
/*
* If fewer bytes were needed than this event provided, we only
* take the last rem bytes of the entropy buffer and leave the
* timecounter to be round-robin'd with the remaining entropy.
*/
event->he_size -= rem;
goto round_robin;
}
/* !first_32 */
FXENT_LOCK();
round_robin:
FXENT_ASSERT();
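/* Spread events across the active pools via the harvest destination. */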
pool = event->he_destination % fxent_nactpools;
fxrng_hash_update(&fxent_pool[pool], event->he_entropy,
event->he_size);
fxrng_hash_update(&fxent_pool[pool], &event->he_somecounter,
sizeof(event->he_somecounter));
if (__predict_false(fxrng_hi_source(src) &&
atomic_load_acq_64(&fxrng_root_generation) == 0)) {
/* Prevent overflow. */
if (fxrng_preseed_ent <= ULONG_MAX - event->he_size)
fxrng_preseed_ent += event->he_size;
}
FXENT_UNLOCK();
}
/*
* Top level "seeded" API/signal from randomdev.
*
* This is our warning that a request is coming: we need to be seeded. In
* fenestrasX, a request for random bytes _never_ fails. "We (ed: ditto) have
* observed that there are many callers that never check for the error code,
* even if they are generating cryptographic key material." (§ 1.6)
*
* If we returned 'false', both read_random(9) and chacha20_randomstir()
* (arc4random(9)) will blindly charge on with something almost certainly worse
* than what we've got, or are able to get quickly enough.
*/
bool
fxrng_alg_seeded(void)
{
uint8_t hash[FXRNG_HASH_SZ];
sbintime_t sbt;
/* The vast majority of the time, we expect to already be seeded. */
if (__predict_true(atomic_load_acq_64(&fxrng_root_generation) != 0))
return (true);
/*
* Take the lock and recheck; only one thread needs to do the initial
* seeding work.
*/
FXENT_LOCK();
if (atomic_load_acq_64(&fxrng_root_generation) != 0) {
FXENT_UNLOCK();
return (true);
}
/* XXX Any one-off initial seeding goes here. */
fxrng_hash_finish(&fxent_pool[0], hash, sizeof(hash));
fxrng_hash_init(&fxent_pool[0]);
fxrng_brng_reseed(hash, sizeof(hash));
FXENT_UNLOCK();
randomdev_unblock();
explicit_bzero(hash, sizeof(hash));
/*
* This may be called too early for taskqueue_thread to be initialized.
* fxent_pool_timer_init will detect if we've already unblocked and
* queue the first timer reseed at that point.
*/
if (atomic_load_acq_int(&fxent_timer_ready) != 0) {
sbt = SBT_1S;
taskqueue_enqueue_timeout_sbt(taskqueue_thread,
&fxent_reseed_timer, -sbt, (sbt / 3), C_PREL(2));
}
return (true);
}
/*
* Timer-based reseeds and pool expansion.
*/
static void
fxent_timer_reseed_npools(unsigned n)
{
/*
* 64 * 8 => moderately large 512 bytes. Could be static, as we are
* only used in a serialized context. On the other hand, this is in
* taskqueue TASK context and we're likely nearly at top of stack
* already.
*/
uint8_t hash[FXRNG_HASH_SZ * FXRNG_NPOOLS];
unsigned i;
ASSERT_DEBUG(n > 0 && n <= FXRNG_NPOOLS, "n:%u", n);
FXENT_ASSERT();
/*
* Collect entropy from pools 0..n-1 by concatenating the output hashes
* and then feeding them into fxrng_brng_reseed, which will hash the
* aggregate together with the current root PRNG keystate to produce a
* new key. It will also bump the global generation counter
* appropriately.
*/
for (i = 0; i < n; i++) {
fxrng_hash_finish(&fxent_pool[i], hash + i * FXRNG_HASH_SZ,
FXRNG_HASH_SZ);
fxrng_hash_init(&fxent_pool[i]);
}
fxrng_brng_reseed(hash, n * FXRNG_HASH_SZ);
explicit_bzero(hash, n * FXRNG_HASH_SZ);
/*
* "The first 32 bytes produced by a high entropy source after a reseed
* from the pools is always put in pool 0." (§ 3.4)
*
* So here we reset the tracking (somewhat naively given the majority
* of sources on most machines are not what we consider "high", but at
* 32 bytes it's smaller than a cache line), so the next 32 bytes are
* prioritized into pool0.
*
* See corresponding use of fxrng_reseed_seen in fxrng_event_processor.
*/
memset(fxrng_reseed_seen, 0, sizeof(fxrng_reseed_seen));
FXENT_ASSERT();
}
static void
fxent_timer_reseed(void *ctx __unused, int pending __unused)
{
static unsigned reseed_intvl_sec = 1;
/* Only counts reseeds after FXENT_RESEED_INTVL_MAX is reached. */
static uint64_t reseed_number = 1;
unsigned next_ival, i, k;
sbintime_t sbt;
if (reseed_intvl_sec < FXENT_RESEED_INTVL_MAX) {
next_ival = FXENT_RESEED_INTVL_GFACT * reseed_intvl_sec;
if (next_ival > FXENT_RESEED_INTVL_MAX)
next_ival = FXENT_RESEED_INTVL_MAX;
FXENT_LOCK();
fxent_timer_reseed_npools(1);
FXENT_UNLOCK();
} else {
/*
* The creation of entropy pools beyond 0 is enabled when the
* reseed interval hits the maximum. (§ 3.3)
*/
next_ival = reseed_intvl_sec;
/*
* Pool 0 is used on every reseed; pools [0, 1] every 3rd reseed; and
* in general, pools [0, n] every 3^n reseeds.
*/
k = reseed_number;
reseed_number++;
/* Count how many pools, from [0, i), to use for reseed. */
for (i = 1; i < MIN(fxent_nactpools + 1, FXRNG_NPOOLS); i++) {
if ((k % FXENT_RESEED_BASE) != 0)
break;
k /= FXENT_RESEED_BASE;
}
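/*
 * E.g. (illustrative, assuming enough active pools): reseed_number
 * 18 = 2 * 3^2 yields i = 3 (pools [0, 3)); any number not divisible
 * by 3 yields i = 1 (pool 0 alone).
 */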
/*
* If the i-th pool (index i-1) hasn't been activated yet, activate
* it and only reseed from [0, i-1). (§ 3.3)
*/
FXENT_LOCK();
if (i == fxent_nactpools + 1) {
fxent_timer_reseed_npools(fxent_nactpools);
fxent_nactpools++;
} else {
/* Just reseed from [0, i). */
fxent_timer_reseed_npools(i);
}
FXENT_UNLOCK();
}
/* Schedule the next reseed. */
sbt = next_ival * SBT_1S;
taskqueue_enqueue_timeout_sbt(taskqueue_thread, &fxent_reseed_timer,
-sbt, (sbt / 3), C_PREL(2));
reseed_intvl_sec = next_ival;
}
static void
fxent_pool_timer_init(void *dummy __unused)
{
sbintime_t sbt;
TIMEOUT_TASK_INIT(taskqueue_thread, &fxent_reseed_timer, 0,
fxent_timer_reseed, NULL);
if (atomic_load_acq_64(&fxrng_root_generation) != 0) {
sbt = SBT_1S;
taskqueue_enqueue_timeout_sbt(taskqueue_thread,
&fxent_reseed_timer, -sbt, (sbt / 3), C_PREL(2));
}
atomic_store_rel_int(&fxent_timer_ready, 1);
}
/* After taskqueue_thread is initialized in SI_SUB_TASKQ:SI_ORDER_SECOND. */
SYSINIT(fxent_pool_timer_init, SI_SUB_TASKQ, SI_ORDER_ANY,
fxent_pool_timer_init, NULL);