/* $NetBSD: mm.c,v 1.24 2019/03/09 08:42:25 maxv Exp $ */
/*
* Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Maxime Villard.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "prekern.h"
/*
 * Alignment, in bytes, that mm_shift_segment() falls back to when the ELF
 * alignment it is given is zero.
 */
#define ELFROUND 64
/*
 * Fill byte for each segment type, indexed by BTSEG_*. mm_map_segment() writes
 * this value into the parts of a segment mapping that lie before and after
 * the segment contents.
 *
 * NOTE(review): 0xCC for text is presumably the x86 int3 opcode, so that a
 * stray jump into the padding traps - confirm.
 */
static const uint8_t pads[4] = {
[BTSEG_NONE] = 0x00,
[BTSEG_TEXT] = 0xCC,
[BTSEG_RODATA] = 0x00,
[BTSEG_DATA] = 0x00
};
/*
 * Protection requests. The values are combined with '|' and the result is
 * used directly as an index into protection_codes[]. Since MM_PROT_READ is
 * zero, READ|WRITE is MM_PROT_WRITE and READ|EXECUTE is MM_PROT_EXECUTE.
 */
#define MM_PROT_READ 0x00
#define MM_PROT_WRITE 0x01
#define MM_PROT_EXECUTE 0x02
/*
 * PTE bits that mm_enter_pa() and mm_reenter_pa() OR into an entry, next to
 * PTE_P, for each protection request. The table has only three slots:
 * WRITE|EXECUTE (index 3) has no entry and must never be requested.
 */
static const pt_entry_t protection_codes[3] = {
[MM_PROT_READ] = PTE_NX,
[MM_PROT_WRITE] = PTE_W | PTE_NX,
[MM_PROT_EXECUTE] = 0,
/* RWX does not exist */
};
/* Layout of the mapped regions; zeroed and filled in by mm_map_kernel(). */
struct bootspace bootspace;
/*
 * Defined outside this file. kernpa_start is used here as the physical
 * address of the head region.
 */
extern paddr_t kernpa_start, kernpa_end;
/* VA at which mm_map_boot() maps the IOM_BEGIN physical range. */
vaddr_t iom_base;
/* Next physical address mm_palloc() hands out; initialized by mm_init(). */
paddr_t pa_avail = 0;
/* VA at which mm_palloc() maps each new page, one at a time, to zero it. */
static const vaddr_t tmpva = (PREKERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2);
/*
 * Set the starting point of the physical page allocator: first_pa becomes
 * the next address that mm_palloc() will hand out.
 */
void
mm_init(paddr_t first_pa)
{
	pa_avail = first_pa;
}
/*
 * Create a new mapping from va to pa with the requested protection.
 *
 * The entry for va must not be present yet; finding one that is present is
 * a fatal error. No TLB flush is done here.
 */
static void
mm_enter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
{
	const size_t slot = pl1_i(va);

	if ((PTE_BASE[slot] & PTE_P) != 0) {
		fatal("mm_enter_pa: mapping already present");
	}

	PTE_BASE[slot] = pa | PTE_P | protection_codes[prot];
}
/*
 * Write the mapping from va to pa with the requested protection, replacing
 * whatever the entry held before. Unlike mm_enter_pa(), no check is made on
 * the previous contents. The caller is responsible for flushing the TLB.
 */
static void
mm_reenter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
{
	const size_t slot = pl1_i(va);

	PTE_BASE[slot] = pa | PTE_P | protection_codes[prot];
}
/*
 * Invalidate the TLB entry for the page containing va, using the x86 invlpg
 * instruction. The "memory" clobber keeps the compiler from moving memory
 * accesses across the flush.
 */
static void
mm_flush_va(vaddr_t va)
{
asm volatile("invlpg (%0)" ::"r" (va) : "memory");
}
/*
 * Allocate npages physically contiguous pages and zero them.
 *
 * The pages are taken from pa_avail, which is advanced past them. Each page
 * is zeroed through a temporary mapping at tmpva; that mapping is left
 * pointing at the last page zeroed.
 *
 * Returns the physical address of the first page.
 */
static paddr_t
mm_palloc(size_t npages)
{
	const paddr_t base = pa_avail;
	paddr_t page;
	size_t left;

	/* Reserve the range by moving the allocation cursor past it. */
	pa_avail = base + npages * PAGE_SIZE;

	/* Map each page in turn at tmpva and clear it. */
	page = base;
	for (left = npages; left > 0; left--) {
		mm_reenter_pa(page, tmpva, MM_PROT_READ|MM_PROT_WRITE);
		mm_flush_va(tmpva);
		memset((void *)tmpva, 0, PAGE_SIZE);
		page += PAGE_SIZE;
	}

	return base;
}
/*
 * Tell whether a page-table entry has its present bit (PTE_P) set.
 */
static bool
mm_pte_is_valid(pt_entry_t pte)
{
	if (pte & PTE_P) {
		return true;
	}
	return false;
}
/*
 * Change the protection of the pages in [startva, startva + size).
 *
 * Each page keeps its current physical frame: the frame is read back from
 * the existing entry, the entry is rewritten with the new protection, and
 * the TLB entry for the page is flushed. size must be a multiple of
 * PAGE_SIZE.
 */
static void
mm_mprotect(vaddr_t startva, size_t size, pte_prot_t prot)
{
	size_t remaining;
	vaddr_t va;
	paddr_t frame;

	ASSERT(size % PAGE_SIZE == 0);

	va = startva;
	for (remaining = size / PAGE_SIZE; remaining > 0; remaining--) {
		frame = (PTE_BASE[pl1_i(va)] & PTE_FRAME);
		mm_reenter_pa(frame, va, prot);
		mm_flush_va(va);
		va += PAGE_SIZE;
	}
}
/*
 * Apply the final permissions to the kernel segments recorded in bootspace:
 * text segments become read+execute, rodata segments become read-only.
 * Segments of any other type are left as they are.
 */
void
mm_bootspace_mprotect(void)
{
	pte_prot_t prot;
	size_t i;

	for (i = 0; i < BTSPACE_NSEGS; i++) {
		switch (bootspace.segs[i].type) {
		case BTSEG_TEXT:
			prot = MM_PROT_READ|MM_PROT_EXECUTE;
			break;
		case BTSEG_RODATA:
			prot = MM_PROT_READ;
			break;
		default:
			/* Nothing to tighten for this segment. */
			continue;
		}

		mm_mprotect(bootspace.segs[i].va, bootspace.segs[i].sz, prot);
	}

	print_state(true, "Segments protection updated");
}
/*
 * Count the pgsz-sized, pgsz-aligned slots touched by [startva, endva).
 *
 * The computation is done in units of PAGE_SIZE pages: the start page number
 * is rounded down and the end page number rounded up to a multiple of the
 * number of pages per slot, and the distance is converted back to slots.
 */
static size_t
mm_nentries_range(vaddr_t startva, vaddr_t endva, size_t pgsz)
{
	const size_t pages_per_slot = pgsz / PAGE_SIZE;
	const size_t first_page = rounddown((startva / PAGE_SIZE),
	    pages_per_slot);
	const size_t last_page = roundup((endva / PAGE_SIZE),
	    pages_per_slot);

	return ((last_page - first_page) / pages_per_slot);
}
/*
 * Make sure the page-table pages needed to map [startva, endva) exist.
 *
 * The L4, L3 and L2 levels are visited in that order. Every entry in the
 * range that is not yet valid is pointed at a freshly allocated, zeroed page
 * and marked present and writable. The range is required to fall within the
 * single L4 slot 511. No L1 entries are written here.
 */
static void
mm_map_tree(vaddr_t startva, vaddr_t endva)
{
	size_t idx, first, end, count;
	paddr_t pa;

	/* L4: exactly one slot, which must be the last one. */
	first = pl4_i(startva);
	count = mm_nentries_range(startva, endva, NBPD_L4);
	ASSERT(first == 511);
	ASSERT(count == 1);
	if (!mm_pte_is_valid(L4_BASE[first])) {
		pa = mm_palloc(1);
		L4_BASE[first] = pa | PTE_P | PTE_W;
	}

	/* L3: populate every missing entry covering the range. */
	first = pl3_i(startva);
	end = first + mm_nentries_range(startva, endva, NBPD_L3);
	for (idx = first; idx < end; idx++) {
		if (!mm_pte_is_valid(L3_BASE[idx])) {
			pa = mm_palloc(1);
			L3_BASE[idx] = pa | PTE_P | PTE_W;
		}
	}

	/* L2: populate every missing entry covering the range. */
	first = pl2_i(startva);
	end = first + mm_nentries_range(startva, endva, NBPD_L2);
	for (idx = first; idx < end; idx++) {
		if (!mm_pte_is_valid(L2_BASE[idx])) {
			pa = mm_palloc(1);
			L2_BASE[idx] = pa | PTE_P | PTE_W;
		}
	}
}
/*
 * Pick a random VA for a region of the given size inside the KASLR window,
 * aligned down to pagesz, and create the page tree for it.
 *
 * Candidates are drawn until one is found that collides with none of the
 * segments already registered in bootspace. Only bootspace.segs[] is
 * consulted by the collision check.
 *
 * Returns the chosen VA. The region itself is not entered: only the page
 * tree covering [va, va + size) is built.
 */
static vaddr_t
mm_randva_kregion(size_t size, size_t pagesz)
{
	vaddr_t randva, randend;
	vaddr_t sva, eva;
	uint64_t rnd;
	size_t i;
	bool collides;

	do {
		prng_get_rand(&rnd, sizeof(rnd));
		randva = rounddown(KASLR_WINDOW_BASE +
		    rnd % (KASLR_WINDOW_SIZE - size), pagesz);
		randend = randva + size;

		collides = false;
		for (i = 0; i < BTSPACE_NSEGS && !collides; i++) {
			if (bootspace.segs[i].type == BTSEG_NONE) {
				continue;
			}
			sva = bootspace.segs[i].va;
			eva = sva + bootspace.segs[i].sz;

			collides =
			    /* The candidate starts inside the segment. */
			    ((sva <= randva) && (randva < eva)) ||
			    /* The candidate ends inside the segment. */
			    ((sva < randend) && (randend <= eva)) ||
			    /* The candidate strictly encloses the segment. */
			    ((randva < sva) && (eva < randend));
		}
	} while (collides);

	mm_map_tree(randva, randend);

	return randva;
}
/*
 * Return the highest physical end address (pa + sz) among the segments
 * registered in bootspace, or 0 when no segment is registered.
 */
static paddr_t
bootspace_getend(void)
{
	paddr_t highest = 0;
	paddr_t end;
	size_t i;

	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type != BTSEG_NONE) {
			end = bootspace.segs[i].pa + bootspace.segs[i].sz;
			highest = (end > highest) ? end : highest;
		}
	}

	return highest;
}
/*
 * Record a segment in the first unused slot of bootspace.segs[]. A slot is
 * unused when its type is BTSEG_NONE. Running out of slots is a fatal error.
 */
static void
bootspace_addseg(int type, vaddr_t va, paddr_t pa, size_t sz)
{
	size_t slot = 0;

	/* Skip over the slots that are already taken. */
	while (slot < BTSPACE_NSEGS &&
	    bootspace.segs[slot].type != BTSEG_NONE) {
		slot++;
	}

	if (slot == BTSPACE_NSEGS) {
		fatal("bootspace_addseg: segments full");
		return;
	}

	bootspace.segs[slot].type = type;
	bootspace.segs[slot].va = va;
	bootspace.segs[slot].pa = pa;
	bootspace.segs[slot].sz = sz;
}
/*
 * Move a segment of elfsz bytes, currently located at va, forward by a
 * random offset, and return that offset.
 *
 * The offset is a multiple of the ELF alignment (ELFROUND when elfalign is
 * zero) and is bounded by the gap between elfsz rounded up to pagesz and
 * elfsz rounded up to the alignment. When that gap is zero the segment is
 * left in place and 0 is returned.
 */
static size_t
mm_shift_segment(vaddr_t va, size_t pagesz, size_t elfsz, size_t elfalign)
{
	const size_t align = (elfalign != 0) ? elfalign : ELFROUND;
	size_t slack, shift;
	uint64_t rnd;

	ASSERT(pagesz >= align);
	ASSERT(pagesz % align == 0);

	/* Room left between the aligned end of the data and the page end. */
	slack = roundup(elfsz, pagesz) - roundup(elfsz, align);
	if (slack == 0) {
		return 0;
	}

	prng_get_rand(&rnd, sizeof(rnd));
	shift = roundup(rnd % slack, align);
	ASSERT((va + shift) % align == 0);

	/* Source and destination may overlap, hence memmove. */
	memmove((void *)(va + shift), (void *)va, elfsz);

	return shift;
}
/*
 * Map the head region at a random, page-aligned VA inside the head window,
 * build the ELF info from it, and record the region in bootspace.head.
 */
static void
mm_map_head(void)
{
	size_t size, left, off;
	uint64_t rnd;
	vaddr_t randva;

	/*
	 * The head size is read through the read-only mapping of the kernel
	 * created in locore. That mapping is an identity mapping, which is
	 * why the physical address can be used as a virtual one.
	 */
	size = elf_get_head_size((vaddr_t)kernpa_start);

	prng_get_rand(&rnd, sizeof(rnd));
	randva = rounddown(HEAD_WINDOW_BASE + rnd % (HEAD_WINDOW_SIZE - size),
	    PAGE_SIZE);
	mm_map_tree(randva, randva + size);

	/* Enter the pages, then let the ELF code parse the new mapping. */
	off = 0;
	for (left = size / PAGE_SIZE; left > 0; left--) {
		mm_enter_pa(kernpa_start + off, randva + off,
		    MM_PROT_READ|MM_PROT_WRITE);
		off += PAGE_SIZE;
	}
	elf_build_head(randva);

	/* Publish the result. */
	bootspace.head.va = randva;
	bootspace.head.pa = kernpa_start;
	bootspace.head.sz = size;
}
/*
 * Map a kernel segment of elfsz bytes, located at physical address pa, at a
 * random VA, and register it in bootspace.
 *
 * The mapping is sized in NBPD_L1 units when the segment fits in one page
 * and in NBPD_L2 units otherwise. Within the mapping the segment contents
 * are shifted by a random offset, and the bytes before and after them are
 * filled with the pad value of the segment type.
 *
 * Returns the VA at which the segment contents now start.
 */
vaddr_t
mm_map_segment(int segtype, paddr_t pa, size_t elfsz, size_t elfalign)
{
	const size_t pagesz = (elfsz <= PAGE_SIZE) ? NBPD_L1 : NBPD_L2;
	const size_t size = roundup(elfsz, pagesz);
	size_t left, off, offset;
	vaddr_t randva;
	char pad;

	randva = mm_randva_kregion(size, pagesz);

	/* Enter the whole mapping read/write so it can be rearranged. */
	off = 0;
	for (left = size / PAGE_SIZE; left > 0; left--) {
		mm_enter_pa(pa + off, randva + off,
		    MM_PROT_READ|MM_PROT_WRITE);
		off += PAGE_SIZE;
	}

	offset = mm_shift_segment(randva, pagesz, elfsz, elfalign);
	ASSERT(offset + elfsz <= size);

	/* Fill what lies before and after the shifted contents. */
	pad = pads[segtype];
	memset((void *)randva, pad, offset);
	memset((void *)(randva + offset + elfsz), pad, size - elfsz - offset);

	bootspace_addseg(segtype, randva, pa, size);

	return (randva + offset);
}
/*
 * Map the "boot" region at a random VA, map the ISA I/O memory right after
 * it, and record the result in bootspace.
 *
 * The page tree is built for a fixed (NKL2_KIMG_ENTRIES + 1) * NBPD_L2 bytes,
 * but only two ranges are entered: the physical range from the end of the
 * registered segments up to pa_avail, and IOM_SIZE bytes starting at
 * IOM_BEGIN.
 */
static void
mm_map_boot(void)
{
size_t i, npages, size;
vaddr_t randva;
paddr_t bootpa;
/*
* The "boot" region is special: its page tree has a fixed size, but
* the number of pages entered is lower.
*/
/* Create the page tree */
size = (NKL2_KIMG_ENTRIES + 1) * NBPD_L2;
randva = mm_randva_kregion(size, PAGE_SIZE);
/*
* Enter the area and build the ELF info. mm_randva_kregion() may have
* allocated page-table pages through mm_palloc(), which advances
* pa_avail. pa_avail is read only after that call, so the range entered
* below covers any pages allocated by it.
*/
bootpa = bootspace_getend();
size = (pa_avail - bootpa);
npages = size / PAGE_SIZE;
for (i = 0; i < npages; i++) {
mm_enter_pa(bootpa + i * PAGE_SIZE,
randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
}
elf_build_boot(randva, bootpa);
/*
* Enter the ISA I/O MEM, immediately after the pages entered above.
* npages is reused from here on for the I/O range.
*/
iom_base = randva + npages * PAGE_SIZE;
npages = IOM_SIZE / PAGE_SIZE;
for (i = 0; i < npages; i++) {
mm_enter_pa(IOM_BEGIN + i * PAGE_SIZE,
iom_base + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
}
/*
* Register the values in bootspace. The size spans from the start of
* the region to the end of the I/O mapping.
*/
bootspace.boot.va = randva;
bootspace.boot.pa = bootpa;
bootspace.boot.sz = (size_t)(iom_base + IOM_SIZE) -
(size_t)bootspace.boot.va;
/*
* Initialize the values that are located in the "boot" region. spareva
* and emodule are both set to the VA NKL2_KIMG_ENTRIES * NBPD_L2 bytes
* into the region, smodule to the end of the I/O mapping, and pdir to
* the VA that corresponds to PDPpaddr within the region.
*/
extern uint64_t PDPpaddr;
bootspace.spareva = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
bootspace.pdir = bootspace.boot.va + (PDPpaddr - bootspace.boot.pa);
bootspace.smodule = (vaddr_t)iom_base + IOM_SIZE;
bootspace.emodule = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
}
/*
 * Map the kernel: one head region, several kernel segments and one boot
 * region, each at a random VA.
 *
 * The head holds the ELF Header and the ELF Section Headers, which are used
 * to map the other regions. It therefore has to be mapped *before* them.
 *
 * The bootspace structure is cleared on entry and is fully constructed when
 * this function returns.
 */
void
mm_map_kernel(void)
{
	memset(&bootspace, 0, sizeof(bootspace));

	/* The head comes first: the next steps read the ELF data from it. */
	mm_map_head();
	print_state(true, "Head region mapped");

	elf_map_sections();
	print_state(true, "Segments mapped");

	mm_map_boot();
	print_state(true, "Boot region mapped");
}