/* $NetBSD: linux_machdep.c,v 1.169 2021/11/01 05:07:16 thorpej Exp $ */
/*-
* Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Frank van der Linden, and by Andrew Doran.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.169 2021/11/01 05:07:16 thorpej Exp $");
#if defined(_KERNEL_OPT)
#include "opt_user_ldt.h"
#endif
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/reboot.h>
#include <sys/conf.h>
#include <sys/exec.h>
#include <sys/file.h>
#include <sys/callout.h>
#include <sys/mbuf.h>
#include <sys/msgbuf.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/device.h>
#include <sys/syscallargs.h>
#include <sys/filedesc.h>
#include <sys/exec_elf.h>
#include <sys/disklabel.h>
#include <sys/ioctl.h>
#include <sys/wait.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <miscfs/specfs/specdev.h>
#include <compat/linux/common/linux_types.h>
#include <compat/linux/common/linux_signal.h>
#include <compat/linux/common/linux_util.h>
#include <compat/linux/common/linux_ioctl.h>
#include <compat/linux/common/linux_hdio.h>
#include <compat/linux/common/linux_exec.h>
#include <compat/linux/common/linux_machdep.h>
#include <compat/linux/common/linux_errno.h>
#include <compat/linux/linux_syscallargs.h>
#include <sys/cpu.h>
#include <machine/cpufunc.h>
#include <machine/psl.h>
#include <machine/reg.h>
#include <machine/segments.h>
#include <machine/specialreg.h>
#include <machine/sysarch.h>
#include <machine/vmparam.h>
#include <x86/fpu.h>
/*
* To see whether wscons is configured (for virtual console ioctl calls).
*/
#if defined(_KERNEL_OPT)
#include "wsdisplay.h"
#endif
#if (NWSDISPLAY > 0)
#include <dev/wscons/wsconsio.h>
#include <dev/wscons/wsdisplay_usl_io.h>
#if defined(_KERNEL_OPT)
#include "opt_xserver.h"
#endif
#endif
#ifdef DEBUG_LINUX
#define DPRINTF(a) uprintf a
#else
#define DPRINTF(a)
#endif
extern struct disklist *x86_alldisks;
static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *);
static void linux_save_ucontext(struct lwp *, struct trapframe *,
const sigset_t *, stack_t *, struct linux_ucontext *);
static void linux_save_sigcontext(struct lwp *, struct trapframe *,
const sigset_t *, struct linux_sigcontext *);
static int linux_restore_sigcontext(struct lwp *,
struct linux_sigcontext *, register_t *);
static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *);
static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *);
extern char linux_sigcode[], linux_rt_sigcode[];
/*
* Deal with some i386-specific things in the Linux emulation code.
*/
/*
 * Set up the initial user register state for a freshly exec'ed Linux
 * image.
 *
 * l	LWP being set up; its trapframe (l_md.md_regs) is rewritten.
 * epp	exec package; only ep_entry (the entry point) is used.
 * stack	initial user stack pointer.
 *
 * Any user LDT is torn down first (USER_LDT kernels only) and the FPU
 * state is reset with the Linux control word.  All general registers are
 * cleared except %ebx, which carries the process' p_psstrp value.
 */
void
linux_setregs(struct lwp *l, struct exec_package *epp, vaddr_t stack)
{
	const int udata_sel = GSEL(GUDATA_SEL, SEL_UPL);
	struct trapframe *regs;

#ifdef USER_LDT
	pmap_ldt_cleanup(l);
#endif
	fpu_clear(l, __Linux_NPXCW__);

	regs = l->l_md.md_regs;

	/* Data segment registers: flat user data, %gs cleared. */
	regs->tf_ds = udata_sel;
	regs->tf_es = udata_sel;
	regs->tf_fs = udata_sel;
	regs->tf_gs = 0;

	/* General purpose registers. */
	regs->tf_eax = 0;
	regs->tf_ebx = l->l_proc->p_psstrp;
	regs->tf_ecx = 0;
	regs->tf_edx = 0;
	regs->tf_esi = 0;
	regs->tf_edi = 0;
	regs->tf_ebp = 0;

	/* Where and how execution starts. */
	regs->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
	regs->tf_eip = epp->ep_entry;
	regs->tf_eflags = PSL_USERSET;

	/* Initial stack. */
	regs->tf_ss = udata_sel;
	regs->tf_esp = stack;
}
/*
 * Send a signal to the current process.
 *
 * The user stack is set up so that the signal trampoline (sigcode) calls
 * the handler and then enters the kernel through the sigreturn routines
 * below.  Once sigreturn has reset the signal mask, the stack and the
 * frame pointer, execution resumes at the pc and psl recorded in the
 * frame.
 *
 * Handlers installed with SA_SIGINFO are given the "rt" frame built by
 * linux_rt_sendsig(); all others get the frame built by
 * linux_old_sendsig().
 */
void
linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
{
	const int wants_siginfo =
	    (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO) != 0;

	if (!wants_siginfo) {
		linux_old_sendsig(ksi, mask);
		return;
	}
	linux_rt_sendsig(ksi, mask);
}
/*
 * Fill in the Linux ucontext stored in an rt signal frame.
 *
 * l, tf	LWP taking the signal and its user trapframe; passed through
 *	to linux_save_sigcontext() for the machine context.
 * mask	signal mask to record (converted to the Linux sigset layout).
 * sas	native alternate signal stack description to convert.
 * uc	kernel-side ucontext to fill in.
 *
 * uc_flags and uc_link are always reported as 0/NULL and the FP register
 * area is cleared.
 */
static void
linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask,
    stack_t *sas, struct linux_ucontext *uc)
{
	/* Fixed fields. */
	uc->uc_link = NULL;
	uc->uc_flags = 0;
	(void)memset(&uc->uc_fpregs_mem, 0, sizeof uc->uc_fpregs_mem);

	/* Machine registers. */
	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);

	/* Signal stack and signal mask, in Linux format. */
	native_to_linux_sigaltstack(&uc->uc_stack, sas);
	native_to_linux_sigset(&uc->uc_sigmask, mask);
}
/*
 * Snapshot the interrupted user register state into a Linux sigcontext.
 *
 * l	LWP taking the signal; only its PCB is consulted (pcb_cr2).
 * tf	trapframe holding the user registers at the time of the signal.
 * mask	signal mask to record, stored in the old Linux sigset format.
 * sc	kernel-side sigcontext to fill in.
 *
 * Linux does not keep the "on signal stack" flag in the frame, so nothing
 * is saved for it here.
 */
static void
linux_save_sigcontext(struct lwp *l, struct trapframe *tf,
    const sigset_t *mask, struct linux_sigcontext *sc)
{
	struct pcb *upcb = lwp_getpcb(l);

	/* Segment registers. */
	sc->sc_cs = tf->tf_cs;
	sc->sc_ds = tf->tf_ds;
	sc->sc_es = tf->tf_es;
	sc->sc_fs = tf->tf_fs;
	sc->sc_gs = tf->tf_gs;
	sc->sc_ss = tf->tf_ss;

	/* General purpose registers. */
	sc->sc_eax = tf->tf_eax;
	sc->sc_ebx = tf->tf_ebx;
	sc->sc_ecx = tf->tf_ecx;
	sc->sc_edx = tf->tf_edx;
	sc->sc_esi = tf->tf_esi;
	sc->sc_edi = tf->tf_edi;
	sc->sc_ebp = tf->tf_ebp;

	/* The stack pointer is recorded in both slots. */
	sc->sc_esp = tf->tf_esp;
	sc->sc_esp_at_signal = tf->tf_esp;

	/* Program counter and flags. */
	sc->sc_eip = tf->tf_eip;
	sc->sc_eflags = tf->tf_eflags;

	/* Trap information. */
	sc->sc_trapno = tf->tf_trapno;
	sc->sc_err = tf->tf_err;
	sc->sc_cr2 = upcb->pcb_cr2;

	/* No FP state is attached. */
	sc->sc_387 = NULL;

	/* Signal mask. */
	native_to_linux_old_sigset(&sc->sc_mask, mask);
}
static void
linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
{
struct lwp *l = curlwp;
struct proc *p = l->l_proc;
struct trapframe *tf;
struct linux_rt_sigframe *fp, frame;
int onstack, error;
int sig = ksi->ksi_signo;
sig_t catcher = SIGACTION(p, sig).sa_handler;
stack_t *sas = &l->l_sigstk;
tf = l->l_md.md_regs;
/* Do we need to jump onto the signal stack? */
onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
(SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
/* Allocate space for the signal handler context. */
if (onstack)
fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
sas->ss_size);
else
fp = (struct linux_rt_sigframe *)tf->tf_esp;
fp--;
DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
onstack, fp, sig, tf->tf_eip,
((struct pcb *)lwp_getpcb(l))->pcb_cr2));
memset(&frame, 0, sizeof(frame));
/* Build stack frame for signal trampoline. */
frame.sf_handler = catcher;
frame.sf_sig = native_to_linux_signo[sig];
frame.sf_sip = &fp->sf_si;
frame.sf_ucp = &fp->sf_uc;
/*
* XXX: the following code assumes that the constants for
* siginfo are the same between linux and NetBSD.
*/
native_to_linux_siginfo(&frame.sf_si, &ksi->ksi_info);
/* Save register context. */
linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
sendsig_reset(l, sig);
mutex_exit(p->p_lock);
error = copyout(&frame, fp, sizeof(frame));
mutex_enter(p->p_lock);
if (error != 0) {
/*
* Process has trashed its stack; give it an illegal
* instruction to halt it in its tracks.
*/
sigexit(l, SIGILL);
/* NOTREACHED */
}
/*
* Build context to run handler in.
*/
tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
(linux_rt_sigcode - linux_sigcode);
tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
tf->tf_eflags &= ~PSL_CLEARSIG;
tf->tf_esp = (int)fp;
tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
/* Remember that we're now on the signal stack. */
if (onstack)
sas->ss_flags |= SS_ONSTACK;
}
static void
linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
{
struct lwp *l = curlwp;
struct proc *p = l->l_proc;
struct trapframe *tf;
struct linux_sigframe *fp, frame;
int onstack, error;
int sig = ksi->ksi_signo;
sig_t catcher = SIGACTION(p, sig).sa_handler;
stack_t *sas = &l->l_sigstk;
tf = l->l_md.md_regs;
/* Do we need to jump onto the signal stack? */
onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
(SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
/* Allocate space for the signal handler context. */
if (onstack)
fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
sas->ss_size);
else
fp = (struct linux_sigframe *)tf->tf_esp;
fp--;
DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
onstack, fp, sig, tf->tf_eip,
((struct pcb *)lwp_getpcb(l))->pcb_cr2));
memset(&frame, 0, sizeof(frame));
/* Build stack frame for signal trampoline. */
frame.sf_handler = catcher;
frame.sf_sig = native_to_linux_signo[sig];
linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
sendsig_reset(l, sig);
mutex_exit(p->p_lock);
error = copyout(&frame, fp, sizeof(frame));
mutex_enter(p->p_lock);
if (error != 0) {
/*
* Process has trashed its stack; give it an illegal
* instruction to halt it in its tracks.
*/
sigexit(l, SIGILL);
/* NOTREACHED */
}
/*
* Build context to run handler in.
*/
tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
tf->tf_eflags &= ~PSL_CLEARSIG;
tf->tf_esp = (int)fp;
tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
/* Remember that we're now on the signal stack. */
if (onstack)
sas->ss_flags |= SS_ONSTACK;
}
/*
 * System call to clean up state after a signal has been taken.
 *
 * Resets the signal mask and stack state from the context left by sendsig
 * (above) and returns to the pc and psl recorded there.  The context is
 * checked carefully so that the user cannot have modified the psl to gain
 * improper privileges or to cause a machine fault.
 *
 * This is the "rt" flavour: the argument points at a Linux ucontext, of
 * which only the embedded machine context (uc_mcontext) is used.
 *
 * Returns the copyin() error if the context cannot be read, otherwise
 * whatever linux_restore_sigcontext() returns.
 */
int
linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval)
{
	/* {
		syscallarg(struct linux_ucontext *) ucp;
	} */
	struct linux_ucontext kuc;
	int error;

	/*
	 * The trampoline code hands us the context.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	error = copyin(SCARG(uap, ucp), &kuc, sizeof(kuc));
	if (error != 0)
		return error;

	/* XXX XAX we can do better here by using more of the ucontext */
	return linux_restore_sigcontext(l, &kuc.uc_mcontext, retval);
}
/*
 * Old-style sigreturn: the argument points directly at the Linux
 * sigcontext left on the user stack by linux_old_sendsig().
 *
 * Returns the copyin() error if the context cannot be read, otherwise
 * whatever linux_restore_sigcontext() returns.
 */
int
linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval)
{
	/* {
		syscallarg(struct linux_sigcontext *) scp;
	} */
	struct linux_sigcontext ksc;
	int error;

	/*
	 * The trampoline code hands us the context.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	error = copyin((void *)SCARG(uap, scp), &ksc, sizeof(ksc));
	if (error != 0)
		return error;

	return linux_restore_sigcontext(l, &ksc, retval);
}
/*
 * Restore user state from a (kernel copy of a) Linux sigcontext.
 *
 * l	LWP returning from a signal handler.
 * scp	sigcontext already copied in from user space.
 * retval	unused here.
 *
 * Returns EINVAL if the context tries to change any PSL_USERSTATIC flag
 * bit or names a non-user code segment; otherwise loads the trapframe,
 * updates the on-signal-stack flag and the signal mask under p_lock, and
 * returns EJUSTRETURN.
 */
static int
linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
    register_t *retval)
{
	struct trapframe * const tf = l->l_md.md_regs;
	struct proc * const p = l->l_proc;
	stack_t * const sas = &l->l_sigstk;
	sigset_t newmask;
	ssize_t off;

	DPRINTF(("sigreturn enter esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));

	/*
	 * Check for security violations. If we're returning to
	 * protected mode, the CPU will validate the segment registers
	 * automatically and generate a trap on violations. We handle
	 * the trap, rather than doing all of the checking here.
	 */
	if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0)
		return EINVAL;
	if (!USERMODE(scp->sc_cs))
		return EINVAL;

	/* Segment registers. */
	tf->tf_cs = scp->sc_cs;
	tf->tf_ds = scp->sc_ds;
	tf->tf_es = scp->sc_es;
	tf->tf_fs = scp->sc_fs;
	tf->tf_gs = scp->sc_gs;
	tf->tf_ss = scp->sc_ss;

	/* General purpose registers. */
	tf->tf_eax = scp->sc_eax;
	tf->tf_ebx = scp->sc_ebx;
	tf->tf_ecx = scp->sc_ecx;
	tf->tf_edx = scp->sc_edx;
	tf->tf_esi = scp->sc_esi;
	tf->tf_edi = scp->sc_edi;
	tf->tf_ebp = scp->sc_ebp;

	/* Stack pointer (taken from sc_esp_at_signal), pc and flags. */
	tf->tf_esp = scp->sc_esp_at_signal;
	tf->tf_eip = scp->sc_eip;
	tf->tf_eflags = scp->sc_eflags;

	mutex_enter(p->p_lock);

	/*
	 * Linux really does it this way; it doesn't have space in sigframe
	 * to save the onstack flag.  We are "on the signal stack" exactly
	 * when the restored stack pointer lies inside it.
	 */
	off = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
	if (off < 0 || off >= sas->ss_size)
		sas->ss_flags &= ~SS_ONSTACK;
	else
		sas->ss_flags |= SS_ONSTACK;

	/* Restore signal mask. */
	linux_old_to_native_sigset(&newmask, &scp->sc_mask);
	(void) sigprocmask1(l, SIG_SETMASK, &newmask, 0);

	mutex_exit(p->p_lock);

	DPRINTF(("sigreturn exit esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));
	return EJUSTRETURN;
}
#ifdef USER_LDT
/*
 * modify_ldt() read function: copy raw LDT entries out to user space.
 *
 * l	calling LWP.
 * uap	syscall arguments; ptr is the user buffer, bytecount its size.
 * retval	on success receives the number of bytes copied out.
 *
 * Returns 0 on success, or the error from x86_get_ldt1() / copyout().
 */
static int
linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
    register_t *retval)
{
	struct x86_get_ldt_args gl;
	union descriptor *ldt_buf;
	size_t sz, nbytes;
	int error;

	/*
	 * I've checked the linux code - this function is asymmetric with
	 * linux_write_ldt, and returns raw ldt entries.
	 * NB, the code I saw zeroed the spare parts of the user buffer.
	 */

	DPRINTF(("linux_read_ldt!"));

	/* Scratch buffer sized for 8192 descriptors, handed out zeroed. */
	sz = 8192 * sizeof(*ldt_buf);
	ldt_buf = kmem_zalloc(sz, KM_SLEEP);

	/* Ask for as many whole descriptors as fit in the user buffer. */
	gl.start = 0;
	gl.desc = NULL;
	gl.num = SCARG(uap, bytecount) / sizeof(*ldt_buf);
	error = x86_get_ldt1(l, &gl, ldt_buf);

	/* NB gl.num might have changed */
	if (error == 0) {
		/*
		 * Size the returned count and the copyout from the same
		 * expression, based on the buffer actually copied, so the
		 * two can never disagree.
		 */
		nbytes = gl.num * sizeof(*ldt_buf);
		*retval = nbytes;
		error = copyout(ldt_buf, SCARG(uap, ptr), nbytes);
	}

	kmem_free(ldt_buf, sz);
	return error;
}
/*
 * Descriptor description copied in from user space by linux_write_ldt(),
 * which requires the caller's bytecount to equal sizeof(struct
 * linux_ldt_info) and translates the fields into a segment descriptor.
 */
struct linux_ldt_info {
u_int entry_number;	/* LDT slot to write; must be below 8192 */
u_long base_addr;	/* segment base (split into sd_lobase/sd_hibase) */
u_int limit;		/* segment limit (split into sd_lolimit/sd_hilimit) */
u_int seg_32bit:1;	/* copied to sd_def32 */
u_int contents:2;	/* placed in bits 2-3 of sd_type */
u_int read_exec_only:1;	/* inverted into bit 1 of sd_type */
u_int limit_in_pages:1;	/* copied to sd_gran */
u_int seg_not_present:1;	/* inverted into sd_p */
u_int useable:1;	/* copied to sd_xx, except in old mode (forced to 0) */
};
static int
linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
int oldmode)
{
struct linux_ldt_info ldt_info;
union descriptor d;
struct x86_set_ldt_args sl;
int error;
DPRINTF(("linux_write_ldt %d\n", oldmode));
if (SCARG(uap, bytecount) != sizeof(ldt_info))
return (EINVAL);
if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
return error;
if (ldt_info.entry_number >= 8192)
return (EINVAL);
if (ldt_info.contents == 3) {
if (oldmode)
return (EINVAL);
if (ldt_info.seg_not_present)
return (EINVAL);
}
if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
(oldmode || (ldt_info.contents == 0 &&
ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
ldt_info.useable == 0))) {
/* this means you should zero the ldt */
(void)memset(&d, 0, sizeof(d));
} else {
d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
d.sd.sd_lolimit = ldt_info.limit & 0xffff;
d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
d.sd.sd_type = 16 | (ldt_info.contents << 2) |
(!ldt_info.read_exec_only << 1);
d.sd.sd_dpl = SEL_UPL;
d.sd.sd_p = !ldt_info.seg_not_present;
d.sd.sd_def32 = ldt_info.seg_32bit;
d.sd.sd_gran = ldt_info.limit_in_pages;
if (!oldmode)
d.sd.sd_xx = ldt_info.useable;
else
d.sd.sd_xx = 0;
}
sl.start = ldt_info.entry_number;
sl.desc = NULL;
sl.num = 1;
DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
return x86_set_ldt1(l, &sl, &d);
}
#endif /* USER_LDT */
/*
 * Linux modify_ldt() system call: dispatch on the function code.
 *
 *	0	read the LDT (linux_read_ldt)
 *	1	write an entry, old mode (linux_write_ldt, oldmode = 1)
 *	2	read the default LDT; not implemented, ENOSYS
 *	0x11	write an entry, new mode (linux_write_ldt, oldmode = 0)
 *
 * Any other function code, and every call on a kernel built without
 * USER_LDT, yields ENOSYS.
 */
int
linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) func;
		syscallarg(void *) ptr;
		syscallarg(size_t) bytecount;
	} */
#ifdef USER_LDT
	const int func = SCARG(uap, func);

	if (func == 0)
		return linux_read_ldt(l, uap, retval);
	if (func == 1)
		return linux_write_ldt(l, uap, 1);
	if (func == 0x11)
		return linux_write_ldt(l, uap, 0);
#ifdef notyet
	if (func == 2)
		return linux_read_default_ldt(l, uap, retval);
#endif
#endif /* USER_LDT */

	return (ENOSYS);
}
/*
 * XXX Pathetic hack to make svgalib work. This will fake the major
 * device number of an opened VT so that svgalib likes it. grmbl.
 * Should probably do it 'wrong the right way' and use a mapping
 * array for all major device numbers, and map linux_mknod too.
 *
 * dev	native device number.
 * raw	non-zero enables the wsdisplay -> Linux console mapping (only on
 *	kernels with wsdisplay configured); the console minor is the
 *	native minor plus one.
 *
 * pty master and slave devices are always given the Linux pty majors
 * with their minor unchanged.  Anything else is returned untouched.
 */
dev_t
linux_fakedev(dev_t dev, int raw)
{
	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
#if (NWSDISPLAY > 0)
	extern const struct cdevsw wsdisplay_cdevsw;
#endif
	const struct cdevsw * const sw = cdevsw_lookup(dev);

#if (NWSDISPLAY > 0)
	if (raw && sw == &wsdisplay_cdevsw)
		return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
#endif

	if (sw == &pts_cdevsw)
		return makedev(LINUX_PTS_MAJOR, minor(dev));
	if (sw == &ptc_cdevsw)
		return makedev(LINUX_PTC_MAJOR, minor(dev));

	return dev;
}
#if (NWSDISPLAY > 0)
/*
 * Static keyboard translation tables handed out by the LINUX_KDGKBENT
 * ioctl in linux_machdepioctl().  That's not complete, but enough to get
 * an X server running.
 *
 * Each table holds NR_KEYS entries and is indexed by the kb_index field of
 * the request; the looked-up value is returned in kb_value.  Going by
 * their names the tables cover the unmodified, shift, AltGr and control
 * states.
 * NOTE(review): the entries look like Linux keymap codes (type in the
 * high byte, value in the low byte) -- confirm before editing any value.
 */
#define NR_KEYS 128
static const u_short plain_map[NR_KEYS] = {
0x0200, 0x001b, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036,
0x0037, 0x0038, 0x0039, 0x0030, 0x002d, 0x003d, 0x007f, 0x0009,
0x0b71, 0x0b77, 0x0b65, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
0x0b6f, 0x0b70, 0x005b, 0x005d, 0x0201, 0x0702, 0x0b61, 0x0b73,
0x0b64, 0x0b66, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x003b,
0x0027, 0x0060, 0x0700, 0x005c, 0x0b7a, 0x0b78, 0x0b63, 0x0b76,
0x0b62, 0x0b6e, 0x0b6d, 0x002c, 0x002e, 0x002f, 0x0700, 0x030c,
0x0703, 0x0020, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0209, 0x0307,
0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003c, 0x010a,
0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
}, shift_map[NR_KEYS] = {
0x0200, 0x001b, 0x0021, 0x0040, 0x0023, 0x0024, 0x0025, 0x005e,
0x0026, 0x002a, 0x0028, 0x0029, 0x005f, 0x002b, 0x007f, 0x0009,
0x0b51, 0x0b57, 0x0b45, 0x0b52, 0x0b54, 0x0b59, 0x0b55, 0x0b49,
0x0b4f, 0x0b50, 0x007b, 0x007d, 0x0201, 0x0702, 0x0b41, 0x0b53,
0x0b44, 0x0b46, 0x0b47, 0x0b48, 0x0b4a, 0x0b4b, 0x0b4c, 0x003a,
0x0022, 0x007e, 0x0700, 0x007c, 0x0b5a, 0x0b58, 0x0b43, 0x0b56,
0x0b42, 0x0b4e, 0x0b4d, 0x003c, 0x003e, 0x003f, 0x0700, 0x030c,
0x0703, 0x0020, 0x0207, 0x010a, 0x010b, 0x010c, 0x010d, 0x010e,
0x010f, 0x0110, 0x0111, 0x0112, 0x0113, 0x0213, 0x0203, 0x0307,
0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003e, 0x010a,
0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
0x020b, 0x0601, 0x0602, 0x0117, 0x0600, 0x020a, 0x0115, 0x0116,
0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
}, altgr_map[NR_KEYS] = {
0x0200, 0x0200, 0x0200, 0x0040, 0x0200, 0x0024, 0x0200, 0x0200,
0x007b, 0x005b, 0x005d, 0x007d, 0x005c, 0x0200, 0x0200, 0x0200,
0x0b71, 0x0b77, 0x0918, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
0x0b6f, 0x0b70, 0x0200, 0x007e, 0x0201, 0x0702, 0x0914, 0x0b73,
0x0917, 0x0919, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x0200,
0x0200, 0x0200, 0x0700, 0x0200, 0x0b7a, 0x0b78, 0x0916, 0x0b76,
0x0915, 0x0b6e, 0x0b6d, 0x0200, 0x0200, 0x0200, 0x0700, 0x030c,
0x0703, 0x0200, 0x0207, 0x050c, 0x050d, 0x050e, 0x050f, 0x0510,
0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0208, 0x0202, 0x0911,
0x0912, 0x0913, 0x030b, 0x090e, 0x090f, 0x0910, 0x030a, 0x090b,
0x090c, 0x090d, 0x090a, 0x0310, 0x0206, 0x0200, 0x007c, 0x0516,
0x0517, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
}, ctrl_map[NR_KEYS] = {
0x0200, 0x0200, 0x0200, 0x0000, 0x001b, 0x001c, 0x001d, 0x001e,
0x001f, 0x007f, 0x0200, 0x0200, 0x001f, 0x0200, 0x0008, 0x0200,
0x0011, 0x0017, 0x0005, 0x0012, 0x0014, 0x0019, 0x0015, 0x0009,
0x000f, 0x0010, 0x001b, 0x001d, 0x0201, 0x0702, 0x0001, 0x0013,
0x0004, 0x0006, 0x0007, 0x0008, 0x000a, 0x000b, 0x000c, 0x0200,
0x0007, 0x0000, 0x0700, 0x001c, 0x001a, 0x0018, 0x0003, 0x0016,
0x0002, 0x000e, 0x000d, 0x0200, 0x020e, 0x007f, 0x0700, 0x030c,
0x0703, 0x0000, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0204, 0x0307,
0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x0200, 0x010a,
0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};
/*
 * Keymap selector for the LINUX_KDGKBENT ioctl: linux_machdepioctl()
 * indexes this array with the request's kb_table field and rejects table
 * numbers at or beyond its end.
 * NOTE(review): altgr_map occupies both slot 2 and slot 3, presumably as
 * a stand-in so that ctrl_map lands in slot 4 -- confirm against the
 * Linux keymap numbering.
 */
const u_short * const linux_keytabs[] = {
plain_map, shift_map, altgr_map, altgr_map, ctrl_map
};
#endif
/*
 * Find the BIOS disk information matching an open block device.
 *
 * p	unused.
 * fp	open file; must refer to a vnode of type VBLK.
 *
 * The device name is rebuilt as "<block driver name><unit>" and looked up
 * in the x86_alldisks native disk list.  The first BIOS match of the
 * first same-named entry that has any matches is returned.
 *
 * Returns NULL when there is no disk list, the file is not a block
 * device, or no entry with a BIOS match carries that name.
 */
static struct biosdisk_info *
fd2biosinfo(struct proc *p, struct file *fp)
{
	struct disklist * const disks = x86_alldisks;
	struct nativedisk_info *nd;
	struct vnode *vp;
	char name[16];
	int idx;

	if (disks == NULL || fp->f_type != DTYPE_VNODE)
		return NULL;

	vp = (struct vnode *)fp->f_data;
	if (vp->v_type != VBLK)
		return NULL;

	snprintf(name, sizeof name, "%s%llu",
	    devsw_blk2name(major(vp->v_rdev)),
	    (unsigned long long)DISKUNIT(vp->v_rdev));

	for (idx = 0; idx < disks->dl_nnativedisks; idx++) {
		nd = &disks->dl_nativedisks[idx];
		if (strcmp(name, nd->ni_devname) == 0 && nd->ni_nmatches != 0)
			return &disks->dl_biosdisks[nd->ni_biosmatches[0]];
	}

	return NULL;
}
/*
 * We come here in a last attempt to satisfy a Linux ioctl() call.
 *
 * l	calling LWP (the native ioctl is issued on behalf of curlwp).
 * uap	Linux ioctl arguments: fd, com, data.
 * retval	syscall return value slot.
 *
 * Three kinds of request are handled:
 *  - console/VT requests (wsdisplay kernels only): most are renamed to
 *    their native command and forwarded to sys_ioctl(); a few need their
 *    argument converted and are completed here;
 *  - LINUX_HDIO_GETGEO[_BIG]: disk geometry is synthesized from the BIOS
 *    information and/or the disklabel;
 *  - anything else is offered to the device itself through PTIOCLINUX.
 *
 * Returns EBADF if fd is not open, otherwise the error of the operation
 * performed.  The file reference taken here is always dropped at "out".
 */
int
linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(void *) data;
	} */
	struct sys_ioctl_args bia;
	u_long com;
	int error, error1;
#if (NWSDISPLAY > 0)
	struct vt_mode lvt;
	struct kbentry kbe;
#endif
	struct linux_hd_geometry hdg;
	struct linux_hd_big_geometry hdg_big;
	struct biosdisk_info *bip;
	file_t *fp;
	int fd;
	struct disklabel label;
	struct partinfo partp;
	int (*ioctlf)(struct file *, u_long, void *);
	u_long start, biostotal, realtotal;
	u_char heads, sectors;
	u_int cylinders;
	struct ioctl_pt pt;

	fd = SCARG(uap, fd);
	SCARG(&bia, fd) = fd;
	SCARG(&bia, data) = SCARG(uap, data);
	com = SCARG(uap, com);

	if ((fp = fd_getfile(fd)) == NULL)
		return (EBADF);

	switch (com) {
#if (NWSDISPLAY > 0)
	case LINUX_KDGKBMODE:
		com = KDGKBMODE;
		break;
	case LINUX_KDSKBMODE:
		com = KDSKBMODE;
		/* Medium-raw mode is mapped onto the native raw mode. */
		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
			SCARG(&bia, data) = (void *)K_RAW;
		break;
	case LINUX_KIOCSOUND:
		/* Only the low 16 bits of the argument are passed on. */
		SCARG(&bia, data) =
		    (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
		/* fall through */
	case LINUX_KDMKTONE:
		com = KDMKTONE;
		break;
	case LINUX_KDSETMODE:
		com = KDSETMODE;
		break;
	case LINUX_KDGETMODE:
		/* KD_* values are equal to the wscons numbers */
		com = WSDISPLAYIO_GMODE;
		break;
	case LINUX_KDENABIO:
		com = KDENABIO;
		break;
	case LINUX_KDDISABIO:
		com = KDDISABIO;
		break;
	case LINUX_KDGETLED:
		com = KDGETLED;
		break;
	case LINUX_KDSETLED:
		com = KDSETLED;
		break;
	case LINUX_VT_OPENQRY:
		com = VT_OPENQRY;
		break;
	case LINUX_VT_GETMODE:
		memset(&lvt, 0, sizeof(lvt));
		error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt);
		if (error != 0)
			goto out;
		/* Report the signals under their Linux numbers. */
		lvt.relsig = native_to_linux_signo[lvt.relsig];
		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
		lvt.frsig = native_to_linux_signo[lvt.frsig];
		error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
		goto out;
	case LINUX_VT_SETMODE:
		error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
		if (error != 0)
			goto out;
		/*
		 * The signal numbers come straight from user space and are
		 * used as table indices below, so refuse anything outside
		 * the Linux signal range instead of reading past the table.
		 * The unsigned comparison also catches negative values.
		 * NOTE(review): LINUX__NSIG is taken to be the size of
		 * linux_to_native_signo[] -- confirm against linux_signal.h.
		 */
		if ((u_int)lvt.relsig >= LINUX__NSIG ||
		    (u_int)lvt.acqsig >= LINUX__NSIG ||
		    (u_int)lvt.frsig >= LINUX__NSIG) {
			error = EINVAL;
			goto out;
		}
		lvt.relsig = linux_to_native_signo[lvt.relsig];
		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
		lvt.frsig = linux_to_native_signo[lvt.frsig];
		error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt);
		goto out;
	case LINUX_VT_DISALLOCATE:
		/* XXX should use WSDISPLAYIO_DELSCREEN */
		error = 0;
		goto out;
	case LINUX_VT_RELDISP:
		com = VT_RELDISP;
		break;
	case LINUX_VT_ACTIVATE:
		com = VT_ACTIVATE;
		break;
	case LINUX_VT_WAITACTIVE:
		com = VT_WAITACTIVE;
		break;
	case LINUX_VT_GETSTATE:
		com = VT_GETSTATE;
		break;
	case LINUX_KDGKBTYPE:
	    {
		static const u_int8_t kb101 = KB_101;

		/* This is what Linux does. */
		error = copyout(&kb101, SCARG(uap, data), 1);
		goto out;
	    }
	case LINUX_KDGKBENT:
		/*
		 * The Linux KDGKBENT ioctl is different from the
		 * SYSV original. So we handle it in machdep code.
		 * XXX We should use keyboard mapping information
		 * from wsdisplay, but this would be expensive.
		 */
		if ((error = copyin(SCARG(uap, data), &kbe,
		    sizeof(struct kbentry))))
			goto out;
		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
		    || kbe.kb_index >= NR_KEYS) {
			error = EINVAL;
			goto out;
		}
		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
		error = copyout(&kbe, SCARG(uap, data),
		    sizeof(struct kbentry));
		goto out;
#endif
	case LINUX_HDIO_GETGEO:
	case LINUX_HDIO_GETGEO_BIG:
		/*
		 * Try to mimic Linux behaviour: return the BIOS geometry
		 * if possible (extending its # of cylinders if it's beyond
		 * the 1023 limit), fall back to the MI geometry (i.e.
		 * the real geometry) if not found, by returning an
		 * error. See common/linux_hdio.c
		 */
		bip = fd2biosinfo(curproc, fp);
		ioctlf = fp->f_ops->fo_ioctl;
		/*
		 * Only one of the two queries below has to succeed, so
		 * start from zeroed results: whichever one fails must not
		 * leave stack garbage to be reported to the caller.
		 */
		memset(&label, 0, sizeof(label));
		memset(&partp, 0, sizeof(partp));
		error = ioctlf(fp, DIOCGDINFO, (void *)&label);
		error1 = ioctlf(fp, DIOCGPARTINFO, (void *)&partp);
		if (error != 0 && error1 != 0) {
			error = error1;
			goto out;
		}
		/* The partition offset is only known if DIOCGPARTINFO worked. */
		start = error1 == 0 ? partp.pi_offset : 0;
		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
		    && bip->bi_cyl != 0) {
			heads = bip->bi_head;
			sectors = bip->bi_sec;
			cylinders = bip->bi_cyl;
			biostotal = heads * sectors * cylinders;
			realtotal = label.d_ntracks * label.d_nsectors *
			    label.d_ncylinders;
			/*
			 * heads and sectors are u_char here, so a BIOS value
			 * that does not fit would be narrowed, possibly to 0;
			 * never divide by that.
			 */
			if (realtotal > biostotal && heads != 0 &&
			    sectors != 0)
				cylinders = realtotal / (heads * sectors);
		} else {
			heads = label.d_ntracks;
			cylinders = label.d_ncylinders;
			sectors = label.d_nsectors;
		}
		if (com == LINUX_HDIO_GETGEO) {
			memset(&hdg, 0, sizeof(hdg));
			hdg.start = start;
			hdg.heads = heads;
			hdg.cylinders = cylinders;
			hdg.sectors = sectors;
			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
			goto out;
		} else {
			memset(&hdg_big, 0, sizeof(hdg_big));
			hdg_big.start = start;
			hdg_big.heads = heads;
			hdg_big.cylinders = cylinders;
			hdg_big.sectors = sectors;
			error = copyout(&hdg_big, SCARG(uap, data),
			    sizeof hdg_big);
			goto out;
		}

	default:
		/*
		 * Unknown to us. If it's on a device, just pass it through
		 * using PTIOCLINUX, the device itself might be able to
		 * make some sense of it.
		 * XXX hack: if the function returns EJUSTRETURN,
		 * it has stuffed the return value in pt.data.
		 */
		ioctlf = fp->f_ops->fo_ioctl;
		pt.com = SCARG(uap, com);
		pt.data = SCARG(uap, data);
		error = ioctlf(fp, PTIOCLINUX, &pt);
		if (error == EJUSTRETURN) {
			retval[0] = (register_t)pt.data;
			error = 0;
		}

		if (error == ENOTTY) {
			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
			    com));
		}
		goto out;
	}

	/* Renamed request: let the native ioctl path do the work. */
	SCARG(&bia, com) = com;
	error = sys_ioctl(curlwp, &bia, retval);
out:
	fd_putfile(fd);
	return error;
}
/*
 * Set I/O permissions for a process. Just set the maximum level
 * right away (ignoring the argument), otherwise we would have
 * to rely on I/O permission maps, which are not implemented.
 *
 * Returns EPERM if the KAUTH_MACHDEP_IOPL authorization is denied,
 * otherwise 0 with *retval set to 0.
 */
int
linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) level;
	} */
	struct trapframe *tf;
	int denied;

	denied = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
	    NULL, NULL, NULL, NULL);
	if (denied != 0)
		return EPERM;

	/* Grant the full I/O privilege level in the saved user flags. */
	tf = l->l_md.md_regs;
	tf->tf_eflags |= PSL_IOPL;

	*retval = 0;
	return 0;
}
/*
 * See above. If a root process tries to set access to an I/O port,
 * just let it have the whole range.
 *
 * The lo/hi port bounds are ignored.  A non-zero val is authorized as
 * KAUTH_MACHDEP_IOPERM_SET and, if allowed, raises the I/O privilege
 * level; a zero val is authorized as KAUTH_MACHDEP_IOPERM_GET and changes
 * nothing.
 *
 * Returns EPERM if the authorization is denied, otherwise 0 with *retval
 * set to 0.
 */
int
linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval)
{
	/* {
		syscallarg(unsigned int) lo;
		syscallarg(unsigned int) hi;
		syscallarg(int) val;
	} */
	struct trapframe * const tf = l->l_md.md_regs;
	const int enable = SCARG(uap, val) != 0;

	if (kauth_authorize_machdep(l->l_cred,
	    enable ? KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET,
	    NULL, NULL, NULL, NULL) != 0)
		return EPERM;

	if (enable)
		tf->tf_eflags |= PSL_IOPL;

	*retval = 0;
	return 0;
}
/*
 * User trap hook for the Linux emulation.  Nothing is done on this
 * platform: every argument is ignored and 0 is always returned.
 */
int
linux_usertrap(struct lwp *l, vaddr_t trapaddr,
    void *arg)
{
	(void)l;
	(void)trapaddr;
	(void)arg;

	return 0;
}
/*
 * Return the architecture string reported through Linux uname.
 *
 * The string starts out as "i386".  While its digit is still '3',
 * cpu_class is added to that digit, so the adjustment is no longer
 * applied once the digit has moved away from '3'.
 *
 * Returns a pointer to a static buffer; callers must not free it.
 */
const char *
linux_get_uname_arch(void)
{
	static char uname_arch[5] = "i386";

	if (uname_arch[1] != '3')
		return uname_arch;

	uname_arch[1] += cpu_class;
	return uname_arch;
}