/*
* Copyright (C) 2016 Red Hat, Inc.
* Author: Michael S. Tsirkin <mst@redhat.com>
* This work is licensed under the terms of the GNU GPL, version 2.
*
* Simple descriptor-based ring. virtio 0.9 compatible event index is used for
* signalling, unconditionally.
*/
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* Next - Where next entry will be written.
* Prev - "Next" value when event triggered previously.
* Event - Peer requested event after writing this entry.
*/
static inline bool need_event(unsigned short event,
unsigned short next,
unsigned short prev)
{
return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
}
/* Design:
* Guest adds descriptors with unique index values and DESC_HW in flags.
* Host overwrites used descriptors with correct len, index, and DESC_HW clear.
* Flags are always set last.
*/
#define DESC_HW 0x1
struct desc {
unsigned short flags;
unsigned short index;
unsigned len;
unsigned long long addr;
};
/* how much padding is needed to avoid false cache sharing */
#define HOST_GUEST_PADDING 0x80
/* Mostly read */
struct event {
unsigned short kick_index;
unsigned char reserved0[HOST_GUEST_PADDING - 2];
unsigned short call_index;
unsigned char reserved1[HOST_GUEST_PADDING - 2];
};
struct data {
void *buf; /* descriptor is writeable, we can't get buf from there */
void *data;
} *data;
struct desc *ring;
struct event *event;
struct guest {
unsigned avail_idx;
unsigned last_used_idx;
unsigned num_free;
unsigned kicked_avail_idx;
unsigned char reserved[HOST_GUEST_PADDING - 12];
} guest;
struct host {
/* we do not need to track last avail index
* unless we have more than one in flight.
*/
unsigned used_idx;
unsigned called_used_idx;
unsigned char reserved[HOST_GUEST_PADDING - 4];
} host;
/* implemented by ring */
void alloc_ring(void)
{
int ret;
int i;
ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
if (ret) {
perror("Unable to allocate ring buffer.\n");
exit(3);
}
event = malloc(sizeof *event);
if (!event) {
perror("Unable to allocate event buffer.\n");
exit(3);
}
memset(event, 0, sizeof *event);
guest.avail_idx = 0;
guest.kicked_avail_idx = -1;
guest.last_used_idx = 0;
host.used_idx = 0;
host.called_used_idx = -1;
for (i = 0; i < ring_size; ++i) {
struct desc desc = {
.index = i,
};
ring[i] = desc;
}
guest.num_free = ring_size;
data = malloc(ring_size * sizeof *data);
if (!data) {
perror("Unable to allocate data buffer.\n");
exit(3);
}
memset(data, 0, ring_size * sizeof *data);
}
/* guest side */
int add_inbuf(unsigned len, void *buf, void *datap)
{
unsigned head, index;
if (!guest.num_free)
return -1;
guest.num_free--;
head = (ring_size - 1) & (guest.avail_idx++);
/* Start with a write. On MESI architectures this helps
* avoid a shared state with consumer that is polling this descriptor.
*/
ring[head].addr = (unsigned long)(void*)buf;
ring[head].len = len;
/* read below might bypass write above. That is OK because it's just an
* optimization. If this happens, we will get the cache line in a
* shared state which is unfortunate, but probably not worth it to
* add an explicit full barrier to avoid this.
*/
barrier();
index = ring[head].index;
data[index].buf = buf;
data[index].data = datap;
/* Barrier A (for pairing) */
smp_release();
ring[head].flags = DESC_HW;
return 0;
}
void *get_buf(unsigned *lenp, void **bufp)
{
unsigned head = (ring_size - 1) & guest.last_used_idx;
unsigned index;
void *datap;
if (ring[head].flags & DESC_HW)
return NULL;
/* Barrier B (for pairing) */
smp_acquire();
*lenp = ring[head].len;
index = ring[head].index & (ring_size - 1);
datap = data[index].data;
*bufp = data[index].buf;
data[index].buf = NULL;
data[index].data = NULL;
guest.num_free++;
guest.last_used_idx++;
return datap;
}
bool used_empty()
{
unsigned head = (ring_size - 1) & guest.last_used_idx;
return (ring[head].flags & DESC_HW);
}
void disable_call()
{
/* Doing nothing to disable calls might cause
* extra interrupts, but reduces the number of cache misses.
*/
}
bool enable_call()
{
event->call_index = guest.last_used_idx;
/* Flush call index write */
/* Barrier D (for pairing) */
smp_mb();
return used_empty();
}
void kick_available(void)
{
/* Flush in previous flags write */
/* Barrier C (for pairing) */
smp_mb();
if (!need_event(event->kick_index,
guest.avail_idx,
guest.kicked_avail_idx))
return;
guest.kicked_avail_idx = guest.avail_idx;
kick();
}
/* host side */
void disable_kick()
{
/* Doing nothing to disable kicks might cause
* extra interrupts, but reduces the number of cache misses.
*/
}
bool enable_kick()
{
event->kick_index = host.used_idx;
/* Barrier C (for pairing) */
smp_mb();
return avail_empty();
}
bool avail_empty()
{
unsigned head = (ring_size - 1) & host.used_idx;
return !(ring[head].flags & DESC_HW);
}
bool use_buf(unsigned *lenp, void **bufp)
{
unsigned head = (ring_size - 1) & host.used_idx;
if (!(ring[head].flags & DESC_HW))
return false;
/* make sure length read below is not speculated */
/* Barrier A (for pairing) */
smp_acquire();
/* simple in-order completion: we don't need
* to touch index at all. This also means we
* can just modify the descriptor in-place.
*/
ring[head].len--;
/* Make sure len is valid before flags.
* Note: alternative is to write len and flags in one access -
* possible on 64 bit architectures but wmb is free on Intel anyway
* so I have no way to test whether it's a gain.
*/
/* Barrier B (for pairing) */
smp_release();
ring[head].flags = 0;
host.used_idx++;
return true;
}
void call_used(void)
{
/* Flush in previous flags write */
/* Barrier D (for pairing) */
smp_mb();
if (!need_event(event->call_index,
host.used_idx,
host.called_used_idx))
return;
host.called_used_idx = host.used_idx;
call();
}