#ifndef _TOOLS_LINUX_RING_BUFFER_H_ #define _TOOLS_LINUX_RING_BUFFER_H_ #include <asm/barrier.h> #include <linux/perf_event.h> /* * Contract with kernel for walking the perf ring buffer from * user space requires the following barrier pairing (quote * from kernel/events/ring_buffer.c): * * Since the mmap() consumer (userspace) can run on a * different CPU: * * kernel user * * if (LOAD ->data_tail) { LOAD ->data_head * (A) smp_rmb() (C) * STORE $data LOAD $data * smp_wmb() (B) smp_mb() (D) * STORE ->data_head STORE ->data_tail * } * * Where A pairs with D, and B pairs with C. * * In our case A is a control dependency that separates the * load of the ->data_tail and the stores of $data. In case * ->data_tail indicates there is no room in the buffer to * store $data we do not. * * D needs to be a full barrier since it separates the data * READ from the tail WRITE. * * For B a WMB is sufficient since it separates two WRITEs, * and for C an RMB is sufficient since it separates two READs. * * Note, instead of B, C, D we could also use smp_store_release() * in B and D as well as smp_load_acquire() in C. * * However, this optimization does not make sense for all kernel * supported architectures since for a fair number it would * resolve into READ_ONCE() + smp_mb() pair for smp_load_acquire(), * and smp_mb() + WRITE_ONCE() pair for smp_store_release(). * * Thus for those smp_wmb() in B and smp_rmb() in C would still * be less expensive. For the case of D this has either the same * cost or is less expensive, for example, due to TSO x86 can * avoid the CPU barrier entirely. */ static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base) { /* * Architectures where smp_load_acquire() does not fallback to * READ_ONCE() + smp_mb() pair. */ #if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \ defined(__ia64__) || defined(__sparc__) && defined(__arch64__) return smp_load_acquire(&base->data_head); #else u64 head = READ_ONCE(base->data_head); smp_rmb(); return head; #endif } static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base, u64 tail) { smp_store_release(&base->data_tail, tail); } #endif /* _TOOLS_LINUX_RING_BUFFER_H_ */ |