.\" $NetBSD: mbuf.9,v 1.67 2022/06/28 20:12:52 rillig Exp $
.\"
.\" Copyright (c) 1997 The NetBSD Foundation, Inc.
.\" All rights reserved.
.\"
.\" This documentation is derived from text contributed to The NetBSD Foundation
.\" by S.P.Zeidler (aka stargazer).
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
.Dd June 28, 2022
.Dt MBUF 9
.Os
.Sh NAME
.Nm mbuf ,
.Nm m_get ,
.Nm m_gethdr ,
.Nm m_devget ,
.Nm m_copym ,
.Nm m_copypacket ,
.Nm m_copydata ,
.Nm m_copyback ,
.Nm m_copyback_cow ,
.Nm m_cat ,
.Nm m_dup ,
.Nm m_makewritable ,
.Nm m_pulldown ,
.Nm m_pullup ,
.Nm m_copyup ,
.Nm m_split ,
.Nm m_adj ,
.Nm m_apply ,
.Nm m_free ,
.Nm m_freem ,
.Nm mtod ,
.Nm MGET ,
.Nm MGETHDR ,
.Nm MEXTMALLOC ,
.Nm MEXTADD ,
.Nm MCLGET ,
.Nm m_copy_pkthdr ,
.Nm m_move_pkthdr ,
.Nm m_remove_pkthdr ,
.Nm m_align ,
.Nm M_LEADINGSPACE ,
.Nm M_TRAILINGSPACE ,
.Nm M_PREPEND ,
.Nm MCHTYPE
.Nd "functions and macros for managing memory used by networking code"
.Sh SYNOPSIS
.In sys/mbuf.h
.Ft struct mbuf *
.Fn m_get "int how" "int type"
.Ft struct mbuf *
.Fn m_gethdr "int how" "int type"
.Ft struct mbuf *
.Fn m_devget "char *buf" "int totlen" "int off" "struct ifnet *ifp"
.Ft struct mbuf *
.Fn m_copym "struct mbuf *m" "int off" "int len" "int wait"
.Ft struct mbuf *
.Fn m_copypacket "struct mbuf *m" "int how"
.Ft void
.Fn m_copydata "struct mbuf *m" "int off" "int len" "void *cp"
.Ft void
.Fn m_copyback "struct mbuf *m0" "int off" "int len" "void *cp"
.Ft struct mbuf *
.Fn m_copyback_cow "struct mbuf *m0" "int off" "int len" "void *cp" "int how"
.Ft int
.Fn m_makewritable "struct mbuf **mp" "int off" "int len" "int how"
.Ft void
.Fn m_cat "struct mbuf *m" "struct mbuf *n"
.Ft struct mbuf *
.Fn m_dup "struct mbuf *m" "int off" "int len" "int wait"
.Ft struct mbuf *
.Fn m_pulldown "struct mbuf *m" "int off" "int len" "int *offp"
.Ft struct mbuf *
.Fn m_pullup "struct mbuf *n" "int len"
.Ft struct mbuf *
.Fn m_copyup "struct mbuf *m" "int len" "int dstoff"
.Ft struct mbuf *
.Fn m_split "struct mbuf *m0" "int len" "int wait"
.Ft void
.Fn m_adj "struct mbuf *mp" "int req_len"
.Ft int
.Fn m_apply "struct mbuf *m" "int off" "int len" "int *f(void *, void *, unsigned int)" "void *arg"
.Ft struct mbuf *
.Fn m_free "struct mbuf *m"
.Ft void
.Fn m_freem "struct mbuf *m"
.Ft datatype
.Fn mtod "struct mbuf *m" "datatype"
.Ft void
.Fn MGET "struct mbuf *m" "int how" "int type"
.Ft void
.Fn MGETHDR "struct mbuf *m" "int how" "int type"
.Ft void
.Fn MEXTMALLOC "struct mbuf *m" "int len" "int how"
.Ft void
.Fn MEXTADD "struct mbuf *m" "void *buf" "int size" "int type" "void (*free)(struct mbuf *, void *, size_t, void *)" "void *arg"
.Ft void
.Fn MCLGET "struct mbuf *m" "int how"
.Ft void
.Fn m_copy_pkthdr "struct mbuf *to" "struct mbuf *from"
.Ft void
.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from"
.Ft void
.Fn m_remove_pkthdr "struct mbuf *m"
.Ft void
.Fn m_align "struct mbuf *m" "int len"
.Ft int
.Fn M_LEADINGSPACE "struct mbuf *m"
.Ft int
.Fn M_TRAILINGSPACE "struct mbuf *m"
.Ft void
.Fn M_PREPEND "struct mbuf *m" "int plen" "int how"
.Ft void
.Fn MCHTYPE "struct mbuf *m" "int type"
.Sh DESCRIPTION
The
.Nm
functions and macros provide an easy and consistent way to handle
a networking stack's memory management needs.
An
.Nm
consists of a header and a data area.
It is of a fixed size,
.Dv MSIZE
.Pq defined in Aq Pa machine/param.h ,
which includes the size of the header.
The header contains a pointer to the next
.Nm
in the
.Sq "mbuf chain" ,
a pointer to the next
.Sq "mbuf chain" ,
a pointer to the data area, the amount of data in this mbuf, its type
and a
.Dv flags
field.
.Pp
The
.Dv type
variable can signify:
.Bl -tag -compact -offset indent -width "XXXXXXXXXXX"
.It Dv MT_FREE
the mbuf should be on the ``free'' list
.It Dv MT_DATA
data was dynamically allocated
.It Dv MT_HEADER
data is a packet header
.It Dv MT_SONAME
data is a socket name
.It Dv MT_SOOPTS
data is socket options
.It Dv MT_FTABLE
data is the fragment reassembly header
.It Dv MT_CONTROL
mbuf contains ancillary \&(protocol control\&) data
.It Dv MT_OOBDATA
mbuf contains out-of-band data.
.El
.Pp
The
.Dv flags
variable contains information describing the
.Nm ,
notably:
.Bl -tag -compact -offset indent -width "XXXXXXXXXXX"
.It Dv M_EXT
has external storage
.It Dv M_PKTHDR
is start of record
.It Dv M_EOR
is end of record
.El
.Pp
If an
.Nm
designates the start of a record
.Pq Dv M_PKTHDR ,
its
.Dv flags
field may contain additional information describing the content of
the record:
.Bl -tag -compact -offset indent -width "XXXXXXXXXXX"
.It Dv M_BCAST
sent/received as link-level broadcast
.It Dv M_MCAST
sent/received as link-level multicast
.El
.Pp
An
.Nm
may add a single
.Sq "mbuf cluster"
of
.Dv MCLBYTES
bytes
.Pq defined in Aq Pa machine/param.h ,
which has no additional overhead
and is used instead of the internal data area; this is done when at least
.Dv MINCLSIZE
bytes of data must be stored.
.Pp
When the
.Dv M_EXT
flag is set on an mbuf,
the external storage area could be shared among multiple mbufs.
Therefore, care must be taken when overwriting the data content of an
mbuf, because its external storage could be considered as read-only.
.Bl -tag -width compact
.It Fn m_get "int how" "int type"
Allocates an mbuf and initializes it to contain internal data.
The
.Fa how
parameter is a choice of
.Dv M_WAIT / M_DONTWAIT
from caller.
.Dv M_WAIT
means the call cannot fail, but may take forever.
The
.Fa type
parameter is an mbuf type.
.It Fn m_gethdr "int how" "int type"
Allocates an mbuf and initializes it to contain a packet header and internal
data.
The
.Fa how
parameter is a choice of
.Dv M_WAIT / M_DONTWAIT
from caller.
The
.Fa type
parameter is an mbuf type.
.It Fn m_devget "char *buf" "int totlen" "int off" "struct ifnet *ifp"
Copies
.Fa len
bytes from device local memory into mbufs.
If parameter
.Fa off
is non-zero, the packet is supposed to be trailer-encapsulated and
.Fa off
bytes plus the type and length fields will be skipped before copying.
Returns the top of the mbuf chain it created.
.It Fn m_copym "struct mbuf *m" "int off" "int len" "int wait"
Creates a copy of an mbuf chain starting
.Fa off
bytes from the beginning, continuing for
.Fa len
bytes.
If the
.Fa len
requested is
.Dv M_COPYALL ,
the complete mbuf chain will be copied.
The
.Fa wait
parameter is a choice of
.Dv M_WAIT / M_DONTWAIT
from caller.
.It Fn m_copypacket "struct mbuf *m" "int how"
Copies an entire packet, including header.
This function is an optimization of the common case
.Li m_copym ( m , 0 , Dv M_COPYALL , Fa how ) .
However, contrary to
.Fn m_copym ,
a header must be present.
It is incorrect to use
.Fn m_copypacket
with an mbuf that does not have a header.
.It Fn m_copydata "struct mbuf *m" "int off" "int len" "void *cp"
Copies
.Fa len
bytes data from mbuf chain
.Fa m
into the buffer
.Fa cp ,
starting
.Fa off
bytes from the beginning.
.It Fn m_copyback "struct mbuf *m0" "int off" "int len" "void *cp"
Copies
.Fa len
bytes data from buffer
.Fa cp
back into the mbuf chain
.Fa m0 ,
starting
.Fa off
bytes from the beginning of the chain, extending the mbuf chain if necessary.
.Fn m_copyback
can only fail when extending the chain.
The caller should check for this kind of failure
by checking the resulting length of the chain in that case.
It is an error to use
.Fn m_copyback
on read-only mbufs.
.It Fn m_copyback_cow "struct mbuf *m0" "int off" "int len" "void *cp" \
"int how"
Copies
.Fa len
bytes data from buffer
.Fa cp
back into the mbuf chain
.Fa m0
as
.Fn m_copyback
does.
Unlike
.Fn m_copyback ,
it is safe to use
.Fn m_copyback_cow
on read-only mbufs.
If needed,
.Fn m_copyback_cow
automatically allocates new mbufs and adjusts the chain.
On success, it returns a pointer to the resulting mbuf chain,
and frees the original mbuf
.Fa m0 .
Otherwise, it returns
.Dv NULL .
The
.Fa how
parameter is a choice of
.Dv M_WAIT / M_DONTWAIT
from the caller.
Unlike
.Fn m_copyback ,
extending the mbuf chain isn't supported.
It is an error to attempt to extend the mbuf chain using
.Fn m_copyback_cow .
.It Fn m_makewritable "struct mbuf **mp" "int off" "int len" "int how"
Rearranges an mbuf chain so that
.Fa len
bytes from offset
.Fa off
are writable.
When it meets read-only mbufs, it allocates new mbufs, adjusts the chain as
.Fn m_copyback_cow
does, and copies the original content into them.
.Fn m_makewritable
does
.Em not
guarantee that all
.Fa len
bytes at
.Fa off
are consecutive.
The
.Fa how
parameter is a choice of
.Dv M_WAIT / M_DONTWAIT
from the caller.
.Fn m_makewritable
preserves the contents of the mbuf chain even in the case of failure.
It updates a pointer to the mbuf chain pointed to by
.Fa mp .
It returns 0 on success.
Otherwise, it returns an error code, typically
.Er ENOBUFS .
.It Fn m_cat "struct mbuf *m" "struct mbuf *n"
Concatenates mbuf chain
.Fa n
to
.Fa m .
Both chains must be of the same type; packet headers will
.Em not
be updated if present.
.It Fn m_dup "struct mbuf *m" "int off" "int len" "int wait"
Similarly to
.Fn m_copym ,
the function creates a copy of an mbuf chain starting
.Fa off
bytes from the beginning, continuing for
.Fa len
bytes.
While
.Fn m_copym
tries to share external storage for mbufs with
.Dv M_EXT
flag,
.Fn m_dup
will deep-copy the whole data content into new mbuf chain
and avoids shared external storage.
.It Fn m_pulldown "struct mbuf *m" "int off" "int len" "int *offp"
Rearranges an mbuf chain so that
.Fa len
bytes from offset
.Fa off
are contiguous and in the data area of an mbuf.
The return value points to an mbuf in the middle of the mbuf chain
.Fa m .
If we call the return value
.Fa n ,
the contiguous data region is available at
.Li "mtod(n, void *) + *offp" ,
or
.Li "mtod(n, void *)"
if
.Fa offp
is
.Dv NULL .
The top of the mbuf chain
.Fa m ,
and mbufs up to
.Fa off ,
will not be modified.
On successful return, it is guaranteed that the mbuf pointed to by
.Fa n
does not have a shared external storage,
therefore it is safe to update the contiguous region.
Returns
.Dv NULL
and frees the mbuf chain on failure.
.Fa len
must be smaller than or equal to
.Dv MCLBYTES .
.It Fn m_pullup "struct mbuf *m" "int len"
Rearranges an mbuf chain so that
.Fa len
bytes are contiguous
and in the data area of an mbuf (so that
.Fn mtod
will work for a structure of size
.Fa len ) .
Returns the resulting
mbuf chain on success, frees it and returns
.Dv NULL
on failure.
If there is room, it will add up to
.Dv max_protohdr
-
.Fa len
extra bytes to the
contiguous region to possibly avoid being called again.
.Fa len
must be smaller or equal than
.Dv MHLEN .
.It Fn m_copyup "struct mbuf *m" "int len" "int dstoff"
Similar to
.Fn m_pullup
but copies
.Fa len
bytes of data into a new mbuf at
.Fa dstoff
bytes into the mbuf.
The
.Fa dstoff
argument aligns the data and leaves room for a link layer header.
Returns the new
mbuf chain on success, and frees the mbuf chain and returns
.Dv NULL
on failure.
Note that
the function does not allocate mbuf clusters, so
.Fa len + dstoff
must be less than
.Dv MHLEN .
.It Fn m_split "struct mbuf *m0" "int len" "int wait"
Partitions an mbuf chain in two pieces, returning the tail,
which is all but the first
.Fa len
bytes.
In case of failure, it returns
.Dv NULL
and restores the chain to its original state.
.It Fn m_adj "struct mbuf *mp" "int req_len"
Shaves off
.Fa req_len
bytes from head or tail of the (valid) data area.
If
.Fa req_len
is greater than zero, front bytes are being shaved off, if it's smaller,
from the back (and if it is zero, the mbuf will stay bearded).
This function does not move data in any way, but is used to manipulate the
data area pointer and data length variable of the mbuf in a non-clobbering
way.
.It Fn m_apply "struct mbuf *m" "int off" "int len" "int (*f)(void *, void *, unsigned int)" "void *arg"
Apply function
.Fa f
to the data in an mbuf chain starting
.Fa off
bytes from the beginning, continuing for
.Fa len
bytes.
Neither
.Fa off
nor
.Fa len
may be negative.
.Fa arg
will be supplied as first argument for
.Fa f ,
the second argument will be the pointer to the data buffer of a
packet (starting after
.Fa off
bytes in the stream), and the third argument is the amount
of data in bytes in this call.
If
.Fa f
returns something not equal to zero
.Fn m_apply
will bail out, returning the return code of
.Fa f .
Upon successful completion it will return zero.
.It Fn m_free "struct mbuf *m"
Frees mbuf
.Fa m .
.It Fn m_freem "struct mbuf *m"
Frees the mbuf chain beginning with
.Fa m .
This function contains the elementary sanity check for a
.Dv NULL
pointer.
.It Fn mtod "struct mbuf *m" "datatype"
Returns a pointer to the data contained in the specified mbuf
.Fa m ,
type-casted to the specified data type
.Fa datatype .
.It Fn MGET "struct mbuf *m" "int how" "int type"
Allocates mbuf
.Fa m
and initializes it to contain internal data.
See
.Fn m_get .
.It Fn MGETHDR "struct mbuf *m" "int how" "int type"
Allocates mbuf
.Fa m
and initializes it to contain a packet header.
See
.Fn m_gethdr .
.It Fn MEXTMALLOC "struct mbuf *m" "int len" "int how"
Allocates external storage of size
.Fa len
for mbuf
.Fa m .
The
.Fa how
parameter is a choice of
.Dv M_WAIT / M_DONTWAIT
from caller.
The flag
.Dv M_EXT
is set upon success.
.It Fn MEXTADD "struct mbuf *m" "void *buf" "int size" "int type" "void (*free)(struct mbuf *, void *, size_t, void *)" "void *arg"
Adds pre-allocated external storage
.Fa buf
to a normal mbuf
.Fa m ;
the parameters
.Fa size ,
.Fa type ,
.Fa free
and
.Fa arg
describe the external storage.
.Fa size
is the size of the storage,
.Fa type
describes its
.Xr malloc 9
type,
.Fa free
is a free routine (if not the usual one), and
.Fa arg
is a possible argument to the free routine.
The flag
.Dv M_EXT
is set upon success.
If a free routine is specified, it will be called when the mbuf is freed.
In the case of former, the first argument for a free routine is the mbuf
.Fa m
and the routine is expected to free it in addition to the external storage
pointed by second argument.
In the case of latter, the first argument for the routine is NULL.
.It Fn MCLGET "struct mbuf *m" "int how"
Allocates and adds an mbuf cluster to a normal mbuf
.Fa m .
The
.Fa how
parameter is a choice of
.Dv M_WAIT / M_DONTWAIT
from caller.
The flag
.Dv M_EXT
is set upon success.
.It Fn m_copy_pkthdr "struct mbuf *to" "struct mbuf *from"
Copies the mbuf pkthdr from mbuf
.Fa from
to mbuf
.Fa to .
.Fa from
must have the type flag
.Dv M_PKTHDR
set, and
.Fa to
must be empty.
.It Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from"
Moves the mbuf pkthdr from mbuf
.Fa from
to mbuf
.Fa to .
.Fa from
must have the type flag
.Dv M_PKTHDR
set, and
.Fa to
must be empty.
The flag
.Dv M_PKTHDR
in mbuf
.Fa from
will be cleared.
.It Fn m_remove_pkthdr "struct mbuf *m"
Removes the mbuf pkthdr from mbuf
.Fa m .
.Fa m
must have the flag
.Dv M_PKTHDR
set.
This flag will be cleared.
.It Fn m_align "struct mbuf *m" "int len"
Sets the data pointer of a newly allocated mbuf
.Fa m
to
.Fa len
bytes from the end of the mbuf data area, so that
.Fa len
bytes of data written to the mbuf
.Fa m ,
starting at the data pointer, will be aligned to the end of the data area.
.It Fn M_LEADINGSPACE "struct mbuf *m"
Returns the amount of space available before the current start of valid
data in mbuf
.Fa m .
Returns 0 if the mbuf data part is shared across multiple mbufs
.Pq i.e. not writable .
.It Fn M_TRAILINGSPACE "struct mbuf *m"
Returns the amount of space available after the current end of valid
data in mbuf
.Fa m .
Returns 0 if the mbuf data part is shared across multiple mbufs
.Pq i.e. not writable .
.It Fn M_PREPEND "struct mbuf *m" "int plen" "int how"
Prepends space of size
.Fa plen
to mbuf
.Fa m .
If a new mbuf must be allocated,
.Fa how
specifies whether to wait.
If
.Fa how
is
.Dv M_DONTWAIT
and allocation fails, the original mbuf chain is freed and
.Fa m
is set to
.Dv NULL .
It is illegal for the
.Fa plen
parameter to be greater than
.Dv MHLEN .
.It Fn MCHTYPE "struct mbuf *m" "int type"
Change mbuf
.Fa m
to new type
.Fa type .
.El
.Sh CODE REFERENCES
The
.Nm
management functions are implemented within the file
.Pa sys/kern/uipc_mbuf.c .
Function prototypes, and the functions implemented as macros
are located in
.Pa sys/sys/mbuf.h .
.Sh SEE ALSO
.Pa /usr/share/doc/reference/ref9/net ,
.Xr netstat 1 ,
.Xr m_tag 9 ,
.Xr malloc 9
.Rs
.%A Jun-ichiro Hagino
.%T "Mbuf issues in 4.4BSD IPv6/IPsec support (experiences from KAME IPv6/IPsec implementation)"
.%B "Proceedings of the freenix track: 2000 USENIX annual technical conference"
.%D June 2000
.Re
.Sh AUTHORS
.An -nosplit
The original mbuf data structures were designed by Rob Gurwitz
when he did the initial TCP/IP implementation at BBN.
.Pp
Further extensions and enhancements were made by Bill Joy, Sam Leffler,
and Mike Karels at CSRG.
.Pp
Current implementation of external storage by
.An Matt Thomas
.Aq matt@3am-software.com
and
.An Jason R. Thorpe
.Aq thorpej@NetBSD.org .