.\" $NetBSD: vnodeops.9,v 1.101 2023/06/15 09:13:36 hannken Exp $
.\"
.\" Copyright (c) 2001, 2005, 2006 The NetBSD Foundation, Inc.
.\" All rights reserved.
.\"
.\" This code is derived from software contributed to The NetBSD Foundation
.\" by Gregory McGarry.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
.Dd June 15, 2023
.Dt VNODEOPS 9
.Os
.Sh NAME
.Nm vnodeops ,
.Nm VOP_LOOKUP ,
.Nm VOP_CREATE ,
.Nm VOP_MKNOD ,
.Nm VOP_OPEN ,
.Nm VOP_CLOSE ,
.Nm VOP_ACCESS ,
.Nm VOP_GETATTR ,
.Nm VOP_SETATTR ,
.Nm VOP_READ ,
.Nm VOP_WRITE ,
.Nm VOP_FALLOCATE ,
.Nm VOP_FDISCARD ,
.Nm VOP_IOCTL ,
.Nm VOP_FCNTL ,
.Nm VOP_POLL ,
.Nm VOP_KQFILTER ,
.Nm VOP_REVOKE ,
.Nm VOP_MMAP ,
.Nm VOP_FSYNC ,
.Nm VOP_SEEK ,
.Nm VOP_REMOVE ,
.Nm VOP_LINK ,
.Nm VOP_RENAME ,
.Nm VOP_MKDIR ,
.Nm VOP_RMDIR ,
.Nm VOP_SYMLINK ,
.Nm VOP_READDIR ,
.Nm VOP_READLINK ,
.Nm VOP_ABORTOP ,
.Nm VOP_INACTIVE ,
.Nm VOP_RECLAIM ,
.Nm VOP_LOCK ,
.Nm VOP_UNLOCK ,
.Nm VOP_ISLOCKED ,
.Nm VOP_BMAP ,
.Nm VOP_PRINT ,
.Nm VOP_PATHCONF ,
.Nm VOP_ADVLOCK ,
.Nm VOP_WHITEOUT ,
.Nm VOP_GETPAGES ,
.Nm VOP_PUTPAGES ,
.Nm VOP_STRATEGY ,
.Nm VOP_BWRITE ,
.Nm VOP_GETEXTATTR ,
.Nm VOP_SETEXTATTR ,
.Nm VOP_LISTEXTATTR ,
.Nm VOP_DELETEEXTATTR
.Nd vnode operations
.Sh SYNOPSIS
.In sys/param.h
.In sys/buf.h
.In sys/dirent.h
.In sys/vnode.h
.In sys/mount.h
.In sys/namei.h
.In sys/unistd.h
.In sys/fcntl.h
.In sys/lockf.h
.In sys/extattr.h
.Ft int
.Fn VOP_LOOKUP "struct vnode *dvp" "struct vnode **vpp" \
"struct componentname *cnp"
.Ft int
.Fn VOP_CREATE "struct vnode *dvp" "struct vnode **vpp" \
"struct componentname *cnp" "struct vattr *vap"
.Ft int
.Fn VOP_MKNOD "struct vnode *dvp" "struct vnode **vpp" \
"struct componentname *cnp" "struct vattr *vap"
.Ft int
.Fn VOP_OPEN "struct vnode *vp" "int mode" "kauth_cred_t cred"
.Ft int
.Fn VOP_CLOSE "struct vnode *vp" "int fflag" "kauth_cred_t cred"
.Ft int
.Fn VOP_ACCESS "struct vnode *vp" "int mode" "kauth_cred_t cred"
.Ft int
.Fn VOP_GETATTR "struct vnode *vp" "struct vattr *vap" "kauth_cred_t cred"
.Ft int
.Fn VOP_SETATTR "struct vnode *vp" "struct vattr *vap" "kauth_cred_t cred"
.Ft int
.Fn VOP_READ "struct vnode *vp" "struct uio *uio" "int ioflag" \
"kauth_cred_t cred"
.Ft int
.Fn VOP_WRITE "struct vnode *vp" "struct uio *uio" "int ioflag" \
"kauth_cred_t cred"
.Ft int
.Fn VOP_FALLOCATE "struct vnode *vp" "off_t pos" "off_t len"
.Ft int
.Fn VOP_FDISCARD "struct vnode *vp" "off_t pos" "off_t len"
.Ft int
.Fn VOP_IOCTL "struct vnode *vp" "u_long command" "void *data" \
"int fflag" "kauth_cred_t cred"
.Ft int
.Fn VOP_FCNTL "struct vnode *vp" "u_int command" "void *data" \
"int fflag" "kauth_cred_t cred"
.Ft int
.Fn VOP_POLL "struct vnode *vp" "int events"
.Ft int
.Fn VOP_KQFILTER "struct vnode *vp" "struct knote *kn"
.Ft int
.Fn VOP_REVOKE "struct vnode *vp" "int flags"
.Ft int
.Fn VOP_MMAP "struct vnode *vp" "vm_prot_t prot" "kauth_cred_t cred"
.Ft int
.Fn VOP_FSYNC "struct vnode *vp" "kauth_cred_t cred" "int flags" \
"off_t offlo" "off_t offhi"
.Ft int
.Fn VOP_SEEK "struct vnode *vp" "off_t oldoff" "off_t newoff" \
"kauth_cred_t cred"
.Ft int
.Fn VOP_REMOVE "struct vnode *dvp" "struct vnode *vp" \
"struct componentname *cnp"
.Ft int
.Fn VOP_LINK "struct vnode *dvp" "struct vnode *vp" \
"struct componentname *cnp"
.Ft int
.Fn VOP_RENAME "struct vnode *fdvp" "struct vnode *fvp" \
"struct componentname *fcnp" "struct vnode *tdvp" \
"struct vnode *tvp" "struct componentname *tcnp"
.Ft int
.Fn VOP_MKDIR "struct vnode *dvp" "struct vnode **vpp" \
"struct componentname *cnp" "struct vattr *vap"
.Ft int
.Fn VOP_RMDIR "struct vnode *dvp" "struct vnode *vp" \
"struct componentname *cnp"
.Ft int
.Fn VOP_SYMLINK "struct vnode *dvp" "struct vnode **vpp" \
"struct componentname *cnp" "struct vattr *vap" "char *target"
.Ft int
.Fn VOP_READDIR "struct vnode *vp" "struct uio *uio" \
"kauth_cred_t cred" "int *eofflag" "off_t **cookies" "int *ncookies"
.Ft int
.Fn VOP_READLINK "struct vnode *vp" "struct uio *uio" "kauth_cred_t cred"
.Ft int
.Fn VOP_ABORTOP "struct vnode *dvp" "struct componentname *cnp"
.Ft int
.Fn VOP_INACTIVE "struct vnode *vp"
.Ft int
.Fn VOP_RECLAIM "struct vnode *vp"
.Ft int
.Fn VOP_LOCK "struct vnode *vp" "int flags"
.Ft int
.Fn VOP_UNLOCK "struct vnode *vp"
.Ft int
.Fn VOP_ISLOCKED "struct vnode *vp"
.Ft int
.Fn VOP_BMAP "struct vnode *vp" "daddr_t bn" "struct vnode **vpp" \
"daddr_t *bnp" "int *runp"
.Ft int
.Fn VOP_PRINT "struct vnode *vp"
.Ft int
.Fn VOP_PATHCONF "struct vnode *vp" "int name" "register_t *retval"
.Ft int
.Fn VOP_ADVLOCK "struct vnode *vp" "void *id" "int op" \
"struct flock *fl" "int flags"
.Ft int
.Fn VOP_WHITEOUT "struct vnode *dvp" "struct componentname *cnp" \
"int flags"
.Ft int
.Fn VOP_GETPAGES "struct vnode *vp" "voff_t offset" "struct vm_page **m" \
"int *count" "int centeridx" "vm_prot_t access_type" "int advice" "int flags"
.Ft int
.Fn VOP_PUTPAGES "struct vnode *vp" "voff_t offlo" "voff_t offhi" \
"int flags"
.Ft int
.Fn VOP_STRATEGY "struct vnode *vp" "struct buf *bp"
.Ft int
.Fn VOP_BWRITE "struct vnode *vp" "struct buf *bp"
.Ft int
.Fn VOP_GETEXTATTR "struct vnode *vp" "int attrnamespace" "const char *name" \
"struct uio *uio" "size_t *size" "kauth_cred_t cred"
.Ft int
.Fn VOP_SETEXTATTR "struct vnode *vp" "int attrnamespace" "const char *name" \
"struct uio *uio" "kauth_cred_t cred"
.Ft int
.Fn VOP_LISTEXTATTR "struct vnode *vp" "int attrnamespace" "struct uio *uio" \
"size_t *size" "kauth_cred_t cred"
.Ft int
.Fn VOP_DELETEEXTATTR "struct vnode *vp" "int attrnamespace" \
"const char *name" "kauth_cred_t cred"
.Pp
Not all header files are required for each function.
.Sh DESCRIPTION
The vnode operations vector describes what operations can be done to
the file associated with the vnode.
The system maintains one vnode operations vector for each file system
type configured into the kernel.
The vnode operations vector contains a pointer to a function for each
operation supported by the file system.
Many of the functions described in the vnode operations vector are
closely related to their corresponding system calls.
In most cases, they are called as a result of the system call
associated with the operation being invoked.
.Pp
Functions in the vnode operations vector are invoked using specialized
macros.
The following table gives a summary of the operations.
.Pp
.Bl -column "VOP_DELETEEXTATTR" "Wake up process sleeping on lock" -compact
.It Sy Macro Ta Sy Description
.It VOP_LOOKUP Lookup file name in name cache
.It VOP_CREATE Create a new file
.It VOP_MKNOD Make a new device
.It VOP_OPEN Open a file
.It VOP_CLOSE Close a file
.It VOP_ACCESS Determine file accessibility
.It VOP_GETATTR Get file attributes
.It VOP_SETATTR Set file attributes
.It VOP_READ Read from a file
.It VOP_WRITE Write to a file
.It VOP_FALLOCATE Allocate backing for a file
.It VOP_FDISCARD Discard backing for a file
.It VOP_IOCTL Perform device-specific I/O
.It VOP_FCNTL Perform file control
.It VOP_POLL Test if poll event has occurred
.It VOP_KQFILTER Register a knote
.It VOP_REVOKE Eliminate vnode activity
.It VOP_MMAP Map file into user address space
.It VOP_FSYNC Flush pending data to disk
.It VOP_SEEK Test if file is seekable
.It VOP_REMOVE Remove a file
.It VOP_LINK Link a file
.It VOP_RENAME Rename a file
.It VOP_MKDIR Make a new directory
.It VOP_RMDIR Remove a directory
.It VOP_SYMLINK Create a symbolic link
.It VOP_READDIR Read directory entry
.It VOP_READLINK Read contents of a symlink
.It VOP_ABORTOP Abort pending operation
.It VOP_INACTIVE Release the inactive vnode
.It VOP_RECLAIM Reclaim vnode for another file
.It VOP_LOCK Sleep until vnode lock is free
.It VOP_UNLOCK Wake up process sleeping on lock
.It VOP_ISLOCKED Test if vnode is locked
.It VOP_BMAP Logical block number conversion
.It VOP_PRINT Print debugging information
.It VOP_PATHCONF Return POSIX pathconf data
.It VOP_ADVLOCK Advisory record locking
.It VOP_WHITEOUT Whiteout vnode
.It VOP_GETPAGES Read VM pages from file
.It VOP_PUTPAGES Write VM pages to file
.It VOP_STRATEGY Read/write a file system buffer
.It VOP_BWRITE Write a file system buffer
.It VOP_GETEXTATTR Get extended attribute
.It VOP_SETEXTATTR Set extended attribute
.It VOP_LISTEXTATTR List extended attributes
.It VOP_DELETEEXTATTR Remove extended attribute
.El
.Pp
The implementation details of the vnode operations vector are not
quite what is described here.
.Pp
If the file system type does not support a specific operation, it must
nevertheless assign an appropriate stub in the vnode operations
vector to do the minimum required of it.
In most cases, such functions either do nothing or return an error
value to the effect that it is not supported.
.Pp
Many of the functions in the vnode operations vector take a
componentname structure.
It is used to encapsulate many parameters into a single function
argument.
It has the following structure:
.Bd -literal
struct componentname {
/*
* Arguments to lookup.
*/
uint32_t cn_nameiop; /* namei operation */
uint32_t cn_flags; /* flags to namei */
kauth_cred_t cn_cred; /* credentials */
/*
* Shared between lookup and commit routines.
*/
const char *cn_nameptr; /* pointer to looked up name */
size_t cn_namelen; /* length of looked up component */
size_t cn_consume; /* chars to consume in lookup() */
};
.Ed
.Pp
The top half of the structure is used exclusively for the pathname
lookups using
.Fn VOP_LOOKUP
and is initialized by the caller.
The semantics of the lookup are affected by the lookup operation
specified in
.Em cn_nameiop
and the flags specified in
.Em cn_flags .
Valid operations are:
.Pp
.Bl -tag -offset indent -width LOOKUP -compact
.It LOOKUP
perform name lookup only
.It CREATE
set up for file creation
.It DELETE
set up for file deletion
.It RENAME
set up for file renaming
.It OPMASK
mask for operation
.El
.Pp
Valid values for
.Em cnp->cn_flags
are:
.Pp
.Bl -tag -offset indent -width LOCKPARENT -compact
.It LOCKLEAF
lock inode on return
.It LOCKPARENT
want parent vnode returned locked
.It NOCACHE
name must not be left in name cache (see
.Xr namecache 9 )
.It FOLLOW
follow symbolic links
.It NOFOLLOW
do not follow symbolic links (pseudo)
.It MODMASK
mask of operational modifiers
.El
.Pp
No vnode operations may be called from interrupt context.
Most operations also require the vnode to be locked on entry.
To prevent deadlocks, when acquiring locks on multiple vnodes, the
lock of the parent directory must be acquired before the lock on the child
directory.
.Pp
Vnode operations for a file system type generally should not be
called directly from the kernel, but accessed indirectly through the
high-level convenience functions discussed in
.Xr vnsubr 9 .
.Sh FUNCTIONS
.Bl -tag -width compact
.It Fn VOP_LOOKUP "dvp" "vpp" "cnp"
Lookup a single pathname component in a given directory.
The argument
.Fa dvp
is the locked vnode of the directory to search and
.Fa cnp
is the pathname component to be searched for.
If the pathname component is found, the address of the resulting
unlocked vnode is returned in
.Fa vpp .
The operation specified in
.Em cnp->cn_nameiop
tells
.Fn VOP_LOOKUP
the reason for requesting the lookup;
.Fn VOP_LOOKUP
uses it to cache file system type specific information in the vnode
for subsequent operations.
.Pp
There are three types of lookups: ".", ".." (ISDOTDOT), and regular.
If the pathname component being searched for is ".", then
.Fa dvp
has an extra reference added to it and it is returned in
.Fa *vpp .
For other pathname components,
.Fn VOP_LOOKUP
checks the accessibility of the directory and searches the name cache
for the pathname component.
See
.Xr namecache 9 .
If the pathname is not found in the name cache, the directory is
searched for the pathname.
The resulting unlocked vnode is returned in
.Fa vpp .
.Fa dvp
is always returned locked.
.Pp
On failure
.Fa *vpp
is
.Dv NULL ,
and
.Fa dvp
is left locked.
If the operation is successful
.Fa *vpp
is unlocked and zero is returned.
Typically, if
.Fa *vpp
and
.Fa dvp
are the same vnode the caller will need to release twice (decrement
the reference count) and unlock once.
.It Fn VOP_CREATE "dvp" "vpp" "cnp" "vap"
Create a new file in a given directory.
The argument
.Fa dvp
is the locked vnode of the directory to create the new file in and
.Fa cnp
is the pathname component of the new file.
The argument
.Fa vap
specifies the attributes that the new file should be created with.
If the file is successfully created, the address of the resulting
unlocked vnode is returned in
.Fa vpp
and zero is returned.
.Pp
This function is called after
.Fn VOP_LOOKUP
when a file is being created.
Normally,
.Fn VOP_LOOKUP
will have set the SAVENAME flag in
.Em cnp->cn_flags
to keep the memory pointed to by
.Em cnp->cn_pnbuf
valid.
If an error is detected when creating the file, this memory is
released.
If the file is created successfully it will be released unless the
SAVESTART flag is specified in
.Em cnp->cn_flags .
.It Fn VOP_MKNOD "dvp" "vpp" "cnp" "vap"
Make a new device-special file in a given directory.
The argument
.Fa dvp
is the locked vnode of the directory to create the new device-special
file in and
.Fa cnp
is the pathname component of the new device-special file.
The argument
.Fa vap
specifies the attributes that the new device-special file should be
created with.
If the file is successfully created, the address of the resulting
unlocked vnode is returned in
.Fa vpp
and zero is returned.
.Pp
This function is called after
.Fn VOP_LOOKUP
when a device-special file is being created.
Normally,
.Fn VOP_LOOKUP
will have set the SAVENAME flag in
.Em cnp->cn_flags
to keep the memory pointed to by
.Em cnp->cn_pnbuf
valid.
If an error is detected when creating the device-special file,
this memory is released.
If the device-special file is created successfully it will be released
unless the SAVESTART flag is specified in
.Em cnp->cn_flags .
.It Fn VOP_OPEN "vp" "mode" "cred"
Open a file.
The argument
.Fa vp
is the vnode of the file to open and
.Fa mode
specifies the access mode required by the calling process.
The calling credentials are specified by
.Fa cred .
The access mode is a set of flags, including FREAD, FWRITE,
O_NONBLOCK, O_APPEND, etc.
.Fn VOP_OPEN
must be called before a file can be accessed by a thread.
The vnode reference count is incremented.
.Pp
.Fn VOP_OPEN
expects the vnode
.Fa vp
to be locked on entry and will leave it locked on return.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
.It Fn VOP_CLOSE "vp" "fflag" "cred"
Close a file.
The argument
.Fa vp
is the vnode of the file to close and
.Fa fflag
specifies the access mode used by the calling process.
The possible flags are
.Dv FREAD ,
.Dv FWRITE
and
.Dv FNONBLOCK .
The calling credentials are specified by
.Fa cred .
.Fn VOP_CLOSE
frees resources allocated by
.Fn VOP_OPEN .
.Pp
The vnode
.Fa vp
will be locked on entry and should remain locked on return.
.It Fn VOP_ACCESS "vp" "mode" "cred"
Determine the accessibility (permissions) of the file against the
specified credentials.
The argument
.Fa vp
is the vnode of the file to check,
.Fa mode
is the type of access required and
.Fa cred
contains the user credentials to check.
The argument
.Fa mode
is a mask which can contain VREAD, VWRITE or VEXEC.
If the file is accessible in the specified way, zero is returned,
otherwise an appropriate error code is returned.
.Pp
The vnode
.Fa vp
will be locked on entry and should remain locked on return.
.It Fn VOP_GETATTR "vp" "vap" "cred"
Get specific vnode attributes on a file.
The argument
.Fa vp
is the vnode of the file to get the attributes for.
The argument
.Fa cred
specifies the calling credentials.
.Fn VOP_GETATTR
uses the file system type specific data object
.Em vp->v_data
to reference the underlying file attributes.
.\" Attributes associated with the file are collected by setting the
.\" required attribute bits in
.\" .Em vap->va_mask .
The attributes are returned in
.Fa vap .
Attributes which are not available are set to the value VNOVAL.
.Pp
For more information on vnode attributes see
.Xr vattr 9 .
Historically it was considered acceptable to call
.Fn VOP_GETATTR
without first locking the vnode.
This usage is deprecated.
.Pp
The vnode
.Fa vp
will be locked on entry and should remain locked on return.
.It Fn VOP_SETATTR "vp" "vap" "cred"
Set specific vnode attributes on a file.
The argument
.Fa vp
is the locked vnode of the file to set the attributes for.
The argument
.Fa cred
specifies the calling credentials.
.Fn VOP_SETATTR
uses the file system type specific data object
.Em vp->v_data
to reference the underlying file attributes.
The new attributes are defined in
.Fa vap .
.\" Attributes associated with the file are set by setting the required
.\" attribute bits in
.\" .Em vap->va_mask .
Attributes which are not being modified by
.Fn VOP_SETATTR
should be set to the value VNOVAL.
If the operation is successful zero is returned, otherwise an
appropriate error is returned.
.Pp
For more information on vnode attributes see
.Xr vattr 9 .
.It Fn VOP_READ "vp" "uio" "ioflag" "cred"
Read the contents of a file.
The argument
.Fa vp
is the vnode of the file to read from,
.Fa uio
is the location to read the data into,
.Fa ioflag
is a set of flags and
.Fa cred
are the credentials of the calling process.
.Pp
The
.Fa ioflag
argument is used to give directives and hints to the file system.
When attempting a read, the high 16 bits are used to provide a
read-ahead hint (in units of file system blocks) that the file system
should attempt.
The low 16 bits are a bit mask which can contain the following flags:
.Pp
.Bl -tag -offset indent -width IO_ALTSEMANTICS -compact
.It IO_UNIT
do I/O as atomic unit
.It IO_APPEND
append write to end
.It IO_SYNC
sync I/O file integrity completion
.It IO_NODELOCKED
underlying node already locked
.It IO_NDELAY
FNDELAY flag set in file table
.It IO_DSYNC
sync I/O data integrity completion
.It IO_ALTSEMANTICS
use alternate I/O semantics
.It IO_NORMAL
operate on regular data
.It IO_EXT
operate on extended attributes
.It IO_DIRECT
do not buffer data in the kernel
.El
.Pp
Zero is returned on success, otherwise an error is returned.
The vnode should be locked on entry and remains locked on exit.
.It Fn VOP_WRITE "vp" "uio" "ioflag" "cred"
Write to a file.
The argument
.Fa vp
is the vnode of the file to write to,
.Fa uio
is the location of the data to write,
.Fa ioflag
is a set of flags and
.Fa cred
are the credentials of the calling process.
.Pp
The
.Fa ioflag
argument is used to give directives and hints to the file system.
The low 16 bits are a bit mask which can contain the same flags as
.Fn VOP_READ .
.Pp
Zero is returned on success, otherwise an error is returned.
The vnode should be locked on entry and remains locked on exit.
.It Fn VOP_FALLOCATE "vp" "pos" "len"
Allocate backing store.
The argument
.Fa vp
is the vnode for the file.
The
.Fa pos
and
.Fa len
arguments (specified in bytes) name an extent within the file.
The blocks underlying this range, rounding up at the top and down at
the bottom if needed, are checked; if no physical storage is
allocated, a physical block is allocated and zeroed.
This operation removes
.Dq holes
from files.
.It Fn VOP_FDISCARD "vp" "pos" "len"
Discard backing store.
The argument
.Fa vp
is the vnode for the file.
The
.Fa pos
and
.Fa len
arguments (specified in bytes) name an extent within the file.
The blocks underlying this range, rounding down at the top and up at
the bottom if needed, are checked.
If any physical storage is used, it is deallocated.
This operation creates
.Dq holes
in files.
Discarded blocks of regular files read back afterwards as zeroes.
On devices, the underlying discard-block operation if any (e.g. ATA
TRIM) is issued.
The device handles this as it sees fit.
In particular it is
.Em not
guaranteed that discarded blocks on devices will be zeroed; reading a
discarded block might produce zeros, or ones, or the previously
existing data, or some other data, or trash.
.\" XXX: if you discard part of a block in a regular file, should that
.\" part be explicitly zeroed? Also, how do you find the underlying
.\" block size?
.It Fn VOP_IOCTL "vp" "command" "data" "fflag" "cred"
Perform device-specific I/O.
The argument
.Fa vp
is the vnode of the file, normally representing a device.
The argument
.Fa command
specifies the device-specific operation to perform and
.Fa data
provides extra data for the specified operation.
The argument
.Fa fflag
is a set of flags.
The argument
.Fa cred
is the caller's credentials.
If the operation is successful, zero is
returned, otherwise an appropriate error code is returned.
.Pp
Most file systems do not supply a function for
.Fn VOP_IOCTL .
This function implements the
.Xr ioctl 2
system call.
.It Fn VOP_FCNTL "vp" "command" "data" "fflag" "cred"
Perform file control.
The argument
.Fa vp
is the locked vnode of the file.
The argument
.Fa command
specifies the operation to perform and
.Fa data
provides extra data for the specified operation.
The argument
.Fa fflag
is a set of flags.
The argument
.Fa cred
is the caller's credentials.
If the operation is successful, zero is returned, otherwise an
appropriate error code is returned.
.It Fn VOP_POLL "vp" "events"
Test if a poll event has occurred.
The argument
.Fa vp
is the vnode of the file to poll.
It returns any events of interest as specified by
.Fa events
that may have occurred for the file.
The argument
.Fa events
is a set of flags as specified by
.Xr poll 2 .
.Pp
The vnode
.Fa vp
remains unlocked throughout the whole operation.
.It Fn VOP_KQFILTER "vp" "kn"
Register a knote
.Fa kn
with the vnode
.Fa vp .
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
.Pp
The vnode
.Fa vp
remains unlocked throughout the whole operation.
.It Fn VOP_REVOKE "vp" "flags"
Eliminate all activity associated with the vnode
.Fa vp .
The argument
.Fa flags
is a set of flags.
If REVOKEALL is set in
.Fa flags
all vnodes aliased to the vnode
.Fa vp
are also eliminated.
If the operation is successful zero is returned, otherwise an
appropriate error is returned.
.Pp
The vnode
.Fa vp
remains unlocked throughout the whole operation.
.It Fn VOP_MMAP "vp" "prot" "cred"
Inform file system that
.Fa vp
is in the process of being memory mapped.
The argument
.Fa prot
specifies the vm access protection the vnode is going to be mapped with.
The argument
.Fa cred
is the caller's credentials.
If the file system allows the memory mapping, zero is returned, otherwise
an appropriate error code is returned.
.Pp
Most file systems do not supply a function for
.Fn VOP_MMAP
and use
.Fn genfs_mmap
to default for success.
Only file systems which do not integrate with the page cache at all
typically want to disallow memory mapping.
.It Fn VOP_FSYNC "vp" "cred" "flags" "offlo" "offhi"
Flush pending data buffers for a file to disk.
The argument
.Fa vp
is the locked vnode of the file to flush.
The argument
.Fa cred
is the caller's credentials.
The argument
.Fa flags
is a set of flags.
If FSYNC_WAIT is specified in
.Fa flags ,
the function should wait for I/O to complete before returning.
The arguments
.Fa offlo
and
.Fa offhi
specify the range of file to flush.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
.Pp
This function implements the
.Xr sync 2
and
.Xr fsync 2
system calls.
.It Fn VOP_SEEK "vp" "oldoff" "newoff" "cred"
Test if the file is seekable for the specified offset
.Fa newoff .
The argument
.Fa vp
is the locked vnode of the file to test.
For most file systems this function simply tests if
.Fa newoff
is valid.
If the specified
.Fa newoff
is less than zero, the function returns error code EINVAL.
.It Fn VOP_REMOVE "dvp" "vp" "cnp"
Remove a file.
The argument
.Fa dvp
is the locked vnode of the directory to remove the file from and
.Fa vp
is the locked vnode of the file to remove.
The argument
.Fa cnp
is the pathname component about the file to remove.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
Both
.Fa dvp
and
.Fa vp
are locked on entry and are to be unlocked before returning.
.It Fn VOP_LINK "dvp" "vp" "cnp"
Link to a file.
The argument
.Fa dvp
is the locked vnode of the directory to create the new link in and
.Fa vp
is the vnode of the file to be linked.
The argument
.Fa cnp
is the pathname component of the new link.
If the operation is successful zero is returned, otherwise an error
code is returned.
The directory vnode
.Fa dvp
should be locked on entry and will be released and unlocked on return.
The vnode
.Fa vp
should not be locked on entry and will remain unlocked on return.
.It Fn VOP_RENAME "fdvp" "fvp" "fcnp" "tdvp" "tvp" "tcnp"
Rename a file.
The argument
.Fa fdvp
is the vnode of the old parent directory containing the file to be
renamed and
.Fa fvp
is the vnode of the file to be renamed.
The argument
.Fa fcnp
is the pathname component about the file to be renamed.
The argument
.Fa tdvp
is the vnode of the new directory of the target file and
.Fa tvp
is the vnode of the target file (if it exists).
The argument
.Fa tcnp
is the pathname component about the file's new name.
If the operation is successful zero is returned, otherwise an error
code is returned.
.Pp
The caller must hold the target file system's rename lock.
The source directory and file vnodes should be unlocked and their
reference counts should be incremented before entry.
The target directory and file vnodes should both be locked on entry.
.Fn VOP_RENAME
updates the reference counts prior to returning.
.Pp
Because of the complexity and nastiness of the interface, please do
not write new code that calls
.Fn VOP_RENAME
directly until such time as ongoing cleanup work reaches a point where
the interface has been rendered halfway sane.
.It Fn VOP_MKDIR "dvp" "vpp" "cnp" "vap"
Make a new directory in a given directory.
The argument
.Fa dvp
is the locked vnode of the directory to create the new directory in and
.Fa cnp
is the pathname component of the new directory.
The argument
.Fa vap
specifies the attributes that the new directory should be created
with.
If the directory is successfully created, the address of the resulting
unlocked vnode is returned in
.Fa vpp
and zero is returned.
.Pp
This function is called after
.Fn VOP_LOOKUP
when a directory is being created.
Normally,
.Fn VOP_LOOKUP
will have set the SAVENAME flag in
.Em cnp->cn_flags
to keep the memory pointed to by
.Em cnp->cn_pnbuf
valid.
If an error is detected when creating the directory, this memory is
released.
If the directory is created successfully it will be released unless
the SAVESTART flag is specified in
.Em cnp->cn_flags .
.It Fn VOP_RMDIR "dvp" "vp" "cnp"
Remove a directory in a given directory.
The argument
.Fa dvp
is the locked vnode of the directory to remove the directory from and
.Fa vp
is the locked vnode of the directory to remove.
The argument
.Fa cnp
is the pathname component of the directory.
Zero is returned on success, otherwise an error code is returned.
Both
.Fa dvp
and
.Fa vp
should be locked on entry and will be released and unlocked on return.
.It Fn VOP_SYMLINK "dvp" "vpp" "cnp" "vap" "target"
Create a symbolic link in a given directory.
The argument
.Fa dvp
is the locked vnode of the directory to create the symbolic link in
and
.Fa cnp
is the pathname component of the symbolic link.
The argument
.Fa vap
specifies the attributes that the symbolic link should be created
with and
.Fa target
specifies the pathname of the target of the symbolic link.
If the symbolic link is successfully created, the address of the
resulting unlocked vnode is returned in
.Fa vpp
and zero is returned.
.Pp
This function is called after
.Fn VOP_LOOKUP
when a symbolic link is being created.
Normally,
.Fn VOP_LOOKUP
will have set the SAVENAME flag in
.Em cnp->cn_flags
to keep the memory pointed to by
.Em cnp->cn_pnbuf
valid.
If an error is detected when creating the symbolic link, this memory
is released.
If the symbolic link is created successfully it will be released
unless the SAVESTART flag is specified in
.Em cnp->cn_flags .
.It Fn VOP_READDIR "vp" "uio" "cred" "eofflag" "cookies" "ncookies"
Read directory entry.
The argument
.Fa vp
is the vnode of the directory to read the contents of and
.Fa uio
is the destination location to read the contents into.
The argument
.Fa cred
is the caller's credentials.
The argument
.Fa eofflag
is the pointer to a flag which is set by
.Fn VOP_READDIR
to indicate an end-of-file condition.
If
.Fa eofflag
is
.Dv NULL ,
the end-of-file condition is not returned.
The arguments
.Fa cookies
and
.Fa ncookies
specify the addresses for the list and number of directory seek
cookies generated for NFS.
Both
.Fa cookies
and
.Fa ncookies
should be
.Dv NULL
if they aren't required to be returned by
.Fn VOP_READDIR .
The directory contents are read into struct dirent structures and
.Fa uio->uio_offset
is set to the offset of the next unread directory entry.
This offset may be used in a following invocation to continue a
sequential read of the directory contents.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
.Pp
The directory should be locked on entry and will remain locked on
return.
.Pp
In case
.Fa ncookies
and
.Fa cookies
are supplied, one cookie should be returned per directory entry.
The value of the cookie for each directory entry should be the offset
within the directory where the on-disk version of the following
directory entry starts.
That is, for each directory entry
.Fa i ,
the corresponding cookie should refer to the offset of directory entry
.Fa i + 1 .
.Pp
Note that the
.Fa cookies
array must be allocated by the callee using the M_TEMP malloc type as
callers of
.Fn VOP_READDIR
must be able to free the allocation.
.It Fn VOP_READLINK "vp" "uio" "cred"
Read the contents of a symbolic link.
The argument
.Fa vp
is the locked vnode of the symlink and
.Fa uio
is the destination location to read the contents into.
The argument
.Fa cred
is the credentials of the caller.
If the operation is successful zero is returned, otherwise an error
code is returned.
.Pp
The vnode should be locked on entry and will remain locked on return.
.It Fn VOP_ABORTOP "dvp" "cnp"
Abort pending operation on vnode
.Fa dvp
and free resources allocated in
.Fa cnp .
.Pp
This operation is rarely implemented in file systems and
.Fn genfs_abortop
is typically used instead.
.It Fn VOP_INACTIVE "vp"
Release the inactive vnode.
.Fn VOP_INACTIVE
is called when the kernel is no longer using the vnode.
This may be because the reference count reaches zero or it may be that
the file system is being forcibly unmounted while there are open
files.
It can be used to reclaim space for open but deleted files.
The argument
.Fa vp
is the locked vnode to be released.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
The vnode
.Fa vp
must be locked on entry, and will remain locked on return.
.It Fn VOP_RECLAIM "vp"
Reclaim the vnode for another file system.
.Fn VOP_RECLAIM
is called when a vnode is being reused for a different file system.
Any file system specific resources associated with the vnode should be
freed.
The argument
.Fa vp
is the vnode to be reclaimed.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
The vnode
.Fa vp
should be locked on entry, and will be returned unlocked.
.It Fn VOP_LOCK "vp" "flags"
Sleep until vnode lock is free.
The argument
.Fa vp
is the vnode of the file to be locked.
The argument
.Fa flags
is
.Dv LK_EXCLUSIVE
to take the lock exclusively or
.Dv LK_SHARED
to take a shared lock.
If
.Fa flags
contains
.Dv LK_NOWAIT
and the lock is busy, the operation will return immediately with an error code.
If
.Fa flags
contains
.Dv LK_RETRY
this is a hint the caller wants the lock on dead vnodes too.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
.Fn VOP_LOCK
is used to serialize access to the file system such as to prevent two
writes to the same file from happening at the same time.
Kernel code should use
.Xr vn_lock 9
to lock a vnode rather than calling
.Fn VOP_LOCK
directly.
.It Fn VOP_UNLOCK "vp"
Wake up process sleeping on lock.
The argument
.Fa vp
is the vnode of the file to be unlocked.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
.Fn VOP_UNLOCK
is used to serialize access to the file system such as to prevent two
writes to the same file from happening at the same time.
.It Fn VOP_ISLOCKED "vp"
Test if the vnode
.Fa vp
is locked.
Possible return values are
.Dv LK_EXCLUSIVE ,
.Dv LK_SHARED
or 0 for lock held exclusively by the calling thread, shared lock held
by anyone or unlocked, respectively.
.Pp
This function must never be used to make locking decisions at run time:
it is provided only for diagnostic purposes.
.It Fn VOP_BMAP "vp" "bn" "vpp" "bnp" "runp"
Convert the logical block number
.Fa bn
of a file specified by vnode
.Fa vp
to its physical block number on the disk.
The physical block is returned in
.Fa bnp .
In case the logical block is not allocated, \-1 is used.
.Pp
If
.Fa vpp
is not
.Dv NULL ,
the device vnode for the file system is
returned in the address specified by
.Fa vpp .
If
.Fa runp
is not
.Dv NULL ,
the number of contiguous blocks starting from the next block after
the queried block will be returned in
.Fa runp .
.It Fn VOP_PRINT "vp"
Print debugging information.
The argument
.Fa vp
is the vnode to print.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
.It Fn VOP_PATHCONF "vp" "name" "retval"
Implement POSIX
.Xr pathconf 2
and
.Xr fpathconf 2
support.
The argument
.Fa vp
is the locked vnode to get information about.
The argument
.Fa name
specifies the type of information to return.
The information is returned in the address specified by
.Fa retval .
Valid values for
.Fa name
are:
.Pp
.Bl -tag -offset indent -width _PC_CHOWN_RESTRICTED -compact
.It _PC_LINK_MAX
return the maximum number of links to a file
.It _PC_NAME_MAX
return the maximum number of bytes in a file name
.It _PC_PATH_MAX
return the maximum number of bytes in a pathname
.It _PC_PIPE_BUF
return the maximum number of bytes which will be written atomically to
a pipe
.It _PC_CHOWN_RESTRICTED
return 1 if appropriate privileges are required for the
.Xr chown 2
system call, otherwise zero
.It _PC_NO_TRUNC
return 0 if file names longer than
.Brq Dv NAME_MAX
are silently truncated
.El
.Pp
If
.Fa name
is recognized,
.Fa *retval
is set to the specified value and zero is returned, otherwise an
appropriate error is returned.
.It Fn VOP_ADVLOCK "vp" "id" "op" "fl" "flags"
Manipulate advisory record locks on a vnode.
The argument
.Fa vp
is the vnode on which locks are manipulated.
The argument
.Fa id
is the id token which is changing the lock and
.Fa op
is the
.Xr fcntl 2
operation to perform.
Valid values are:
.Pp
.Bl -tag -offset indent -width F_UNLCK -compact
.It F_SETLK
set lock
.It F_GETLK
get the first conflicting lock
.It F_UNLCK
clear lock
.El
.Pp
The argument
.Fa fl
is a description of the lock.
In the case of
.Dv SEEK_CUR ,
the caller should add the current file offset to
.Fa fl->l_start
beforehand.
.Fn VOP_ADVLOCK
treats
.Dv SEEK_CUR
as
.Dv SEEK_SET .
.Pp
The argument
.Fa flags
is the set of flags.
Valid values are:
.Pp
.Bl -tag -offset indent -width F_FLOCK -compact
.It F_WAIT
wait until lock is granted
.It F_FLOCK
use
.Xr flock 2
semantics for lock
.It F_POSIX
use POSIX semantics for lock
.El
.Pp
If the operation is successful zero is returned, otherwise an
appropriate error is returned.
.It Fn VOP_WHITEOUT "dvp" "cnp" "flags"
Whiteout pathname component in directory with vnode
.Fa dvp .
The argument
.Fa cnp
specifies the pathname component to whiteout.
.Pp
The vnode
.Fa dvp
should be locked on entry and will remain locked on return.
.It Fn VOP_GETPAGES "vp" "offset" "m" "count" "centeridx" "access_type" "advice" "flags"
Read VM pages from file.
The argument
.Fa vp
is the locked vnode to read the VM pages from.
The argument
.Fa offset
is the offset in the file to start accessing and
.Fa m
is an array of VM pages.
The argument
.Fa count
points to a variable that specifies the number of pages to read.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
If
.Dv PGO_LOCKED
is specified in
.Fa flags ,
.Fn VOP_GETPAGES
might return fewer pages than requested.
In that case, the variable pointed to by
.Fa count
will be updated.
.Pp
This function is primarily used by the page-fault handling mechanism.
.It Fn VOP_PUTPAGES "vp" "offlo" "offhi" "flags"
Write modified (dirty) VM pages to file.
The argument
.Fa vp
is the vnode to write the VM pages to.
The vnode's vm object lock
.Va ( v_uobj.vmobjlock )
must be held by the caller and will be released upon return.
The arguments
.Fa offlo
and
.Fa offhi
specify the range of VM pages to write.
In case
.Fa offhi
is given as 0, all pages at and after the start offset
.Fa offlo
belonging to the vnode
.Fa vp
will be written.
The argument
.Fa flags
controls the behavior of the routine and takes the vm pager's
flags
.Dv ( PGO_ -prefixed ) .
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
.Pp
The function is primarily used by the pageout handling mechanism and
is commonly implemented indirectly
by
.Fn genfs_putpages
with the help of
.Fn VOP_STRATEGY
and
.Fn VOP_BMAP .
.It Fn VOP_STRATEGY "vp" "bp"
Read/write a file system buffer.
The argument
.Fa vp
is the vnode to read/write to.
The argument
.Fa bp
is the buffer to be read or written.
.Fn VOP_STRATEGY
will either read or write data to the file depending on the value of
.Fa bp->b_flags .
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
.It Fn VOP_BWRITE "vp" "bp"
Write a file system buffer.
The argument
.Fa vp
is the vnode to write to.
The argument
.Fa bp
specifies the buffer to be written.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
.It Fn VOP_GETEXTATTR "vp" "attrnamespace" "name" "uio" "size" "cred"
Get an extended attribute.
The argument
.Fa vp
is the locked vnode of the file or directory from which to retrieve the
attribute.
The argument
.Fa attrnamespace
specifies the extended attribute namespace.
The argument
.Fa name
is a nul-terminated character string naming the attribute to retrieve.
The argument
.Fa uio ,
if not
.Dv NULL ,
specifies where the extended attribute value is to be written.
The argument
.Fa size ,
if not
.Dv NULL ,
will contain the number of bytes required to read all of
the attribute data upon return.
In most cases,
.Fa uio
will be
.Dv NULL
when
.Fa size
is not, and vice versa.
The argument
.Fa cred
specifies the user credentials to use when authorizing the request.
.It Fn VOP_SETEXTATTR "vp" "attrnamespace" "name" "uio" "cred"
Set an extended attribute.
The argument
.Fa vp
is the locked vnode of the file or directory to which to store the
attribute.
The argument
.Fa attrnamespace
specifies the extended attribute namespace.
The argument
.Fa name
is a nul-terminated character string naming the attribute to store.
The argument
.Fa uio
specifies the source of the extended attribute data.
The argument
.Fa cred
specifies the user credentials to use when authorizing the request.
.It Fn VOP_LISTEXTATTR "vp" "attrnamespace" "uio" "size" "cred"
Retrieve the list of extended attributes.
The argument
.Fa vp
is the locked vnode of the file or directory whose attributes are to be listed.
The argument
.Fa attrnamespace
specifies the extended attribute namespace.
The argument
.Fa uio ,
if not
.Dv NULL ,
specifies where the extended attribute list is to be written.
The argument
.Fa size ,
if not
.Dv NULL ,
will contain the number of bytes required to read all of
the attribute names upon return.
In most cases,
.Fa uio
will be
.Dv NULL
when
.Fa size
is not, and vice versa.
The argument
.Fa cred
specifies the user credentials to use when authorizing the request.
.It Fn VOP_DELETEEXTATTR "vp" "attrnamespace" "name" "cred"
Remove attribute
.Fa name
from file associated with
.Fa vp .
The argument
.Fa attrnamespace
specifies the extended attribute namespace.
If full removal is not supported, the file system should return
.Er EOPNOTSUPP
to allow the caller to zero out the value with
.Fn VOP_SETEXTATTR .
.Pp
The vnode
.Fa vp
should be locked on entry and will remain locked on return.
.El
.Sh FILES
.Pa src/sys/kern/vnode_if.src
contains the list of vnode functions, their definitions and an exact locking
protocol.
.Sh ERRORS
.Bl -tag -width Er
.It Bq Er EACCES
Access for the specified operation is denied.
.It Bq Er EDQUOT
Quota exceeded.
.It Bq Er EINVAL
An attempt was made to read from an illegal offset in the directory,
or the input was not recognized.
.It Bq Er EIO
A read error occurred while reading the directory or reading the
contents of a symbolic link.
.It Bq Er EJUSTRETURN
A CREATE or RENAME operation would be successful.
.It Bq Er ENOATTR
The requested attribute is not defined for this vnode.
.It Bq Er ENOENT
The component was not found in the directory.
.It Bq Er ENOSPC
The file system is full.
.It Bq Er ENOTDIR
The vnode does not represent a directory.
.It Bq Er ENOTEMPTY
An attempt was made to remove a directory which is not empty.
.It Bq Er EPERM
An attempt was made to change an immutable file.
.It Bq Er EROFS
The file system is read-only.
.El
.Sh SEE ALSO
.Xr extattr 9 ,
.Xr intro 9 ,
.Xr namei 9 ,
.Xr vattr 9 ,
.Xr vfs 9 ,
.Xr vfsops 9 ,
.Xr vnode 9
.Sh HISTORY
The vnode operations vector, its functions and the corresponding
macros appeared in
.Bx 4.3 .