Training courses

Kernel and Embedded Linux

Bootlin training courses

Embedded Linux, kernel,
Yocto Project, Buildroot, real-time,
graphics, boot time, debugging...

Bootlin logo

Elixir Cross Referencer

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
/* SPDX-License-Identifier: BSD-3-Clause */
/*  Copyright (c) 2020, Intel Corporation
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice,
 *      this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 *   3. Neither the name of the Intel Corporation nor the names of its
 *      contributors may be used to endorse or promote products derived from
 *      this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *  POSSIBILITY OF SUCH DAMAGE.
 */
/*$FreeBSD$*/

/**
 * @file ice_common_txrx.h
 * @brief common Tx/Rx utility functions
 *
 * Contains common utility functions for the Tx/Rx hot path.
 *
 * The functions do depend on the if_pkt_info_t structure. A suitable
 * implementation of this structure must be provided if these functions are to
 * be used without the iflib networking stack.
 */

#ifndef _ICE_COMMON_TXRX_H_
#define _ICE_COMMON_TXRX_H_

#include <netinet/udp.h>
#include <netinet/sctp.h>

/**
 * ice_tso_detect_sparse - detect TSO packets with too many segments
 * @pi: packet information
 *
 * Hardware only transmits packets with a maximum of 8 descriptors. For TSO
 * packets, hardware needs to be able to build the split packets using 8 or
 * fewer descriptors. Additionally, the header must be contained within at
 * most 3 descriptors.
 *
 * To verify this, we walk the headers to find out how many descriptors the
 * headers require (usually 1). Then we ensure that, for each TSO segment, its
 * data plus the headers are contained within 8 or fewer descriptors.
 *
 * Returns 1 if the packet layout is too sparse for hardware TSO (the caller
 * must defragment or drop it), 0 if the packet can be offloaded as-is.
 */
static inline int
ice_tso_detect_sparse(if_pkt_info_t pi)
{
	int count, curseg, i, hlen, segsz, seglen, tsolen, hdrs, maxsegs;
	bus_dma_segment_t *segs = pi->ipi_segs;
	int nsegs = pi->ipi_nsegs;

	curseg = hdrs = 0;

	/* Total L2+L3+L4 header length; everything after it is TSO payload */
	hlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
	tsolen = pi->ipi_len - hlen;

	/* First, count the number of descriptors for the header.
	 * Additionally, make sure it does not span more than 3 segments.
	 */
	i = 0;
	/* curseg tracks the bytes not yet consumed from the current segment */
	curseg = segs[0].ds_len;
	while (hlen > 0) {
		hdrs++;
		if (hdrs > ICE_MAX_TSO_HDR_SEGS)
			return (1);
		/* Current DMA segment is drained; advance to the next one */
		if (curseg == 0) {
			i++;
			if (__predict_false(i == nsegs))
				return (1);

			curseg = segs[i].ds_len;
		}
		seglen = min(curseg, hlen);
		curseg -= seglen;
		hlen -= seglen;
	}

	/* Descriptors left for payload once the headers are accounted for */
	maxsegs = ICE_MAX_TX_SEGS - hdrs;

	/* We must count the headers, in order to verify that they take up
	 * 3 or fewer descriptors. However, we don't need to check the data
	 * if the total segments is small.
	 */
	if (nsegs <= maxsegs)
		return (0);

	count = 0;

	/* Now check the data to make sure that each TSO segment is made up of
	 * no more than maxsegs descriptors. This ensures that hardware will
	 * be capable of performing TSO offload.
	 */
	while (tsolen > 0) {
		/* Each split packet carries at most ipi_tso_segsz of payload */
		segsz = pi->ipi_tso_segsz;
		while (segsz > 0 && tsolen != 0) {
			count++;
			if (count > maxsegs) {
				return (1);
			}
			/* DMA segment drained; continue in the next one */
			if (curseg == 0) {
				i++;
				if (__predict_false(i == nsegs)) {
					return (1);
				}
				curseg = segs[i].ds_len;
			}
			seglen = min(curseg, segsz);
			segsz -= seglen;
			curseg -= seglen;
			tsolen -= seglen;
		}
		/* Restart the descriptor count for the next TSO segment */
		count = 0;
	}

	return (0);
}

/**
 * ice_tso_setup - Setup a context descriptor to prepare for a TSO packet
 * @txq: the Tx queue to use
 * @pi: the packet info to prepare for
 *
 * Program a Tx context descriptor with the TSO parameters (command, total
 * payload length, and MSS) for this packet. Returns the ring index of the
 * descriptor that follows, i.e. where the data descriptors should begin.
 */
static inline int
ice_tso_setup(struct ice_tx_queue *txq, if_pkt_info_t pi)
{
	struct ice_tx_ctx_desc	*ctx;
	u64			qw1;
	u32			paylen;
	int			idx = pi->ipi_pidx;

	ctx = (struct ice_tx_ctx_desc *)&txq->tx_base[idx];

	/* The TSO payload is everything after the L2/L3/L4 headers */
	paylen = pi->ipi_len -
	    (pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen);

	/* TSO MSS must not be less than 64; clamp and count the event */
	if (pi->ipi_tso_segsz < ICE_MIN_TSO_MSS) {
		txq->stats.mss_too_small++;
		pi->ipi_tso_segsz = ICE_MIN_TSO_MSS;
	}

	/* Assemble QW1: descriptor type, TSO command, payload length, MSS */
	qw1 = ((u64)ICE_TX_DESC_DTYPE_CTX << ICE_TXD_CTX_QW1_DTYPE_S) |
	    ((u64)ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) |
	    ((u64)paylen << ICE_TXD_CTX_QW1_TSO_LEN_S) |
	    ((u64)pi->ipi_tso_segsz << ICE_TXD_CTX_QW1_MSS_S);
	ctx->qw1 = htole64(qw1);

	ctx->tunneling_params = htole32(0);
	txq->tso++;

	/* Advance past the context descriptor, wrapping at the ring end */
	return ((idx + 1) & (txq->desc_count - 1));
}

/**
 * ice_tx_setup_offload - Setup register values for performing a Tx offload
 * @txq: The Tx queue, used to track checksum offload stats
 * @pi: the packet info to program for
 * @cmd: the cmd register value to update
 * @off: the off register value to update
 *
 * Update *cmd and *off to request the L3 and L4 checksum offloads that the
 * stack asked for via pi->ipi_csum_flags, and bump the matching per-queue
 * offload statistics for debugging purposes.
 */
static inline void
ice_tx_setup_offload(struct ice_tx_queue *txq, if_pkt_info_t pi, u32 *cmd, u32 *off)
{
	u32 csum_flags = pi->ipi_csum_flags;

	/* L3: pick the IP header type and, for IPv4, checksum insertion */
	switch (pi->ipi_etype) {
#ifdef INET
	case ETHERTYPE_IP:
		if (csum_flags & ICE_CSUM_IP) {
			*cmd |= ICE_TX_DESC_CMD_IIPT_IPV4_CSUM;
			txq->stats.cso[ICE_CSO_STAT_TX_IP4]++;
			csum_flags &= ~CSUM_IP;
		} else {
			*cmd |= ICE_TX_DESC_CMD_IIPT_IPV4;
		}
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		/* IPv6 has no header checksum; only the IIPT type is set */
		*cmd |= ICE_TX_DESC_CMD_IIPT_IPV6;
		txq->stats.cso[ICE_CSO_STAT_TX_IP6]++;
		break;
#endif
	default:
		txq->stats.cso[ICE_CSO_STAT_TX_L3_ERR]++;
		break;
	}

	/* MAC and IP header lengths, in units of 2 and 4 bytes respectively */
	*off |= (pi->ipi_ehdrlen >> 1) << ICE_TX_DESC_LEN_MACLEN_S;
	*off |= (pi->ipi_ip_hlen >> 2) << ICE_TX_DESC_LEN_IPLEN_S;

	/* Done if no flags outside the ICE_RX_CSUM_FLAGS mask remain */
	if (!(csum_flags & ~ICE_RX_CSUM_FLAGS))
		return;

	/* L4: pick the checksum type and program the L4 header length */
	switch (pi->ipi_ipproto) {
	case IPPROTO_TCP:
		if (csum_flags & ICE_CSUM_TCP) {
			*cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP;
			*off |= (pi->ipi_tcp_hlen >> 2) <<
			    ICE_TX_DESC_LEN_L4_LEN_S;
			txq->stats.cso[ICE_CSO_STAT_TX_TCP]++;
		}
		break;
	case IPPROTO_UDP:
		if (csum_flags & ICE_CSUM_UDP) {
			*cmd |= ICE_TX_DESC_CMD_L4T_EOFT_UDP;
			*off |= (sizeof(struct udphdr) >> 2) <<
			    ICE_TX_DESC_LEN_L4_LEN_S;
			txq->stats.cso[ICE_CSO_STAT_TX_UDP]++;
		}
		break;
	case IPPROTO_SCTP:
		if (csum_flags & ICE_CSUM_SCTP) {
			*cmd |= ICE_TX_DESC_CMD_L4T_EOFT_SCTP;
			*off |= (sizeof(struct sctphdr) >> 2) <<
			    ICE_TX_DESC_LEN_L4_LEN_S;
			txq->stats.cso[ICE_CSO_STAT_TX_SCTP]++;
		}
		break;
	default:
		txq->stats.cso[ICE_CSO_STAT_TX_L4_ERR]++;
		break;
	}
}

/**
 * ice_rx_checksum - verify hardware checksum is valid or not
 * @rxq: the Rx queue structure
 * @flags: checksum flags to update
 * @data: checksum data to update
 * @status0: descriptor status data
 * @ptype: packet type
 *
 * Determine whether the hardware indicated that the Rx checksum is valid. If
 * so, update the checksum flags and data, informing the stack of the status
 * of the checksum so that it does not spend time verifying it manually.
 *
 * On error paths the relevant CSUM_*_VALID bit is left unset (so the stack
 * falls back to software verification) and a per-queue error stat is bumped.
 */
static void
ice_rx_checksum(struct ice_rx_queue *rxq, uint32_t *flags, uint32_t *data,
		u16 status0, u16 ptype)
{
	/* Inner and outer (tunnel) IP header checksum error bits */
	const u16 l3_error = (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
			      BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S));
	/* L4 and outer-UDP (tunnel) checksum error bits */
	const u16 l4_error = (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) |
			      BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S));
	const u16 xsum_errors = (l3_error | l4_error |
				 BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S));
	struct ice_rx_ptype_decoded decoded;
	bool is_ipv4, is_ipv6;

	/* No L3 or L4 checksum was calculated */
	if (!(status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) {
		return;
	}

	decoded = ice_decode_rx_desc_ptype(ptype);
	/* Hardware did process this packet; start from a clean flag slate */
	*flags = 0;

	/* Cannot report validity without a recognized outer IP header */
	if (!(decoded.known && decoded.outer_ip))
		return;

	is_ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
	    (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
	is_ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
	    (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);

	/* No checksum errors were reported */
	if (!(status0 & xsum_errors)) {
		if (is_ipv4)
			*flags |= CSUM_L3_CALC | CSUM_L3_VALID;

		switch (decoded.inner_prot) {
		case ICE_RX_PTYPE_INNER_PROT_TCP:
		case ICE_RX_PTYPE_INNER_PROT_UDP:
		case ICE_RX_PTYPE_INNER_PROT_SCTP:
			*flags |= CSUM_L4_CALC | CSUM_L4_VALID;
			/* All-ones checksum value reported to the stack */
			*data |= htons(0xffff);
			break;
		default:
			break;
		}

		return;
	}

	/*
	 * Certain IPv6 extension headers impact the validity of L4 checksums.
	 * If one of these headers exist, hardware will set the IPV6EXADD bit
	 * in the descriptor. If the bit is set then pretend like hardware
	 * didn't checksum this packet.
	 */
	if (is_ipv6 && (status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))) {
		rxq->stats.cso[ICE_CSO_STAT_RX_IP6_ERR]++;
		return;
	}

	/*
	 * At this point, status0 must have at least one of the l3_error or
	 * l4_error bits set.
	 */

	if (status0 & l3_error) {
		if (is_ipv4) {
			rxq->stats.cso[ICE_CSO_STAT_RX_IP4_ERR]++;
			/* CALC without VALID: stack must verify itself */
			*flags |= CSUM_L3_CALC;
		} else {
			/* Hardware indicated L3 error but this isn't IPv4? */
			rxq->stats.cso[ICE_CSO_STAT_RX_L3_ERR]++;
		}
		/* don't bother reporting L4 errors if we got an L3 error */
		return;
	} else if (is_ipv4) {
		*flags |= CSUM_L3_CALC | CSUM_L3_VALID;
	}

	if (status0 & l4_error) {
		switch (decoded.inner_prot) {
		case ICE_RX_PTYPE_INNER_PROT_TCP:
			rxq->stats.cso[ICE_CSO_STAT_RX_TCP_ERR]++;
			*flags |= CSUM_L4_CALC;
			break;
		case ICE_RX_PTYPE_INNER_PROT_UDP:
			rxq->stats.cso[ICE_CSO_STAT_RX_UDP_ERR]++;
			*flags |= CSUM_L4_CALC;
			break;
		case ICE_RX_PTYPE_INNER_PROT_SCTP:
			rxq->stats.cso[ICE_CSO_STAT_RX_SCTP_ERR]++;
			*flags |= CSUM_L4_CALC;
			break;
		default:
			/*
			 * Hardware indicated L4 error, but this isn't one of
			 * the expected protocols.
			 */
			rxq->stats.cso[ICE_CSO_STAT_RX_L4_ERR]++;
		}
	}
}

/**
 * ice_ptype_to_hash - Convert packet type to a hash value
 * @ptype: the packet type to convert
 *
 * Given the packet type, convert to a suitable hashtype to report to the
 * upper stack via the iri_rsstype value of the if_rxd_info_t structure.
 *
 * If the hash type is unknown we'll report M_HASHTYPE_OPAQUE.
 */
static inline int
ice_ptype_to_hash(u16 ptype)
{
	struct ice_rx_ptype_decoded decoded;

	/* Out-of-range ptypes cannot be decoded at all */
	if (ptype >= ARRAY_SIZE(ice_ptype_lkup))
		return M_HASHTYPE_OPAQUE;

	decoded = ice_decode_rx_desc_ptype(ptype);

	/* Unknown or non-IP (pure L2) packets carry no usable RSS type */
	if (!decoded.known || decoded.outer_ip == ICE_RX_PTYPE_OUTER_L2)
		return M_HASHTYPE_OPAQUE;

	/* Anything that reaches this point is IP; map by version/protocol */
	switch (decoded.outer_ip_ver) {
	case ICE_RX_PTYPE_OUTER_IPV6:
		switch (decoded.inner_prot) {
		case ICE_RX_PTYPE_INNER_PROT_TCP:
			return M_HASHTYPE_RSS_TCP_IPV6;
		case ICE_RX_PTYPE_INNER_PROT_UDP:
			return M_HASHTYPE_RSS_UDP_IPV6;
		default:
			return M_HASHTYPE_RSS_IPV6;
		}
	case ICE_RX_PTYPE_OUTER_IPV4:
		switch (decoded.inner_prot) {
		case ICE_RX_PTYPE_INNER_PROT_TCP:
			return M_HASHTYPE_RSS_TCP_IPV4;
		case ICE_RX_PTYPE_INNER_PROT_UDP:
			return M_HASHTYPE_RSS_UDP_IPV4;
		default:
			return M_HASHTYPE_RSS_IPV4;
		}
	default:
		/* We should never get here!! */
		return M_HASHTYPE_OPAQUE;
	}
}
#endif