Training courses

Kernel and Embedded Linux

Bootlin training courses

Embedded Linux, kernel,
Yocto Project, Buildroot, real-time,
graphics, boot time, debugging...

Bootlin logo

Elixir Cross Referencer

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (C) 2014-2015 Vincenzo Maffione
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* $FreeBSD$ */

#if defined(__FreeBSD__)
#include <sys/cdefs.h> /* prerequisite */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>	/* defines used in kernel.h */
#include <sys/kernel.h>	/* types used in module initialization */
#include <sys/sockio.h>
#include <sys/malloc.h>
#include <sys/socketvar.h>	/* struct socket */
#include <sys/socket.h> /* sockaddrs */
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h>	/* bus_dmamap_* */
#include <sys/endian.h>

#elif defined(linux)

#include "bsd_glue.h"

#elif defined(__APPLE__)

#warning OSX support is only partial
#include "osx_glue.h"

#else

#error	Unsupported platform

#endif /* unsupported */

#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>



/* This routine is called by bdg_mismatch_datapath() when it finishes
 * accumulating bytes for a segment, in order to fix some fields in the
 * segment headers (which still contain the same content as the header
 * of the original GSO packet).
 *
 * Parameters:
 *   pkt             - beginning of the IP header of the segment
 *   len             - length of the IP packet (IP header included)
 *   ipv4            - nonzero if 'pkt' starts with an IPv4 header, zero
 *                     if it starts with an IPv6 header
 *   iphlen          - length of the IP header, i.e. offset of the
 *                     TCP/UDP header from 'pkt'
 *   tcp             - nonzero for a TCP segment, zero for UDP
 *   idx             - index of this segment; added to the IPv4
 *                     "Identification" field
 *   segmented_bytes - value added to the TCP sequence number (payload
 *                     bytes carried by the previous segments)
 *   last_segment    - nonzero if this is the last segment of the GSO
 *                     packet; otherwise PSH and FIN are cleared
 *
 * The headers are modified in place; nothing is returned.
 */
static void
gso_fix_segment(uint8_t *pkt, size_t len, u_int ipv4, u_int iphlen, u_int tcp,
		u_int idx, u_int segmented_bytes, u_int last_segment)
{
	struct nm_iphdr *iph = (struct nm_iphdr *)(pkt);
	struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(pkt);
	/* Length of everything that follows the IP header. */
	size_t l4len = len - iphlen;
	uint16_t *check = NULL;
	uint8_t *check_data = NULL;

	if (ipv4) {
		/* Set the IPv4 "Total Length" field. */
		iph->tot_len = htobe16(len);
		nm_prdis("ip total length %u", be16toh(iph->tot_len));

		/* Set the IPv4 "Identification" field. */
		iph->id = htobe16(be16toh(iph->id) + idx);
		nm_prdis("ip identification %u", be16toh(iph->id));

		/* Compute and insert the IPv4 header checksum. The field
		 * must be zero while the checksum is being computed. */
		iph->check = 0;
		iph->check = nm_os_csum_ipv4(iph);
		nm_prdis("IP csum %x", be16toh(iph->check));
	} else {
		/* Set the IPv6 "Payload Len" field, which does not include
		 * the IPv6 header itself. */
		ip6h->payload_len = htobe16(l4len);
	}

	if (tcp) {
		struct nm_tcphdr *tcph = (struct nm_tcphdr *)(pkt + iphlen);
		/* TCP PSH (0x8) and FIN (0x1) flag bits. */
		const uint8_t psh_fin = 0x8 | 0x1;

		/* Set the TCP sequence number. */
		tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
		nm_prdis("tcp seq %u", be32toh(tcph->seq));

		/* Zero the PSH and FIN TCP flags if this is not the last
		   segment. */
		if (!last_segment)
			tcph->flags &= ~psh_fin;
		nm_prdis("last_segment %u", last_segment);

		check = &tcph->check;
		check_data = (uint8_t *)tcph;
	} else { /* UDP */
		struct nm_udphdr *udph = (struct nm_udphdr *)(pkt + iphlen);

		/* Set the UDP 'Length' field. */
		udph->len = htobe16(l4len);

		check = &udph->check;
		check_data = (uint8_t *)udph;
	}

	/* Compute and insert TCP/UDP checksum. The checksum field is
	 * cleared first, as it is part of the data being summed. */
	*check = 0;
	if (ipv4)
		nm_os_csum_tcpudp_ipv4(iph, check_data, l4len, check);
	else
		nm_os_csum_tcpudp_ipv6(ip6h, check_data, l4len, check);

	nm_prdis("TCP/UDP csum %x", be16toh(*check));
}

/* Sanity check on a virtio-net header.
 *
 * Returns 1 if the header carries a GSO type (ECN bit ignored) other
 * than NONE, TCPV4, UDP or TCPV6, or if any flag bit other than
 * NEEDS_CSUM and DATA_VALID is set; returns 0 otherwise.
 */
static inline int
vnet_hdr_is_bad(struct nm_vnet_hdr *vh)
{
	/* The ECN bit is a modifier, not a GSO type on its own. */
	uint8_t type = vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
	int known_type = (type == VIRTIO_NET_HDR_GSO_NONE ||
			  type == VIRTIO_NET_HDR_GSO_TCPV4 ||
			  type == VIRTIO_NET_HDR_GSO_UDP ||
			  type == VIRTIO_NET_HDR_GSO_TCPV6);

	if (!known_type)
		return 1;

	/* Reject any flag bit that is not recognized here. */
	if (vh->flags & ~(VIRTIO_NET_HDR_F_NEEDS_CSUM
			  | VIRTIO_NET_HDR_F_DATA_VALID))
		return 1;

	return 0;
}

/* The VALE mismatch datapath implementation.
 *
 * Copies one packet, made of the 'ft_p->ft_frags' source fragments that
 * start at 'ft_p', into the slots of 'dst_ring' starting at index '*j',
 * when the source and destination ports use a different virtio-net
 * header length (virt_hdr_len). If the source uses a virtio-net header
 * and the destination does not, the offloadings requested by the header
 * (GSO segmentation, checksum computation) are carried out here.
 *
 * Parameters:
 *   na       - adapter of the source port
 *   dst_na   - adapter of the destination port
 *   ft_p     - first source fragment of the packet
 *   dst_ring - destination ring
 *   j        - in/out: index of the first destination slot to use; on
 *              success it is advanced past the slots that were filled
 *   lim      - passed to nm_next() to advance destination slot indices
 *   howmany  - in/out: number of destination slots available; on
 *              success it is decreased by the number of slots used
 *
 * When the packet is dropped the function returns early, leaving '*j'
 * and '*howmany' untouched (destination buffers may have been written,
 * but the slots are not committed).
 */
void
bdg_mismatch_datapath(struct netmap_vp_adapter *na,
		      struct netmap_vp_adapter *dst_na,
		      const struct nm_bdg_fwd *ft_p,
		      struct netmap_ring *dst_ring,
		      u_int *j, u_int lim, u_int *howmany)
{
	struct netmap_slot *dst_slot = NULL;
	struct nm_vnet_hdr *vh = NULL;
	const struct nm_bdg_fwd *ft_end = ft_p + ft_p->ft_frags;

	/* Source and destination pointers. */
	uint8_t *dst, *src;
	size_t src_len, dst_len;

	/* Indices and counters for the destination ring. */
	u_int j_start = *j;
	u_int j_cur = j_start;
	u_int dst_slots = 0;

	if (unlikely(ft_p == ft_end)) {
		nm_prlim(1, "No source slots to process");
		return;
	}

	/* Init source and dest pointers. */
	src = ft_p->ft_buf;
	src_len = ft_p->ft_len;
	dst_slot = &dst_ring->slot[j_cur];
	dst = NMB(&dst_na->up, dst_slot);

	/* The source virtio-net header (if any) is read from, and then
	 * skipped in, the first fragment: make sure it is entirely there,
	 * otherwise the length arithmetic below would wrap around. If
	 * something seems wrong, just drop the packet. */
	if (src_len < na->up.virt_hdr_len) {
		nm_prlim(1, "Short src vnet header, dropping");
		return;
	}

	/* If the source port uses the offloadings, while destination doesn't,
	 * we grab the source virtio-net header and do the offloadings here.
	 */
	if (na->up.virt_hdr_len && !dst_na->up.virt_hdr_len) {
		vh = (struct nm_vnet_hdr *)src;
		/* Sanity check on the source virtio-net header. */
		if (unlikely(vnet_hdr_is_bad(vh))) {
			nm_prlim(1, "Bad src vnet header, dropping");
			return;
		}
	}

	/* We are processing the first input slot and there is a mismatch
	 * between source and destination virt_hdr_len (SHL and DHL).
	 * When a client is using virtio-net headers, the header length
	 * can be:
	 *    - 10: the header corresponds to the struct nm_vnet_hdr
	 *    - 12: the first 10 bytes correspond to the struct
	 *          virtio_net_hdr, and the last 2 bytes store the
	 *          "mergeable buffers" info, which is an optional
	 *	    hint that can be zeroed for compatibility
	 *
	 * The destination header is therefore built according to the
	 * following table:
	 *
	 * SHL | DHL | destination header
	 * -----------------------------
	 *   0 |  10 | zero
	 *   0 |  12 | zero
	 *  10 |   0 | doesn't exist
	 *  10 |  12 | first 10 bytes are copied from source header, last 2 are zero
	 *  12 |   0 | doesn't exist
	 *  12 |  10 | copied from the first 10 bytes of source header
	 */
	bzero(dst, dst_na->up.virt_hdr_len);
	if (na->up.virt_hdr_len && dst_na->up.virt_hdr_len)
		memcpy(dst, src, sizeof(struct nm_vnet_hdr));
	/* Skip the virtio-net headers. */
	src += na->up.virt_hdr_len;
	src_len -= na->up.virt_hdr_len;
	dst += dst_na->up.virt_hdr_len;
	dst_len = dst_na->up.virt_hdr_len + src_len;

	/* Here it could be dst_len == 0 (which implies src_len == 0),
	 * so we avoid passing a zero length fragment.
	 */
	if (dst_len == 0) {
		ft_p++;
		/* Do not look past the last fragment: a packet with
		 * nothing after the source header has nothing to forward. */
		if (ft_p == ft_end) {
			nm_prlim(1, "Empty packet, dropping");
			return;
		}
		src = ft_p->ft_buf;
		src_len = ft_p->ft_len;
		dst_len = src_len;
	}

	if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		/* Bytes written so far into the current segment. */
		u_int gso_bytes = 0;
		/* Length of the GSO packet header. */
		u_int gso_hdr_len = 0;
		/* Pointer to the GSO packet header. Assume it is in a single fragment. */
		uint8_t *gso_hdr = NULL;
		/* Index of the current segment. */
		u_int gso_idx = 0;
		/* Payload data bytes segmented so far (e.g. TCP data bytes). */
		u_int segmented_bytes = 0;
		/* Is this an IPv4 or IPv6 GSO packet? */
		u_int ipv4 = 0;
		/* Length of the IP header (20 if IPv4, 40 if IPv6). */
		u_int iphlen = 0;
		/* Length of the Ethernet header (18 if 802.1q, otherwise 14). */
		u_int ethhlen = 14;
		/* Is this a TCP or an UDP GSO packet? */
		u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
				== VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;

		/* Segment the GSO packet contained into the input slots (frags). */
		for (;;) {
			size_t copy;
			u_int last_segment;

			if (dst_slots >= *howmany) {
				/* We still have work to do, but we've run out of
				 * dst slots, so we have to drop the packet. */
				nm_prdis(1, "Not enough slots, dropping GSO packet");
				return;
			}

			/* Grab the GSO header if we don't have it. */
			if (!gso_hdr) {
				uint16_t ethertype;

				gso_hdr = src;

				/* Look at the 'Ethertype' field to see if this packet
				 * is IPv4 or IPv6, taking into account VLAN
				 * encapsulation. */
				for (;;) {
					if (src_len < ethhlen) {
						nm_prlim(1, "Short GSO fragment [eth], dropping");
						return;
					}
					ethertype = be16toh(*((uint16_t *)
							    (gso_hdr + ethhlen - 2)));
					if (ethertype != 0x8100) /* not 802.1q */
						break;
					ethhlen += 4;
				}
				switch (ethertype) {
					case 0x0800:  /* IPv4 */
					{
						struct nm_iphdr *iph = (struct nm_iphdr *)
									(gso_hdr + ethhlen);

						if (src_len < ethhlen + 20) {
							nm_prlim(1, "Short GSO fragment "
							      "[IPv4], dropping");
							return;
						}
						ipv4 = 1;
						/* The low nibble is the header
						 * length in 32-bit words. */
						iphlen = 4 * (iph->version_ihl & 0x0F);
						break;
					}
					case 0x86DD:  /* IPv6 */
						ipv4 = 0;
						iphlen = 40;
						break;
					default:
						nm_prlim(1, "Unsupported ethertype, "
						      "dropping GSO packet");
						return;
				}
				nm_prdis(3, "type=%04x", ethertype);

				if (src_len < ethhlen + iphlen) {
					nm_prlim(1, "Short GSO fragment [IP], dropping");
					return;
				}

				/* Compute gso_hdr_len. For TCP we need to read the
				 * content of the 'Data Offset' field.
				 */
				if (tcp) {
					struct nm_tcphdr *tcph = (struct nm_tcphdr *)
								(gso_hdr + ethhlen + iphlen);

					if (src_len < ethhlen + iphlen + 20) {
						nm_prlim(1, "Short GSO fragment "
								"[TCP], dropping");
						return;
					}
					gso_hdr_len = ethhlen + iphlen +
						      4 * (tcph->doff >> 4);
				} else {
					gso_hdr_len = ethhlen + iphlen + 8; /* UDP */
				}

				if (src_len < gso_hdr_len) {
					nm_prlim(1, "Short GSO fragment [TCP/UDP], dropping");
					return;
				}

				/* Every segment starts with a copy of the header
				 * and is limited to dst_na->mfs bytes: if the
				 * header alone reaches that limit no payload can
				 * ever be emitted, and the space computation
				 * below would wrap around. */
				if (unlikely(gso_hdr_len >= dst_na->mfs)) {
					nm_prlim(1, "GSO header too long for dst mfs, "
					      "dropping");
					return;
				}

				nm_prdis(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
								   dst_na->mfs);

				/* Advance source pointers. */
				src += gso_hdr_len;
				src_len -= gso_hdr_len;
				if (src_len == 0) {
					ft_p++;
					if (ft_p == ft_end)
						break;
					src = ft_p->ft_buf;
					src_len = ft_p->ft_len;
				}
			}

			/* Fill in the header of the current segment. */
			if (gso_bytes == 0) {
				memcpy(dst, gso_hdr, gso_hdr_len);
				gso_bytes = gso_hdr_len;
			}

			/* Fill in data and update source and dest pointers. */
			copy = src_len;
			if (gso_bytes + copy > dst_na->mfs)
				copy = dst_na->mfs - gso_bytes;
			memcpy(dst + gso_bytes, src, copy);
			gso_bytes += copy;
			src += copy;
			src_len -= copy;

			/* True once all the bytes of the last input fragment
			 * have been consumed. */
			last_segment = (src_len == 0 && ft_p + 1 == ft_end);

			/* A segment is complete or we have processed all the
			   the GSO payload bytes. */
			if (gso_bytes >= dst_na->mfs || last_segment) {
				/* After raw segmentation, we must fix some header
				 * fields and compute checksums, in a protocol dependent
				 * way. */
				gso_fix_segment(dst + ethhlen, gso_bytes - ethhlen,
						ipv4, iphlen, tcp,
						gso_idx, segmented_bytes,
						last_segment);

				nm_prdis("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
				dst_slot->len = gso_bytes;
				dst_slot->flags = 0;
				dst_slots++;
				segmented_bytes += gso_bytes - gso_hdr_len;

				gso_bytes = 0;
				gso_idx++;

				/* Next destination slot. */
				j_cur = nm_next(j_cur, lim);
				dst_slot = &dst_ring->slot[j_cur];
				dst = NMB(&dst_na->up, dst_slot);
			}

			/* Next input slot. */
			if (src_len == 0) {
				ft_p++;
				if (ft_p == ft_end)
					break;
				src = ft_p->ft_buf;
				src_len = ft_p->ft_len;
			}
		}
		nm_prdis(3, "%d bytes segmented", segmented_bytes);

	} else {
		/* Address of a checksum field into a destination slot.
		 * It stays NULL when no checksum must be computed, or when
		 * the checksum request is invalid. */
		uint16_t *check = NULL;
		/* Accumulator for an unfolded checksum. */
		rawsum_t csum = 0;

		/* Process a non-GSO packet. */
		/* NOTE(review): this path does not compare dst_slots with
		 * *howmany while copying; presumably the caller reserves one
		 * destination slot per source fragment - confirm. */

		/* Init 'check' if necessary. The 16-bit checksum field must
		 * lie entirely within the first fragment, because it is
		 * addressed through the first destination buffer. */
		if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
			if (unlikely(vh->csum_offset + vh->csum_start +
					sizeof(uint16_t) > src_len))
				nm_prerr("invalid checksum request");
			else
				check = (uint16_t *)(dst + vh->csum_start +
						vh->csum_offset);
		}

		while (ft_p != ft_end) {
			/* Init/update the packet checksum if needed. This is
			 * skipped for a rejected request, where csum_start may
			 * exceed the fragment length. */
			if (check) {
				if (!dst_slots)
					csum = nm_os_csum_raw(src + vh->csum_start,
								src_len - vh->csum_start, 0);
				else
					csum = nm_os_csum_raw(src, src_len, csum);
			}

			/* Round to a multiple of 64 */
			src_len = (src_len + 63) & ~63;

			if (ft_p->ft_flags & NS_INDIRECT) {
				if (copyin(src, dst, src_len)) {
					/* Invalid user pointer, pretend len is 0. */
					dst_len = 0;
				}
			} else {
				memcpy(dst, src, (int)src_len);
			}
			dst_slot->len = dst_len;
			dst_slots++;

			/* Next destination slot. */
			j_cur = nm_next(j_cur, lim);
			dst_slot = &dst_ring->slot[j_cur];
			dst = NMB(&dst_na->up, dst_slot);

			/* Next source slot, if any: never read the entry
			 * past the last fragment. */
			ft_p++;
			if (ft_p != ft_end) {
				src = ft_p->ft_buf;
				dst_len = src_len = ft_p->ft_len;
			}
		}

		/* Finalize (fold) the checksum if needed. */
		if (check) {
			*check = nm_os_csum_fold(csum);
		}
		nm_prdis(3, "using %u dst_slots", dst_slots);

		/* A second pass on the destination slots to set the slot flags,
		 * using the right number of destination slots.
		 */
		while (j_start != j_cur) {
			dst_slot = &dst_ring->slot[j_start];
			dst_slot->flags = (dst_slots << 8)| NS_MOREFRAG;
			j_start = nm_next(j_start, lim);
		}
		/* Clear NS_MOREFRAG flag on last entry. */
		dst_slot->flags = (dst_slots << 8);
	}

	/* Update howmany and j. This is to commit the use of
	 * those slots in the destination ring. */
	if (unlikely(dst_slots > *howmany)) {
		nm_prerr("bug: slot allocation error");
	}
	*j = j_cur;
	*howmany -= dst_slots;
}