xref: /illumos-kvm-cmd/net/vnic.c (revision 844e23ee)
1 /*
2  * QEMU System Emulator
3  * illumos VNIC/vnd support
4  *
5  * Copyright 2016 Joyent, Inc.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include <assert.h>
27 #include <errno.h>
28 #include <libdlpi.h>
29 #include <string.h>
30 #include <stdio.h>
31 #include <stropts.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 
35 #include <netpacket/packet.h>
36 #include <assert.h>
37 #include <net/if_dl.h>
38 #include <sys/ethernet.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <fcntl.h>
42 #include <libvnd.h>
43 #include <sys/vnd.h>
44 #include <sys/frameio.h>
45 
46 #include "net/vnic.h"
47 #include "net/vnic-dhcp.h"
48 
49 #include "qemu-common.h"
50 #include "qemu-error.h"
51 #include "qemu-option.h"
52 #include "qemu-char.h"
53 
/*
 * Size, in bytes, of each of the packet staging buffers embedded in
 * VNICState.
 *
 * XXX 64k is an arbitrary number; we should determine a good way to pick
 * this buffer size.
 */
#define	VNIC_BUFSIZE	(64 * 1024)
59 
60 typedef struct VNICState {
61 	VLANClientState	vns_nc;
62 	int		vns_fd;
63 	unsigned int	vns_rpoll;
64 	unsigned int	vns_wpoll;
65 	uint8_t		vns_buf[VNIC_BUFSIZE];
66 	uint8_t		vns_txbuf[VNIC_BUFSIZE];
67 	uint_t		vns_sap;
68 	vnd_handle_t	*vns_hdl;
69 	VNICDHCPState	vns_ds;
70 	frameio_t	*vns_rfio;
71 	frameio_t	*vns_wfio;
72 } VNICState;
73 
74 static void vnic_update_fd_handler(VNICState *);
75 
76 static void
vnic_read_poll(VNICState * vsp,int enable)77 vnic_read_poll(VNICState *vsp, int enable)
78 {
79 	vsp->vns_rpoll = enable;
80 	vnic_update_fd_handler(vsp);
81 }
82 
83 static void
vnic_write_poll(VNICState * vsp,int enable)84 vnic_write_poll(VNICState *vsp, int enable)
85 {
86 	vsp->vns_wpoll = enable;
87 	vnic_update_fd_handler(vsp);
88 }
89 
90 static void
vnic_poll(VLANClientState * ncp,bool enable)91 vnic_poll(VLANClientState *ncp, bool enable)
92 {
93 	VNICState *vsp = DO_UPCAST(VNICState, vns_nc, ncp);
94 	vnic_read_poll(vsp, 1);
95 	vnic_write_poll(vsp, 1);
96 }
97 
98 /*
99  * Because this is a single packet API, just read(2). If QEMU's net backend were
100  * better we could send more packets at once.
101  */
102 static int
vnic_read_packet(VNICState * vsp,uint8_t * buf,int len)103 vnic_read_packet(VNICState *vsp, uint8_t *buf, int len)
104 {
105 	int ret;
106 
107 	do {
108 		ret = read(vsp->vns_fd, buf, len);
109 	} while (ret == -1 && errno == EINTR);
110 
111 	if (ret == -1 && errno == EAGAIN) {
112 		vnic_read_poll(vsp, 1);
113 		return (0);
114 	}
115 
116 	return (ret);
117 }
118 
119 /*
120  * For a single packet, just use write(2).
121  */
122 static int
vnic_write_packet(VNICState * vsp,const uint8_t * buf,int len)123 vnic_write_packet(VNICState *vsp, const uint8_t *buf, int len)
124 {
125 	int ret;
126 
127 	do {
128 		ret = write(vsp->vns_fd, buf, len);
129 	} while (ret == -1 && errno == EINTR);
130 
131 	if (ret == -1 && errno == EAGAIN) {
132 		vnic_write_poll(vsp, 1);
133 		return (0);
134 	}
135 
136 	return (ret);
137 }
138 
139 static int
vnic_can_send(void * opaque)140 vnic_can_send(void *opaque)
141 {
142 	VNICState *vsp = opaque;
143 	return (qemu_can_send_packet(&vsp->vns_nc));
144 }
145 
146 static void
vnic_send_completed(VLANClientState * nc,ssize_t len)147 vnic_send_completed(VLANClientState *nc, ssize_t len)
148 {
149 	VNICState *vsp = DO_UPCAST(VNICState, vns_nc, nc);
150 	vnic_read_poll(vsp, 1);
151 }
152 
153 /* outside world -> VM */
154 static void
vnic_send(void * opaque)155 vnic_send(void *opaque)
156 {
157 	VNICState *vsp = opaque;
158 	int ret;
159 
160 	do {
161 		ret = vnic_read_packet(vsp, vsp->vns_buf,
162 		    sizeof (vsp->vns_buf));
163 		if (ret <= 0)
164 			break;
165 
166 		ret = qemu_send_packet_async(&vsp->vns_nc, vsp->vns_buf, ret,
167 		    vnic_send_completed);
168 
169 		if (ret == 0)
170 			vnic_read_poll(vsp, 0);
171 
172 	} while (ret > 0 && qemu_can_send_packet(&vsp->vns_nc));
173 }
174 
175 static void
vnic_writable(void * opaque)176 vnic_writable(void *opaque)
177 {
178 	VNICState *vsp = opaque;
179 	vnic_write_poll(vsp, 0);
180 	qemu_flush_queued_packets(&vsp->vns_nc);
181 }
182 
183 /* VM -> outside world */
184 static ssize_t
vnic_receive(VLANClientState * ncp,const uint8_t * buf,size_t size)185 vnic_receive(VLANClientState *ncp, const uint8_t *buf, size_t size)
186 {
187 	uint16_t ethtype;
188 	VNICState *vsp = DO_UPCAST(VNICState, vns_nc, ncp);
189 
190 	if (vsp->vns_ds.vnds_enabled && get_ethertype(buf, size, &ethtype)) {
191 		VNICDHCPState *vdsp = &vsp->vns_ds;
192 		int ret;
193 		switch (ethtype) {
194 		case ETH_P_ARP:
195 			if (!is_arp_request(buf, size, vdsp))
196 				goto send;
197 			ret = create_arp_response(buf, size, vdsp);
198 			break;
199 		case ETH_P_IP:
200 			if (!is_dhcp_request(buf, size))
201 				goto send;
202 			ret = create_dhcp_response(buf, size, vdsp);
203 			break;
204 		default:
205 			goto send;
206 		}
207 
208 		if (!ret)
209 			return (size);
210 
211 		ret = qemu_send_packet_async(&vsp->vns_nc,
212 		    vdsp->vnds_buf, ret, vnic_send_completed);
213 		if (ret == 0)
214 			vnic_read_poll(vsp, 0);
215 
216 		return (size);
217 	}
218 
219 send:
220 	return (vnic_write_packet(vsp, buf, size));
221 }
222 
223 static ssize_t
vnic_receive_iov(VLANClientState * ncp,const struct iovec * iov,int iovcnt)224 vnic_receive_iov(VLANClientState *ncp, const struct iovec *iov,
225     int iovcnt)
226 {
227 	int ret, i;
228 	uint16_t ethtype;
229 	size_t total, altsize;
230 	VNICState *vsp = DO_UPCAST(VNICState, vns_nc, ncp);
231 
232 	for (total = 0, i = 0; i < iovcnt; i++) {
233 		total += (iov + i)->iov_len;
234 	}
235 
236 	if (vsp->vns_ds.vnds_enabled && get_ethertypev(iov, iovcnt, &ethtype)) {
237 		/*
238 		 * Basically drop the packet because we can't send a
239 		 * reply at this time. It's unfortunate, but we don't
240 		 * really have the proper infrastructure to do something
241 		 * else with this at this time.
242 		 */
243 		if (!vnic_can_send(vsp))
244 			return (total);
245 
246 		VNICDHCPState *vdsp = &vsp->vns_ds;
247 
248 		switch (ethtype) {
249 		case ETH_P_ARP:
250 			if (!is_arp_requestv(iov, iovcnt, vdsp))
251 				goto send;
252 			ret = create_arp_responsev(iov, iovcnt, vdsp);
253 			break;
254 		case ETH_P_IP:
255 			if (!is_dhcp_requestv(iov, iovcnt))
256 				goto send;
257 			ret = create_dhcp_responsev(iov, iovcnt, vdsp);
258 			break;
259 		default:
260 			goto send;
261 		}
262 
263 		/* This failed, drop it and continue */
264 		if (ret == 0)
265 			return (total);
266 
267 		ret = qemu_send_packet_async(&vsp->vns_nc,
268 		    vdsp->vnds_buf, ret, vnic_send_completed);
269 		/*
270 		 * qemu has told us that it can't receive any more data
271 		 * at this time for the guest (host->guest traffic) so
272 		 * turn off our read poll until we get that the send has
273 		 * completed.
274 		 */
275 		if (ret == 0)
276 			vnic_read_poll(vsp, 0);
277 		return (total);
278 	}
279 
280 send:
281 	/*
282 	 * Copy the iovcs to our write frameio. Be on the lookout for someone
283 	 * giving us more vectors than we support in frameio. In that case,
284 	 * let's go ahead and just simply concat the rest.
285 	 */
286 	for (i = 0; i < MIN(iovcnt, FRAMEIO_NVECS_MAX - 1); i++, iov++) {
287 		vsp->vns_wfio->fio_vecs[i].fv_buf = iov->iov_base;
288 		vsp->vns_wfio->fio_vecs[i].fv_buflen = iov->iov_len;
289 	}
290 
291 	altsize = 0;
292 	for (i = MIN(iovcnt, FRAMEIO_NVECS_MAX - 1); i != iovcnt; i++, iov++) {
293 		/*
294 		 * The packet is too large. We're goin to silently drop it...
295 		 */
296 		if (altsize + iov->iov_len > VNIC_BUFSIZE)
297 			return (total);
298 
299 		bcopy(iov->iov_base, vsp->vns_txbuf + altsize, iov->iov_len);
300 		altsize += iov->iov_len;
301 	}
302 	if (altsize != 0) {
303 		vsp->vns_wfio->fio_vecs[FRAMEIO_NVECS_MAX-1].fv_buf =
304 		    vsp->vns_txbuf;
305 		vsp->vns_wfio->fio_vecs[FRAMEIO_NVECS_MAX-1].fv_buflen =
306 		    altsize;
307 	}
308 
309 	vsp->vns_wfio->fio_nvecs = MIN(iovcnt, FRAMEIO_NVECS_MAX);
310 	vsp->vns_wfio->fio_nvpf = MIN(iovcnt, FRAMEIO_NVECS_MAX);
311 	do {
312 		ret = vnd_frameio_write(vsp->vns_hdl, vsp->vns_wfio);
313 	} while (ret == -1 && errno == EINTR);
314 
315 	if (ret == -1 && errno == EAGAIN) {
316 		vnic_write_poll(vsp, 1);
317 		return (0);
318 	} else if (ret == -1) {
319 		abort();
320 	}
321 
322 	total = 0;
323 	for (i = 0; i < vsp->vns_wfio->fio_nvecs; i++) {
324 		if (vsp->vns_wfio->fio_vecs[i].fv_actlen == 0 &&
325 		    vsp->vns_wfio->fio_vecs[i].fv_buflen == 0)
326 			break;
327 
328 		total += vsp->vns_wfio->fio_vecs[i].fv_actlen;
329 	}
330 
331 	return (total);
332 }
333 
334 static void
vnic_cleanup(VLANClientState * ncp)335 vnic_cleanup(VLANClientState *ncp)
336 {
337 	VNICState *vsp;
338 
339 	vsp = DO_UPCAST(VNICState, vns_nc, ncp);
340 
341 	qemu_purge_queued_packets(ncp);
342 
343 	vnd_close(vsp->vns_hdl);
344 }
345 
346 static void
vnic_update_fd_handler(VNICState * vsp)347 vnic_update_fd_handler(VNICState *vsp)
348 {
349 	qemu_set_fd_handler2(vsp->vns_fd,
350 	    vsp->vns_rpoll ? vnic_can_send : NULL,
351 	    vsp->vns_rpoll ? vnic_send : NULL,
352 	    vsp->vns_wpoll ? vnic_writable : NULL,
353 	    vsp);
354 }
355 
356 static NetClientInfo net_vnic_info = {
357 	.type = NET_CLIENT_TYPE_VNIC,
358 	.size = sizeof (VNICState),
359 	.receive = vnic_receive,
360 	.receive_iov = vnic_receive_iov,
361 	.poll = vnic_poll,
362 	.cleanup = vnic_cleanup
363 };
364 
365 /*
366  * Set up all the known values for our frame I/O devices.
367  */
368 static int
vnic_frameio_init(VNICState * vsp)369 vnic_frameio_init(VNICState *vsp)
370 {
371 	vsp->vns_rfio = qemu_mallocz(sizeof (frameio_t) +
372 	    sizeof (framevec_t) * FRAMEIO_NVECS_MAX);
373 	if (vsp->vns_rfio == NULL)
374 		return (1);
375 	vsp->vns_wfio = qemu_mallocz(sizeof (frameio_t) +
376 	    sizeof (framevec_t) * FRAMEIO_NVECS_MAX);
377 	if (vsp->vns_wfio == NULL)
378 		return (1);
379 	vsp->vns_rfio->fio_version = FRAMEIO_CURRENT_VERSION;
380 	vsp->vns_rfio->fio_nvpf = 1;
381 	vsp->vns_wfio->fio_version = FRAMEIO_CURRENT_VERSION;
382 	vsp->vns_wfio->fio_nvpf = 1;
383 	return (0);
384 }
385 
386 int
net_init_vnic(QemuOpts * opts,Monitor * mon,const char * name,VLANState * vlan)387 net_init_vnic(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan)
388 {
389 	int fd, len, vnderr, syserr;
390 	const char *ifname, *mac;
391 	uchar_t *macaddr;
392 	VLANClientState *ncp;
393 	VNICState *vsp;
394 	vnd_prop_buf_t vib;
395 
396 	if ((ifname = qemu_opt_get(opts, "ifname")) == NULL) {
397 		error_report("missing ifname required for vnic\n");
398 		return (-1);
399 	}
400 
401 	mac = qemu_opt_get(opts, "macaddr");
402 
403 	if (mac != NULL) {
404 		macaddr = _link_aton(mac, &len);
405 		if (macaddr == NULL || len != ETHERADDRL) {
406 			error_report("invalid macaddr for vnic: %s\n", mac);
407 			return (-1);
408 		}
409 	}
410 
411 	ncp = qemu_new_net_client(&net_vnic_info, vlan, NULL, "vnic", name);
412 	vsp = DO_UPCAST(VNICState, vns_nc, ncp);
413 
414 
415 	vsp->vns_hdl = vnd_open(NULL, ifname, &vnderr, &syserr);
416 	if (vsp->vns_hdl == NULL) {
417 		const char *err = vnderr != VND_E_SYS ?
418 		    vnd_strerror(vnderr) : vnd_strsyserror(syserr);
419 		error_report("vnic: failed to open interface %s - %s\n",
420 		    ifname, err);
421 		return (-1);
422 	}
423 
424 	vib.vpb_size = 1024 * 1024 * 4; 	/* 4 MB */
425 	if (vnd_prop_set(vsp->vns_hdl, VND_PROP_RXBUF, &vib,
426 	    sizeof (vib)) != 0) {
427 		const char *err = vnderr != VND_E_SYS ?
428 		    vnd_strerror(vnderr) : vnd_strsyserror(syserr);
429 		error_report("failed to change rx buf size: %s\n", err);
430 		return (-1);
431 	}
432 
433 	vib.vpb_size = 1024 * 1024 * 4; 	/* 4 MB */
434 	if (vnd_prop_set(vsp->vns_hdl, VND_PROP_TXBUF, &vib,
435 	    sizeof (vib)) != 0) {
436 		const char *err = vnderr != VND_E_SYS ?
437 		    vnd_strerror(vnderr) : vnd_strsyserror(syserr);
438 		error_report("failed to change tx buf size: %s\n", err);
439 		return (-1);
440 	}
441 
442 
443 	fd = vnd_pollfd(vsp->vns_hdl);
444 	if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
445 		error_report("vnic: failed to set fd on interface %s to "
446 		    "non-blocking: %s\n", ifname, strerror(errno));
447 		return (-1);
448 	}
449 
450 	vsp->vns_fd = fd;
451 
452 	snprintf(vsp->vns_nc.info_str, sizeof (vsp->vns_nc.info_str),
453 	    "ifname=%s", qemu_opt_get(opts, "ifname"));
454 
455 	if (vnic_dhcp_init(&vsp->vns_ds, opts) == 0)
456 		return (-1);
457 
458 	if (vnic_frameio_init(vsp) != 0) {
459 		error_report("vnic: failed initialize frameio: %s\n",
460 		    strerror(errno));
461 		return (-1);
462 	}
463 
464 	/* We have to manually intialize the polling for read */
465 	vnic_read_poll(vsp, 1);
466 
467 	return (0);
468 }
469