xref: /illumos-kvm-cmd/qemu_mdb.c (revision 41e7c3e1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2019 Joyent, Inc.
23  */
24 
25 #include <sys/mdb_modapi.h>
26 
27 #include "hw.h"
28 #include "pci.h"
29 #include "net.h"
30 #include "hw/virtio.h"
31 #include "hw/virtio-net.h"
32 #include "pci_internals.h"
33 #include "qemu-queue.h"
34 
/*
 * Sigh, this of course isn't defined in any header file, so we just have to
 * define it ourselves.
 */
struct PCIHostBus {
	int domain;
	struct PCIBus *bus;	/* target address of this host bus's PCIBus */
	QLIST_ENTRY(PCIHostBus) next;	/* next.le_next is followed by the */
					/* qemu_host_bus walker */
};
44 
/*
 * Local mirror of the virtio PCI proxy structure. ::pcidev2virtio reads one of
 * these from the target and prints its vdev member, so the layout of every
 * field up to and including vdev must match the binary being debugged.
 * NOTE(review): presumably copied from QEMU's virtio-pci code -- keep in sync.
 */
typedef struct {
	PCIDevice pci_dev;
	VirtIODevice *vdev;	/* value printed by ::pcidev2virtio */
	uint32_t flags;
	uint32_t addr;
	uint32_t class_code;
	uint32_t nvectors;
	BlockConf block;
	NICConf nic;
	uint32_t host_features;
#ifdef CONFIG_LINUX
	V9fsConf fsconf;
#endif
	/* Max. number of ports we can have for the virtio-serial device */
	uint32_t max_virtserial_ports;
	virtio_net_conf net;
	bool ioeventfd_disabled;
	bool ioeventfd_started;
} VirtIOPCIProxy;
64 
/*
 * Local mirror of one entry on the target's ram_list. The qemu_ramblock walker
 * and qemu_mdb_get_ram_ptr() read these straight out of the target.
 */
typedef struct RAMBlock {
	uint8_t *host;		/* QEMU virtual address backing the block */
	ram_addr_t offset;	/* start of the range covered by the block */
	ram_addr_t length;	/* size of the range covered by the block */
	char idstr[256];	/* name, e.g. "pc.bios" (see ::qemu_biosptr) */
	QLIST_ENTRY(RAMBlock) next;
#if defined(__linux__) && !defined(TARGET_S390X)
	int fd;
#endif
} RAMBlock;
75 
/*
 * Local mirror of the target's "ram_list" global; blocks.lh_first is the head
 * of the RAMBlock list that this module walks.
 */
typedef struct RAMList {
	uint8_t *phys_dirty;
	QLIST_HEAD(ram, RAMBlock) blocks;
} RAMList;
80 
/*
 * Local copy of the vring descriptor layout. Not referenced by the dcmds
 * visible in this file; kept alongside the other vring definitions.
 */
typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;
88 
/*
 * Layout of the available ring: two 16-bit header fields followed by the
 * 16-bit ring entries. ::qemu_vrused relies on this layout when it computes
 * the offset of the value stored just past the last ring entry.
 */
typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;
95 
/* One entry of the used ring: two 32-bit fields, eight bytes in total. */
typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;
101 
/*
 * Layout of the used ring: two 16-bit header fields followed by the
 * VRingUsedElem entries. ::qemu_vravail relies on this layout when it computes
 * the offset of the value stored just past the last ring entry.
 */
typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;
108 
/*
 * Local mirror of the target's VRing. The avail and used members are target
 * physical addresses; the vring dcmds translate them with internal_tpa2qva()
 * before reading through them.
 */
typedef struct VRing
{
    unsigned int num;		/* number of entries in each ring */
    target_phys_addr_t desc;
    target_phys_addr_t avail;
    target_phys_addr_t used;
} VRing;
116 
117 /* Sigh More definitions ... */
/*
 * Action recorded in a rein_event_t. The values index the reintostr[] name
 * table used by ::qemu_nic_reinject, so the two must be kept in the same
 * order.
 */
typedef enum rein_act {
	REIN_INJECT,
	REIN_DEADMAN,
	REIN_RUN
} rein_act_t;
123 
/* Number of rein_event_t slots in VirtIONet's rein_ring. */
#define	REIN_RING_MAX	64

/*
 * One slot of the reinject event ring. ::qemu_nic_reinject treats a slot whose
 * re_time and re_other are both zero as empty.
 */
typedef struct rein_event {
	rein_act_t 	re_act;
	hrtime_t	re_time;
	uint64_t	re_other;
	struct timeval	re_tval;
} rein_event_t;
132 
/*
 * Local mirror of the target's VirtIONet. ::qemu_nic_reinject reads an entire
 * VirtIONet from the target and then uses rein_ring_idx and rein_ring, so the
 * layout of everything ahead of those members must match the binary being
 * debugged.
 */
typedef struct VirtIONet
{
	VirtIODevice vdev;
	uint8_t mac[ETH_ALEN];
	uint16_t status;
	VirtQueue *rx_vq;
	VirtQueue *tx_vq;
	VirtQueue *ctrl_vq;
	NICState *nic;
	QEMUTimer *tx_timer;
	QEMUBH *tx_bh;
	uint32_t tx_timeout;
	int32_t tx_burst;
	int tx_waiting;
	uint32_t has_vnet_hdr;
	uint8_t has_ufo;
	struct {
		VirtQueueElement elem;
		ssize_t len;
	} async_tx;
	int mergeable_rx_bufs;
	uint8_t promisc;
	uint8_t allmulti;
	uint8_t alluni;
	uint8_t nomulti;
	uint8_t nouni;
	uint8_t nobcast;
	uint8_t vhost_started;
	struct {
		int in_use;
		int first_multi;
		uint8_t multi_overflow;
		uint8_t uni_overflow;
		uint8_t *macs;
	} mac_table;
	uint32_t *vlans;
	DeviceState *qdev;
	QEMUTimer *rein_timer;
	uint32_t rein_timer_ticks;
	uint8_t rein_timer_act;
	uint32_t rein_ring_idx;		/* starting slot for ::qemu_nic_reinject */
	rein_event_t rein_ring[REIN_RING_MAX];	/* event ring it prints */
	uint64_t rein_n_dead;
	uint64_t rein_n_inject;
	uint64_t rein_n_rerun;
} VirtIONet;
179 
/*
 * NDEVICES must match the size of the devices[] array in QEMU's PCIBus
 * structure (which is copied wholesale into pdw_devs) and should be changed
 * if that ever changes.
 */
#define	NDEVICES	256
/*
 * Per-walk state for the qemu_pci_device walker: a snapshot of the bus's
 * device pointer table and the index of the slot currently being visited.
 */
typedef struct pci_dev_wdata {
	struct PCIDevice	*pdw_devs[NDEVICES];
	int			pdw_idx;
} pci_dev_wdata_t;
189 
190 static int
qemu_mdb_host_bus_init(mdb_walk_state_t * wsp)191 qemu_mdb_host_bus_init(mdb_walk_state_t *wsp)
192 {
193 	struct PCIHostBus *head;
194 	GElf_Sym sym;
195 
196 	if (wsp->walk_addr != (uintptr_t)NULL) {
197 		mdb_printf("qemu_host_bus does not support local walks");
198 		return (WALK_ERR);
199 	}
200 
201 	/*
202 	 * The root of the host busses is defined in QEMU as:
203 	 * static QLIST_HEAD(, PCIHostBus) host_buses;
204 	 *
205 	 * However we don't really get a type, it's basically an anoynmous
206 	 * struct to mdb. So instead, since the head of the queue list points to
207 	 * the first one, we tread it as a struct PCIHOSTBus**.
208 	 */
209 	if (mdb_lookup_by_name("host_buses", &sym) != 0) {
210 		mdb_warn("unable to locate host_buse");
211 		return (WALK_ERR);
212 	}
213 
214 	if (mdb_vread(&head, sizeof (head), sym.st_value) != sizeof (head)) {
215 		mdb_warn("failed to read host_buses");
216 		return (WALK_ERR);
217 	}
218 
219 	wsp->walk_addr = (uintptr_t)head;
220 	if (head == NULL)
221 		return (WALK_DONE);
222 
223 	return (WALK_NEXT);
224 }
225 
226 
227 static int
qemu_mdb_host_bus_step(mdb_walk_state_t * wsp)228 qemu_mdb_host_bus_step(mdb_walk_state_t *wsp)
229 {
230 	struct PCIHostBus bus;
231 	uintptr_t addr = wsp->walk_addr;
232 
233 	if (addr == (uintptr_t)NULL)
234 		return (WALK_DONE);
235 
236 	if (mdb_vread(&bus, sizeof (bus), addr) != sizeof (bus)) {
237 		mdb_warn("failed to read struct PCIHostBus");
238 		return (WALK_ERR);
239 	}
240 
241 	wsp->walk_addr = (uintptr_t)bus.next.le_next;
242 	return (wsp->walk_callback(addr, &bus, wsp->walk_cbdata));
243 }
244 
245 static int
qemu_mdb_pci_device_init(mdb_walk_state_t * wsp)246 qemu_mdb_pci_device_init(mdb_walk_state_t *wsp)
247 {
248 	int ii;
249 	struct PCIBus bus;
250 	struct PCIHostBus host;
251 	pci_dev_wdata_t *pdw;
252 	struct PCIHostBus *headp;
253 	GElf_Sym sym;
254 	uintptr_t baddr;
255 
256 	/*
257 	 * We're going to make some great assumptions here, which in practice
258 	 * have been proven true in so far as we care about. Basically that
259 	 * there is only one HostBus and that that HostBus in reality only has
260 	 * one PCIBus which is the one we care about. So that's what we do here.
261 	 */
262 	if (wsp->walk_addr == (uintptr_t)NULL) {
263 		if (mdb_lookup_by_name("host_buses", &sym) != 0) {
264 			mdb_warn("unable to locate host_buse");
265 			return (WALK_ERR);
266 		}
267 
268 		if (mdb_vread(&headp, sizeof (headp), sym.st_value) !=
269 		    sizeof (headp)) {
270 			mdb_warn("failed to read host_buses");
271 			return (WALK_ERR);
272 		}
273 
274 		if (mdb_vread(&host, sizeof (host), (uintptr_t)headp) !=
275 		    sizeof (host)) {
276 			mdb_warn("failed to read host bus");
277 			return (WALK_ERR);
278 		}
279 
280 		baddr = (uintptr_t)host.bus;
281 	} else {
282 		baddr = wsp->walk_addr;
283 	}
284 
285 	if (mdb_vread(&bus, sizeof (bus), baddr) != sizeof (bus)) {
286 		mdb_warn("failed to read PCIBus\n");
287 		return (WALK_ERR);
288 	}
289 
290 	pdw = mdb_zalloc(sizeof (pci_dev_wdata_t), UM_SLEEP | UM_GC);
291 	(void) bcopy(bus.devices, pdw->pdw_devs, sizeof (bus.devices));
292 
293 	/*
294 	 * Find the first device.
295 	 */
296 	for (ii = 0; ii < NDEVICES; ii++)
297 		if (pdw->pdw_devs[ii] != NULL)
298 			break;
299 
300 	if (ii == NDEVICES)
301 		return (WALK_DONE);
302 
303 	pdw->pdw_idx = ii;
304 	wsp->walk_addr = (uintptr_t)pdw->pdw_devs[ii];
305 	wsp->walk_data = pdw;
306 
307 	return (WALK_NEXT);
308 }
309 
310 static int
qemu_mdb_pci_device_step(mdb_walk_state_t * wsp)311 qemu_mdb_pci_device_step(mdb_walk_state_t *wsp)
312 {
313 	PCIDevice dev;
314 	pci_dev_wdata_t *pdw = wsp->walk_data;
315 	uintptr_t addr = wsp->walk_addr;
316 	int ii;
317 
318 	if (pdw->pdw_idx == NDEVICES)
319 		return (WALK_DONE);
320 
321 	if (mdb_vread(&dev, sizeof (dev), addr) != sizeof (dev)) {
322 		mdb_warn("couldn't read PCIDevice at %p", addr);
323 		return (WALK_ERR);
324 	}
325 
326 	for (ii = pdw->pdw_idx + 1; ii < NDEVICES; ii++)
327 		if (pdw->pdw_devs[ii] != NULL)
328 			break;
329 
330 	pdw->pdw_idx = ii;
331 	if (ii == NDEVICES)
332 		wsp->walk_addr = (uintptr_t)NULL;
333 	else
334 		wsp->walk_addr = (uintptr_t)pdw->pdw_devs[ii];
335 
336 	return (wsp->walk_callback(addr, &dev, wsp->walk_cbdata));
337 }
338 
339 
340 static int
qemu_mdb_pci_dev_type_init(mdb_walk_state_t * wsp)341 qemu_mdb_pci_dev_type_init(mdb_walk_state_t *wsp)
342 {
343 	if (wsp->walk_addr != (uintptr_t)NULL) {
344 		mdb_warn("local walks not supported");
345 		return (WALK_ERR);
346 	}
347 
348 	if (wsp->walk_arg == NULL) {
349 		mdb_warn("called into qemu_mdb_pci_dev_type_init with no arg");
350 		return (WALK_ERR);
351 	}
352 
353 	if (mdb_layered_walk("qemu_pci_device", wsp) == -1) {
354 		mdb_warn("failed to init layered walk");
355 		return (WALK_ERR);
356 	}
357 
358 	return (WALK_NEXT);
359 }
360 
361 static int
qemu_mdb_pci_dev_type_step(mdb_walk_state_t * wsp)362 qemu_mdb_pci_dev_type_step(mdb_walk_state_t *wsp)
363 {
364 	PCIDevice dev;
365 
366 	if (wsp->walk_addr == (uintptr_t)NULL) {
367 		mdb_warn("found unexpected null device pointer");
368 		return (WALK_ERR);
369 	}
370 
371 	if (mdb_vread(&dev, sizeof (dev), wsp->walk_addr) != sizeof (dev)) {
372 		mdb_warn("failed to read device: %p", wsp->walk_addr);
373 		return (WALK_ERR);
374 	}
375 
376 	if (strcmp(wsp->walk_arg, dev.name) != 0)
377 		return (WALK_NEXT);
378 
379 	return (wsp->walk_callback(wsp->walk_addr, &dev, wsp->walk_cbdata));
380 }
381 
382 
383 /*
384  * XXX There is a subtle mdb memory leak here. We're duping the string name for
385  * the walkers as initial arguments so we can use it as a filter when doing the
386  * larger walk. This is fine, but right now we're being rather lazy and not
387  * cleaning up that these exist which means that we need some way to keep track
388  * of them at some point and free it when we unload.
389  */
390 /*ARGSUSED*/
391 static int
qemu_mdb_init_walkers(uintptr_t addr,const PCIDevice * d,void * ignored)392 qemu_mdb_init_walkers(uintptr_t addr, const PCIDevice *d, void *ignored)
393 {
394 	mdb_walker_t w;
395 	size_t len;
396 	char *ndup;
397 	char wname[64];
398 	char descr[64];
399 
400 	(void) mdb_snprintf(descr, sizeof (descr),
401 	    "walk the qemu %s devices", d->name);
402 	(void) mdb_snprintf(wname, sizeof (wname),
403 	    "qemu_%s", d->name);
404 
405 	/* Don't forget your null terminator */
406 	len = strlen(d->name) + 1;
407 	ndup = mdb_alloc(sizeof (char) * len, UM_SLEEP);
408 	(void) strcpy(ndup, d->name);
409 	w.walk_name = wname;
410 	w.walk_descr = descr;
411 	w.walk_init = qemu_mdb_pci_dev_type_init;
412 	w.walk_step = qemu_mdb_pci_dev_type_step;
413 	w.walk_fini = NULL;
414 	w.walk_init_arg = (void *)ndup;
415 
416 	/*
417 	 * XXX Normally this failure would be bad, but we're purposefully being
418 	 * lazy and recreating walkers with the same name as ones which already
419 	 * exist, e.g. when we have multiple devices of the same general type.
420 	 *
421 	 * Unfortunately, errno's aren't part of the module API so we have no
422 	 * way of distinguishing them. So we just swallow all of them for now.
423 	 */
424 	if (mdb_add_walker(&w) == -1)
425 		mdb_free(ndup, len * sizeof (char));
426 
427 	return (0);
428 }
429 
430 /*
431  * This is a generic function for different types of nics that exist. Walkers
432  * are created from this as part of mdb initialization.
433  */
434 static int
qemu_mdb_nic_state_walk_init(mdb_walk_state_t * wsp)435 qemu_mdb_nic_state_walk_init(mdb_walk_state_t *wsp)
436 {
437 	assert(wsp->walk_arg != NULL);
438 
439 	if (wsp->walk_addr != (uintptr_t)NULL) {
440 		mdb_warn("local walks are not supported\n");
441 		return (WALK_ERR);
442 	}
443 
444 	if (mdb_layered_walk("qemu_vlan_clients", wsp) == -1) {
445 		mdb_warn("failed to walk 'qemu_vlan_clients'");
446 		return (WALK_ERR);
447 	}
448 
449 	return (WALK_NEXT);
450 }
451 
452 static int
qemu_mdb_nic_state_walk_step(mdb_walk_state_t * wsp)453 qemu_mdb_nic_state_walk_step(mdb_walk_state_t *wsp)
454 {
455 	VLANClientState v;
456 	char buf[128];
457 
458 	assert(wsp->walk_addr != (uintptr_t)NULL);
459 
460 	if (mdb_vread(&v, sizeof (v), wsp->walk_addr) != sizeof (v)) {
461 		mdb_warn("failed to read VLANClient %p", wsp->walk_addr);
462 		return (WALK_ERR);
463 	}
464 
465 	if (mdb_readstr(buf, sizeof (buf), (uintptr_t)v.model) == -1) {
466 		mdb_warn("failed to read model string at %p", v.model);
467 		return (WALK_ERR);
468 	}
469 
470 	if (strcmp(wsp->walk_arg, buf) != 0)
471 		return (WALK_NEXT);
472 
473 	return (wsp->walk_callback(wsp->walk_addr, &v, wsp->walk_cbdata));
474 }
475 
476 #define	QEMU_N_NIC_TYPES	2
477 static char *qemu_nic_types[] = { "e1000", "vnic" };
478 
479 static int
qemu_init_nics(void)480 qemu_init_nics(void)
481 {
482 	const char *nic;
483 	mdb_walker_t w;
484 	char wname[64];
485 	char descr[64];
486 	int i;
487 
488 	nic = qemu_nic_types[0];
489 	for (i = 0; i < QEMU_N_NIC_TYPES; i++) {
490 		nic = qemu_nic_types[i];
491 		(void) mdb_snprintf(descr, sizeof (descr),
492 		    "walk the qemu %s nic state", nic);
493 		(void) mdb_snprintf(wname, sizeof (descr),
494 		    "qemu_nics_%s", nic);
495 		w.walk_name = wname;
496 		w.walk_descr = descr;
497 		w.walk_init = qemu_mdb_nic_state_walk_init;
498 		w.walk_step = qemu_mdb_nic_state_walk_step;
499 		w.walk_fini = NULL;
500 		w.walk_init_arg = (void *)nic;
501 		if (mdb_add_walker(&w) == -1)
502 			return (-1);
503 		nic++;
504 	}
505 
506 	return (0);
507 }
508 
509 static int
qemu_mdb_init(void)510 qemu_mdb_init(void)
511 {
512 	mdb_walker_t w = { "qemu_pci_device",
513 		"walk a PCI Bus's attached devices", qemu_mdb_pci_device_init,
514 		qemu_mdb_pci_device_step, NULL };
515 
516 	if (mdb_add_walker(&w) == -1) {
517 		mdb_warn("failed to add qemu_pci_device walker");
518 		return (-1);
519 	}
520 
521 	if (qemu_init_nics() == -1) {
522 		mdb_warn("failed to add nic state walkers");
523 		return (-1);
524 	}
525 
526 	(void) mdb_walk("qemu_pci_device", (mdb_walk_cb_t)qemu_mdb_init_walkers,
527 	    NULL);
528 
529 	return (0);
530 }
531 
532 static int
qemu_mdb_pcidev2virtio(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)533 qemu_mdb_pcidev2virtio(uintptr_t addr, uint_t flags, int argc,
534     const mdb_arg_t *argv)
535 {
536 	VirtIOPCIProxy v;
537 
538 	if (!(flags & DCMD_ADDRSPEC))
539 		return (DCMD_USAGE);
540 
541 	if (argc > 1)
542 		return (DCMD_USAGE);
543 
544 	if (mdb_vread(&v, sizeof (v), addr) != sizeof (v)) {
545 		mdb_warn("failed to read Virtio Proxy structure");
546 		return (DCMD_ERR);
547 	}
548 
549 	mdb_printf("%lr\n", v.vdev);
550 
551 	return (DCMD_OK);
552 }
553 
/*
 * These are a series of definitions that we need for qemu_mdb_tpa2qva. Note
 * that while most of them have the same name as in QEMU, unfortunately QEMU
 * has a #pragma poison on some of them that prevents us from using them
 * without changing the name.
 */
/*
 * Leaf entry of the target's physical page table, as read by
 * internal_tpa2qva(); only phys_offset is consulted there.
 */
typedef struct PhysPageDesc {
	/* offset in host memory of the page + io_index in the low bits */
	ram_addr_t phys_offset;
	ram_addr_t region_offset;
} PhysPageDesc;
565 
/*
 * Geometry of the l1_phys_map page table walked by internal_tpa2qva(): page
 * size, width of each intermediate level (L2_BITS) and the width and shift of
 * the top level. With the values below the top level is 10 bits wide and
 * P_L1_SHIFT is 30.
 * NOTE(review): these are presumably copied from the QEMU build being
 * debugged -- confirm they still match before trusting a translation.
 */
#define	MDB_TARGET_PAGE_BITS 12
#define	MDB_TARGET_PAGE_SIZE (1 << MDB_TARGET_PAGE_BITS)
#define	MDB_TARGET_PAGE_MASK ~(MDB_TARGET_PAGE_SIZE - 1)
#define	TARGET_VIRT_ADDR_SPACE_BITS 47
#define	TARGET_PHYS_ADDR_SPACE_BITS 52
#define	L2_BITS 10
#define	L2_SIZE (1 << L2_BITS)
/* Bits left over for the top level after the full L2_BITS-wide levels. */
#define	P_L1_BITS_REM \
	((TARGET_PHYS_ADDR_SPACE_BITS - MDB_TARGET_PAGE_BITS) % L2_BITS)
/* A remainder of fewer than 4 bits is folded into a wider top level. */
#if P_L1_BITS_REM < 4
#define	P_L1_BITS  (P_L1_BITS_REM + L2_BITS)
#else
#define	P_L1_BITS  P_L1_BITS_REM
#endif
#define	P_L1_SIZE  ((uintptr_t)1 << P_L1_BITS)
#define	P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - MDB_TARGET_PAGE_BITS - \
    P_L1_BITS)
583 
584 static uintptr_t
qemu_mdb_get_ram_ptr(uintptr_t addr)585 qemu_mdb_get_ram_ptr(uintptr_t addr)
586 {
587 	GElf_Sym sym;
588 	RAMList rl;
589 	uintptr_t rbp;
590 	RAMBlock rb;
591 
592 	if (mdb_lookup_by_name("ram_list", &sym) != 0) {
593 		mdb_warn("failed to look up ram_list");
594 		return (0);
595 	}
596 
597 	if (mdb_vread(&rl, sizeof (rl), sym.st_value) != sizeof (rl)) {
598 		mdb_warn("failed to read ram_list");
599 		return (0);
600 	}
601 
602 	rbp = (uintptr_t)rl.blocks.lh_first;
603 	for (;;) {
604 		if (rbp == (uintptr_t)NULL) {
605 			mdb_warn("failed to find RAMBlock for address");
606 			return (0);
607 		}
608 
609 		if (mdb_vread(&rb, sizeof (rb), rbp) != sizeof (rb)) {
610 			mdb_warn("failed to read RAMBlock %p", rbp);
611 			return (0);
612 		}
613 
614 		if (addr - rb.offset < rb.length)
615 			break;
616 
617 		rbp = (uintptr_t)rb.next.le_next;
618 	}
619 
620 	return ((uintptr_t)(rb.host + (addr - rb.offset)));
621 }
622 
623 static int
internal_tpa2qva(uintptr_t addr,uintptr_t * res)624 internal_tpa2qva(uintptr_t addr, uintptr_t *res)
625 {
626 	GElf_Sym sym;
627 	void **lp, **p;
628 	int ii;
629 	PhysPageDesc *pdp, pd;
630 	uintptr_t paddr, pfaddr, vptr;
631 
632 	if (mdb_lookup_by_name("l1_phys_map", &sym) != 0) {
633 		mdb_warn("unable to locate host_buse");
634 		return (DCMD_ERR);
635 	}
636 
637 	lp = (void **)sym.st_value;
638 	pfaddr = addr >> MDB_TARGET_PAGE_BITS;
639 	lp += ((pfaddr >> P_L1_SHIFT) & (P_L1_SIZE - 1));
640 
641 	for (ii = P_L1_SHIFT / L2_BITS - 1; ii > 0; ii--) {
642 		if (mdb_vread(&p, sizeof (p), (uintptr_t)lp) != sizeof (p)) {
643 			mdb_warn("failed to read into l1 page table");
644 			return (DCMD_ERR);
645 		}
646 
647 		if (p == NULL) {
648 			mdb_warn("found a null entry, bailing");
649 			return (DCMD_ERR);
650 		}
651 
652 		lp = p + ((pfaddr >> (ii * L2_BITS)) & (L2_SIZE - 1));
653 	}
654 
655 	if (mdb_vread(&pdp, sizeof (pdp), (uintptr_t)lp) != sizeof (pdp)) {
656 		mdb_warn("failed to read into the PhysPageDesc");
657 		return (DCMD_ERR);
658 	}
659 
660 	if (pdp == NULL) {
661 		mdb_warn("found null PhysPageDesc, bailing");
662 		return (DCMD_ERR);
663 	}
664 
665 	pdp += (pfaddr & (L2_SIZE - 1));
666 	if (mdb_vread(&pd, sizeof (pd), (uintptr_t)pdp) != sizeof (pd)) {
667 		mdb_warn("failed to read pdp");
668 		return (DCMD_ERR);
669 	}
670 
671 	paddr = pd.phys_offset;
672 	if ((paddr & ~MDB_TARGET_PAGE_MASK) > IO_MEM_ROM &&
673 	    !(paddr & IO_MEM_ROMD)) {
674 		mdb_printf("Address is in I/O space. Not touching it.");
675 		return (DCMD_OK);
676 	}
677 
678 	vptr = qemu_mdb_get_ram_ptr(paddr & MDB_TARGET_PAGE_MASK);
679 	if (vptr == 0)
680 		return (DCMD_ERR);
681 	vptr += addr & ~MDB_TARGET_PAGE_MASK;
682 	*res = vptr;
683 
684 	return (DCMD_OK);
685 }
686 
687 static int
qemu_mdb_tpa2qva(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)688 qemu_mdb_tpa2qva(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
689 {
690 	uintptr_t vptr;
691 
692 	if (!(flags & DCMD_ADDRSPEC))
693 		return (DCMD_USAGE);
694 
695 	if (argc > 1)
696 		return (DCMD_USAGE);
697 
698 	if (internal_tpa2qva(addr, &vptr) != DCMD_OK)
699 		return (DCMD_ERR);
700 
701 	mdb_printf("%lr\n", vptr);
702 
703 	return (DCMD_OK);
704 }
705 
706 static int
qemu_mdb_vrused(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)707 qemu_mdb_vrused(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
708 {
709 	VRing ring;
710 	uintptr_t avaddr;
711 	uint16_t index;
712 
713 	if (!(flags & DCMD_ADDRSPEC))
714 		return (DCMD_USAGE);
715 
716 	if (argc > 1)
717 		return (DCMD_USAGE);
718 
719 	if (mdb_vread(&ring, sizeof (ring), addr) != sizeof (ring)) {
720 		mdb_warn("failed to read VRing");
721 		return (DCMD_ERR);
722 	}
723 
724 	if (internal_tpa2qva(ring.avail, &avaddr) != DCMD_OK) {
725 		mdb_warn("failed to translate available ring to VA");
726 		return (DCMD_ERR);
727 	}
728 
729 	/* Account for offset */
730 	avaddr += ring.num * sizeof (uint16_t) + 0x4;
731 	if (mdb_vread(&index, sizeof (index), avaddr) != sizeof (index)) {
732 		mdb_warn("failed to read index value");
733 		return (DCMD_ERR);
734 	}
735 
736 	mdb_printf("%lr\n", index);
737 	return (DCMD_OK);
738 }
739 
740 static int
qemu_mdb_vravail(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)741 qemu_mdb_vravail(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
742 {
743 	VRing ring;
744 	uintptr_t avaddr;
745 	uint16_t index;
746 
747 	if (!(flags & DCMD_ADDRSPEC))
748 		return (DCMD_USAGE);
749 
750 	if (argc > 1)
751 		return (DCMD_USAGE);
752 
753 	if (mdb_vread(&ring, sizeof (ring), addr) != sizeof (ring)) {
754 		mdb_warn("failed to read VRing");
755 		return (DCMD_ERR);
756 	}
757 
758 	if (internal_tpa2qva(ring.used, &avaddr) != DCMD_OK) {
759 		mdb_warn("failed to translate available ring to VA");
760 		return (DCMD_ERR);
761 	}
762 
763 	/* Account for offset */
764 	avaddr += ring.num * sizeof (uint64_t) + 0x4;
765 	if (mdb_vread(&index, sizeof (index), avaddr) != sizeof (index)) {
766 		mdb_warn("failed to read index value");
767 		return (DCMD_ERR);
768 	}
769 
770 	mdb_printf("%lr\n", index);
771 	return (DCMD_OK);
772 }
773 
774 static const char *reintostr[] = {
775 	"INJECT",
776 	"DEADMAN",
777 	"RUN"
778 };
779 
780 static int
qemu_mdb_nic_reinject(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)781 qemu_mdb_nic_reinject(uintptr_t addr, uint_t flags, int argc,
782     const mdb_arg_t *argv)
783 {
784 	VirtIONet *n;
785 	uint32_t ii, end;
786 	rein_event_t *rep;
787 
788 	if (!(flags & DCMD_ADDRSPEC))
789 		return (DCMD_USAGE);
790 
791 	if (argc > 1)
792 		return (DCMD_USAGE);
793 
794 	n = mdb_alloc(sizeof (VirtIONet), UM_SLEEP | UM_GC);
795 
796 	if (mdb_vread(n, sizeof (VirtIONet), addr) != sizeof (VirtIONet)) {
797 		mdb_warn("failed to read VirtIONet");
798 		return (DCMD_ERR);
799 	}
800 
801 	if (n->rein_ring_idx == 0)
802 		end = REIN_RING_MAX;
803 	else
804 		end = n->rein_ring_idx - 1;
805 
806 	mdb_printf("%-?s %-10s %s\n", "TIMESTAMP", "ACTION", "OTHER");
807 	ii = n->rein_ring_idx;
808 	for (;;) {
809 		rep = n->rein_ring + ii;
810 		if (rep->re_time == 0 && rep->re_other == 0)
811 			break;
812 
813 		mdb_printf("%-?p %-10s ", rep->re_time, reintostr[rep->re_act]);
814 		if (rep->re_other == 0)
815 			mdb_printf("\n", " - ");
816 		else
817 			mdb_printf("%d\n", rep->re_other);
818 		if (ii + 1 == end)
819 			break;
820 		ii = (ii + 1) % REIN_RING_MAX;
821 	}
822 
823 	return (DCMD_OK);
824 }
825 
826 
827 static int
qemu_mdb_ramblock_walk_init(mdb_walk_state_t * wsp)828 qemu_mdb_ramblock_walk_init(mdb_walk_state_t *wsp)
829 {
830 	GElf_Sym sym;
831 	RAMList rl;
832 
833 	if (wsp->walk_addr != (uintptr_t)NULL) {
834 		mdb_warn("qemu_ramblock does not support local walks\n");
835 		return (WALK_ERR);
836 	}
837 
838 	if (mdb_lookup_by_name("ram_list", &sym) == -1) {
839 		mdb_warn("lookup_by_name failed to find ram_list");
840 		return (WALK_ERR);
841 	}
842 
843 	if (mdb_vread(&rl, sizeof (rl), sym.st_value) != sizeof (rl)) {
844 		mdb_warn("failed to read ram_list");
845 		return (WALK_ERR);
846 	}
847 
848 	wsp->walk_addr = (uintptr_t)rl.blocks.lh_first;
849 	if (wsp->walk_addr == (uintptr_t)NULL)
850 		return (WALK_DONE);
851 
852 	return (WALK_NEXT);
853 }
854 
855 static int
qemu_mdb_ramblock_walk_step(mdb_walk_state_t * wsp)856 qemu_mdb_ramblock_walk_step(mdb_walk_state_t *wsp)
857 {
858 	RAMBlock rb;
859 	uintptr_t addr = wsp->walk_addr;
860 
861 	if (addr == (uintptr_t)NULL)
862 		return (WALK_DONE);
863 
864 	if (mdb_vread(&rb, sizeof (rb), addr) != sizeof (rb)) {
865 		mdb_warn("failed to read RAMBlock %p", addr);
866 		return (WALK_ERR);
867 	}
868 
869 	wsp->walk_addr = (uintptr_t)rb.next.le_next;
870 
871 	return (wsp->walk_callback(addr, &rb, wsp->walk_cbdata));
872 }
873 
874 static int
qemu_mdb_biosptr_cb(uintptr_t addr,const RAMBlock * rb,void * v)875 qemu_mdb_biosptr_cb(uintptr_t addr, const RAMBlock *rb, void *v)
876 {
877 	uintptr_t *res = v;
878 	if (strcmp("pc.bios", rb->idstr) == 0)
879 		*res = (uintptr_t)rb->host;
880 
881 	return (0);
882 }
883 
884 
885 static int
qemu_mdb_biosptr(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)886 qemu_mdb_biosptr(uintptr_t addr, uint_t flags, int argc,
887     const mdb_arg_t *argv)
888 {
889 	uintptr_t out = -1;
890 
891 	if (flags & DCMD_ADDRSPEC)
892 		return (DCMD_USAGE);
893 
894 	if (argc > 1)
895 		return (DCMD_USAGE);
896 
897 	mdb_walk("qemu_ramblock", (mdb_walk_cb_t)qemu_mdb_biosptr_cb,
898 	    (void *)&out);
899 
900 	if (out == -1) {
901 		mdb_warn("failed to find pc.bios\n");
902 		return (DCMD_ERR);
903 	}
904 
905 	mdb_printf("%x\n", out);
906 
907 	return (DCMD_OK);
908 }
909 
910 /*
911  * QEMU uses an anonymous structure for the start of the vlans. Which is really
912  * not as nice as it could be. As such we replicate that here.
913  */
typedef struct qemu_vlan_header {
	struct VLANState *tqh_first;	/* first vlan; where the walk starts */
	struct VLANState **tqh_last;
} qemu_vlan_header_t;
918 
919 static int
qemu_mdb_vlan_walk_init(mdb_walk_state_t * wsp)920 qemu_mdb_vlan_walk_init(mdb_walk_state_t *wsp)
921 {
922 	GElf_Sym sym;
923 	qemu_vlan_header_t v;
924 
925 	if (wsp->walk_addr != (uintptr_t)NULL) {
926 		mdb_warn("qemu_vlan does not support local walks\n");
927 		return (WALK_ERR);
928 	}
929 
930 	if (mdb_lookup_by_name("vlans", &sym) == -1) {
931 		mdb_warn("lookup_by_name failed to find vlans");
932 		return (WALK_ERR);
933 	}
934 
935 	if (mdb_vread(&v, sizeof (v), sym.st_value) != sizeof (v)) {
936 		mdb_warn("failed to read vlan header");
937 		return (WALK_ERR);
938 	}
939 
940 	wsp->walk_addr = (uintptr_t)v.tqh_first;
941 
942 	return (WALK_NEXT);
943 }
944 
945 static int
qemu_mdb_vlan_walk_step(mdb_walk_state_t * wsp)946 qemu_mdb_vlan_walk_step(mdb_walk_state_t *wsp)
947 {
948 	VLANState v;
949 	uintptr_t addr = wsp->walk_addr;
950 
951 	if (addr == (uintptr_t)NULL)
952 		return (WALK_DONE);
953 
954 	if (mdb_vread(&v, sizeof (v), addr) != sizeof (v)) {
955 		mdb_warn("failed to read the VLanState %p", addr);
956 		return (WALK_ERR);
957 	}
958 
959 	wsp->walk_addr = (uintptr_t)v.next.tqe_next;
960 
961 	return (wsp->walk_callback(addr, &v, wsp->walk_cbdata));
962 }
963 
964 static int
qemu_mdb_vlan_clients_walk_init(mdb_walk_state_t * wsp)965 qemu_mdb_vlan_clients_walk_init(mdb_walk_state_t *wsp)
966 {
967 	if (wsp->walk_addr != (uintptr_t)NULL) {
968 		mdb_warn("qemu_vlan does not support local walks\n");
969 		return (WALK_ERR);
970 	}
971 
972 	if (mdb_layered_walk("qemu_vlans", wsp) == -1) {
973 		mdb_warn("couldn't walk 'qemu_vlans'");
974 		return (WALK_ERR);
975 	}
976 
977 	return (WALK_NEXT);
978 }
979 
980 static int
qemu_mdb_vlan_clients_walk_step(mdb_walk_state_t * wsp)981 qemu_mdb_vlan_clients_walk_step(mdb_walk_state_t *wsp)
982 {
983 	uintptr_t addr;
984 	VLANClientState v;
985 	int rval;
986 
987 	addr = (uintptr_t)((VLANState *)wsp->walk_layer)->clients.tqh_first;
988 
989 	while (addr != (uintptr_t)NULL) {
990 		if (mdb_vread(&v, sizeof (v), addr) != sizeof (v)) {
991 			mdb_warn("couldn't read VLANClient at %p", addr);
992 			return (WALK_ERR);
993 		}
994 
995 		rval = wsp->walk_callback(addr, &v, wsp->walk_cbdata);
996 
997 		if (rval != WALK_NEXT)
998 			return (rval);
999 
1000 		addr = (uintptr_t)v.next.tqe_next;
1001 	}
1002 
1003 	return (WALK_NEXT);
1004 }
1005 
1006 
/*
 * Table of dcmds exported by this module; handed to mdb through
 * qemu_mdb_modinfo. The table is terminated by an entry whose name is NULL.
 */
static const mdb_dcmd_t qemu_dcmds[] = {
	{ "pcidev2virtio", NULL, "translate a virtio PCI device to its "
		"virtio equivalent", qemu_mdb_pcidev2virtio },
	{ "qemu_tpa2qva", NULL, "translate a target physical address to a "
		"QEMU virtual address", qemu_mdb_tpa2qva },
	{ "qemu_vrused", NULL, "Spit out the used event of the vring",
		qemu_mdb_vrused },
	{ "qemu_vravail", NULL, "Spit out the avail event of the vring",
		qemu_mdb_vravail },
	{ "qemu_nic_reinject", NULL, "Print all of the reinject events",
		qemu_mdb_nic_reinject },
	{ "qemu_biosptr", NULL, "Spit out a pointer to the bios memory",
		qemu_mdb_biosptr },
	{ NULL }
};
1022 
/*
 * Statically registered walkers, handed to mdb through qemu_mdb_modinfo. The
 * qemu_pci_device walker, the per-device-name walkers and the per-model nic
 * walkers are not listed here; they are added at load time from
 * qemu_mdb_init(). The table is terminated by an entry whose name is NULL.
 */
static const mdb_walker_t qemu_walkers[] = {
	{ "qemu_host_bus", "walk qemu PCIHostBus structures",
		qemu_mdb_host_bus_init, qemu_mdb_host_bus_step, NULL },
	{ "qemu_ramblock", "walk qemu ramblock structures",
		qemu_mdb_ramblock_walk_init, qemu_mdb_ramblock_walk_step,
		NULL },
	{ "qemu_vlans", "walk qemu vlan structures",
		qemu_mdb_vlan_walk_init, qemu_mdb_vlan_walk_step, NULL },
	{ "qemu_vlan_clients", "walk qemu vlan client structures",
		qemu_mdb_vlan_clients_walk_init,
		qemu_mdb_vlan_clients_walk_step, NULL },
	{ NULL }
};
1036 
/* Module description returned to mdb from _mdb_init(). */
static const mdb_modinfo_t qemu_mdb_modinfo = { MDB_API_VERSION, qemu_dcmds,
	qemu_walkers };
1039 
1040 const mdb_modinfo_t *
_mdb_init(void)1041 _mdb_init(void)
1042 {
1043 	if (qemu_mdb_init() != 0)
1044 		return (NULL);
1045 
1046 	return (&qemu_mdb_modinfo);
1047 }
1048