xref: /illumos-kvm-cmd/target-i386/kvm.c (revision e0ffedd6)
1 /*
2  * QEMU KVM support
3  *
4  * Copyright (C) 2006-2008 Qumranet Technologies
5  * Copyright IBM, Corp. 2008
6  *
7  * Authors:
8  *  Anthony Liguori   <aliguori@us.ibm.com>
9  *
10  * This work is licensed under the terms of the GNU GPL, version 2 or later.
11  * See the COPYING file in the top-level directory.
12  *
13  * Portions Copyright 2018 Joyent, Inc.
14  */
15 
16 #include <sys/types.h>
17 #include <sys/ioctl.h>
18 #include <sys/mman.h>
19 #include <sys/utsname.h>
20 
21 #ifdef __sun__
22 #include <sys/kvm.h>
23 #else
24 #include <linux/kvm.h>
25 #endif
26 
27 #include "qemu-common.h"
28 #include "sysemu.h"
29 #include "kvm.h"
30 #include "cpu.h"
31 #include "gdbstub.h"
32 #include "host-utils.h"
33 #include "hw/pc.h"
34 #include "hw/apic.h"
35 #include "ioport.h"
36 #include "kvm_x86.h"
37 
38 #ifdef CONFIG_KVM_PARA
39 #include <sys/kvm_para.h>
40 #endif
41 
42 #ifdef __sun__
43 #define	__u64	uint64_t
44 #endif
45 
46 //
47 //#define DEBUG_KVM
48 
49 #ifdef DEBUG_KVM
50 #define DPRINTF(fmt, ...) \
51     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
52 #else
53 #define DPRINTF(fmt, ...) \
54     do { } while (0)
55 #endif
56 
57 #define MSR_KVM_WALL_CLOCK  0x11
58 #define MSR_KVM_SYSTEM_TIME 0x12
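/*
 * These are the original kvmclock MSR indices; the guest writes a physical
 * address into them to enable the paravirtual clock.  Here they are simply
 * saved and restored as raw values (see kvm_get_msrs()/kvm_put_msrs() below).
 */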
59 
60 #ifndef BUS_MCEERR_AR
61 #define BUS_MCEERR_AR 4
62 #endif
63 #ifndef BUS_MCEERR_AO
64 #define BUS_MCEERR_AO 5
65 #endif
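/*
 * BUS_MCEERR_AR/AO are the siginfo si_code values used when the host kernel
 * delivers SIGBUS for a hardware memory error ("action required" vs. "action
 * optional"); define them here in case the host headers lack them.
 */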
66 
67 #ifdef OBSOLETE_KVM_IMPL
68 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
69     KVM_CAP_INFO(SET_TSS_ADDR),
70     KVM_CAP_INFO(EXT_CPUID),
71     KVM_CAP_INFO(MP_STATE),
72     KVM_CAP_LAST_INFO
73 };
74 #endif
75 
76 static bool has_msr_star;
77 static bool has_msr_hsave_pa;
78 #if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ASYNC_PF)
79 static bool has_msr_async_pf_en;
80 #endif
81 static int lm_capable_kernel;
82 
83 static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
84 {
85     struct kvm_cpuid2 *cpuid;
86     int r, size;
87 
88     size = sizeof(*cpuid) + max * sizeof(*cpuid->entries);
89     cpuid = (struct kvm_cpuid2 *)qemu_mallocz(size);
90     cpuid->nent = max;
91     r = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, cpuid);
92     if (r == 0 && cpuid->nent >= max) {
93         r = -E2BIG;
94     }
95     if (r < 0) {
96         if (r == -E2BIG) {
97             qemu_free(cpuid);
98             return NULL;
99         } else {
100             fprintf(stderr, "KVM_GET_SUPPORTED_CPUID failed: %s\n",
101                     strerror(-r));
102             exit(1);
103         }
104     }
105     return cpuid;
106 }
107 
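/*
 * try_get_cpuid() returns NULL on -E2BIG, i.e. when the buffer is too small
 * for the kernel's CPUID table, so the caller below simply doubles the entry
 * count and retries until the whole table fits.
 */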
108 uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
109                                       uint32_t index, int reg)
110 {
111     struct kvm_cpuid2 *cpuid;
112     int i, max;
113     uint32_t ret = 0;
114     uint32_t cpuid_1_edx;
115 
116     max = 1;
117     while ((cpuid = try_get_cpuid(env->kvm_state, max)) == NULL) {
118         max *= 2;
119     }
120 
121     for (i = 0; i < cpuid->nent; ++i) {
122         if (cpuid->entries[i].function == function &&
123             cpuid->entries[i].index == index) {
124             switch (reg) {
125             case R_EAX:
126                 ret = cpuid->entries[i].eax;
127                 break;
128             case R_EBX:
129                 ret = cpuid->entries[i].ebx;
130                 break;
131             case R_ECX:
132                 ret = cpuid->entries[i].ecx;
133                 break;
134             case R_EDX:
135                 ret = cpuid->entries[i].edx;
136                 switch (function) {
137                 case 1:
138                     /* KVM before 2.6.30 misreports the following features */
139                     ret |= CPUID_MTRR | CPUID_PAT | CPUID_MCE | CPUID_MCA;
140                     break;
141                 case 0x80000001:
142                     /* On Intel, kvm returns cpuid according to the Intel spec,
143                      * so add missing bits according to the AMD spec:
144                      */
145                     cpuid_1_edx = kvm_arch_get_supported_cpuid(env, 1, 0, R_EDX);
146                     ret |= cpuid_1_edx & 0x183f7ff;
147                     break;
148                 }
149                 break;
150             }
151         }
152     }
153 
154     qemu_free(cpuid);
155 
156     return ret;
157 }
158 
159 #ifdef CONFIG_KVM_PARA
160 struct kvm_para_features {
161     int cap;
162     int feature;
163 } para_features[] = {
164     /*
165      * This is a little hackish.  Upstream KVM has been refactored to make the
166      * process of exposing these related features more straightforward.  Rather
167      * than pull in all that delta, we just repeat the loop to set the
168      * additional needed bits.
169      */
170     { KVM_CAP_CLOCKSOURCE, KVM_FEATURE_CLOCKSOURCE },
171     { KVM_CAP_CLOCKSOURCE, KVM_FEATURE_CLOCKSOURCE2 },
172     { KVM_CAP_CLOCKSOURCE, KVM_FEATURE_CLOCKSOURCE_STABLE_BIT },
173 
174     { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY },
175     { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP },
176 #ifdef KVM_CAP_ASYNC_PF
177     { KVM_CAP_ASYNC_PF, KVM_FEATURE_ASYNC_PF },
178 #endif
179     { -1, -1 }
180 };
181 
182 static int get_para_features(CPUState *env)
183 {
184     int i, features = 0;
185 
186     for (i = 0; i < ARRAY_SIZE(para_features) - 1; i++) {
187         if (kvm_check_extension(env->kvm_state, para_features[i].cap)) {
188             features |= (1 << para_features[i].feature);
189         }
190     }
191 #ifdef KVM_CAP_ASYNC_PF
192     has_msr_async_pf_en = features & (1 << KVM_FEATURE_ASYNC_PF);
193 #endif
194     return features;
195 }
196 #endif
197 
198 #ifdef KVM_CAP_MCE
199 static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap,
200                                      int *max_banks)
201 {
202     int r;
203 
204     r = kvm_check_extension(s, KVM_CAP_MCE);
205     if (r > 0) {
206         *max_banks = r;
207         return kvm_ioctl(s, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap);
208     }
209     return -ENOSYS;
210 }
211 
212 static int kvm_setup_mce(CPUState *env, uint64_t *mcg_cap)
213 {
214     return kvm_vcpu_ioctl(env, KVM_X86_SETUP_MCE, mcg_cap);
215 }
216 
217 static int kvm_set_mce(CPUState *env, struct kvm_x86_mce *m)
218 {
219     return kvm_vcpu_ioctl(env, KVM_X86_SET_MCE, m);
220 }
221 
222 static int kvm_get_msr(CPUState *env, struct kvm_msr_entry *msrs, int n)
223 {
224     struct kvm_msrs *kmsrs = qemu_malloc(sizeof *kmsrs + n * sizeof *msrs);
225     int r;
226 
227     kmsrs->nmsrs = n;
228     memcpy(kmsrs->entries, msrs, n * sizeof *msrs);
229     r = kvm_vcpu_ioctl(env, KVM_GET_MSRS, kmsrs);
230     memcpy(msrs, kmsrs->entries, n * sizeof *msrs);
231     free(kmsrs);
232     return r;
233 }
234 
235 /* FIXME: kill this and kvm_get_msr, use env->mcg_status instead */
236 static int kvm_mce_in_progress(CPUState *env)
237 {
238     struct kvm_msr_entry msr_mcg_status = {
239         .index = MSR_MCG_STATUS,
240     };
241     int r;
242 
243     r = kvm_get_msr(env, &msr_mcg_status, 1);
244     if (r == -1 || r == 0) {
245         fprintf(stderr, "Failed to get MCE status\n");
246         return 0;
247     }
248     return !!(msr_mcg_status.data & MCG_STATUS_MCIP);
249 }
250 
251 struct kvm_x86_mce_data
252 {
253     CPUState *env;
254     struct kvm_x86_mce *mce;
255     int abort_on_error;
256 };
257 
258 static void kvm_do_inject_x86_mce(void *_data)
259 {
260     struct kvm_x86_mce_data *data = _data;
261     int r;
262 
263     /* If there is an MCE exception being processed, ignore this SRAO MCE */
264     if ((data->env->mcg_cap & MCG_SER_P) &&
265         !(data->mce->status & MCI_STATUS_AR)) {
266         if (kvm_mce_in_progress(data->env)) {
267             return;
268         }
269     }
270 
271     r = kvm_set_mce(data->env, data->mce);
272     if (r < 0) {
273         perror("kvm_set_mce FAILED");
274         if (data->abort_on_error) {
275             abort();
276         }
277     }
278 }
279 
280 static void kvm_inject_x86_mce_on(CPUState *env, struct kvm_x86_mce *mce,
281                                   int flag)
282 {
283     struct kvm_x86_mce_data data = {
284         .env = env,
285         .mce = mce,
286         .abort_on_error = (flag & ABORT_ON_ERROR),
287     };
288 
289     if (!env->mcg_cap) {
290         fprintf(stderr, "MCE support is not enabled!\n");
291         return;
292     }
293 
294     on_vcpu(env, kvm_do_inject_x86_mce, &data);
295 }
296 
297 static void kvm_mce_broadcast_rest(CPUState *env);
298 #endif
299 
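/*
 * Entry point for injecting a machine-check event into a guest CPU: the
 * MCE_BROADCAST flag fans the event out to the other VCPUs first, and
 * ABORT_ON_ERROR turns a failed injection into an abort() instead of a
 * warning.
 */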
300 void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
301                         uint64_t mcg_status, uint64_t addr, uint64_t misc,
302                         int flag)
303 {
304 #ifdef KVM_CAP_MCE
305     struct kvm_x86_mce mce = {
306         .bank = bank,
307         .status = status,
308         .mcg_status = mcg_status,
309         .addr = addr,
310         .misc = misc,
311     };
312 
313     if (flag & MCE_BROADCAST) {
314         kvm_mce_broadcast_rest(cenv);
315     }
316 
317     kvm_inject_x86_mce_on(cenv, &mce, flag);
318 #else
319     if (flag & ABORT_ON_ERROR) {
320         abort();
321     }
322 #endif
323 }
324 
325 static int _kvm_arch_init_vcpu(CPUState *env);
326 
327 int kvm_arch_init_vcpu(CPUState *env)
328 {
329     int r;
330     struct {
331         struct kvm_cpuid2 cpuid;
332         struct kvm_cpuid_entry2 entries[100];
333     } __attribute__((packed)) cpuid_data;
334     uint32_t limit, i, j, cpuid_i;
335     uint32_t unused;
336     struct kvm_cpuid_entry2 *c;
337 #ifdef CONFIG_KVM_PARA
338     uint32_t signature[3];
339 #endif
340 
341     r = _kvm_arch_init_vcpu(env);
342     if (r < 0) {
343         return r;
344     }
345 
346     env->cpuid_features &= kvm_arch_get_supported_cpuid(env, 1, 0, R_EDX);
347 
348     i = env->cpuid_ext_features & CPUID_EXT_HYPERVISOR;
349     env->cpuid_ext_features &= kvm_arch_get_supported_cpuid(env, 1, 0, R_ECX);
350     env->cpuid_ext_features |= i;
351 
352     env->cpuid_ext2_features &= kvm_arch_get_supported_cpuid(env, 0x80000001,
353                                                              0, R_EDX);
354     env->cpuid_ext3_features &= kvm_arch_get_supported_cpuid(env, 0x80000001,
355                                                              0, R_ECX);
356     env->cpuid_svm_features  &= kvm_arch_get_supported_cpuid(env, 0x8000000A,
357                                                              0, R_EDX);
358 
359 
360     cpuid_i = 0;
361 
362 #ifdef CONFIG_KVM_PARA
363     /* Paravirtualization CPUIDs */
364     memcpy(signature, "KVMKVMKVM\0\0\0", 12);
365     c = &cpuid_data.entries[cpuid_i++];
366     memset(c, 0, sizeof(*c));
367     c->function = KVM_CPUID_SIGNATURE;
368     c->eax = KVM_CPUID_FEATURES;
369     c->ebx = signature[0];
370     c->ecx = signature[1];
371     c->edx = signature[2];
372 
373     c = &cpuid_data.entries[cpuid_i++];
374     memset(c, 0, sizeof(*c));
375     c->function = KVM_CPUID_FEATURES;
376     c->eax = env->cpuid_kvm_features & get_para_features(env);
377 #endif
378 
379     cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
380 
381     for (i = 0; i <= limit; i++) {
382         c = &cpuid_data.entries[cpuid_i++];
383 
384         switch (i) {
385         case 2: {
386             /* Keep reading function 2 till all the input is received */
387             int times;
388 
389             c->function = i;
390             c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
391                        KVM_CPUID_FLAG_STATE_READ_NEXT;
392             cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
393             times = c->eax & 0xff;
394 
395             for (j = 1; j < times; ++j) {
396                 c = &cpuid_data.entries[cpuid_i++];
397                 c->function = i;
398                 c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
399                 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
400             }
401             break;
402         }
403         case 4:
404         case 0xb:
405         case 0xd:
406             for (j = 0; ; j++) {
407                 c->function = i;
408                 c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
409                 c->index = j;
410                 cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
411 
412                 if (i == 4 && c->eax == 0) {
413                     break;
414                 }
415                 if (i == 0xb && !(c->ecx & 0xff00)) {
416                     break;
417                 }
418                 if (i == 0xd && c->eax == 0) {
419                     break;
420                 }
421                 c = &cpuid_data.entries[cpuid_i++];
422             }
423             break;
424         default:
425             c->function = i;
426             c->flags = 0;
427             cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
428             break;
429         }
430     }
431     cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);
432 
433     for (i = 0x80000000; i <= limit; i++) {
434         c = &cpuid_data.entries[cpuid_i++];
435 
436         c->function = i;
437         c->flags = 0;
438         cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
439     }
440 
441     cpuid_data.cpuid.nent = cpuid_i;
442 
443 #ifdef KVM_CAP_MCE
444     if (((env->cpuid_version >> 8)&0xF) >= 6
445         && (env->cpuid_features&(CPUID_MCE|CPUID_MCA)) == (CPUID_MCE|CPUID_MCA)
446         && kvm_check_extension(env->kvm_state, KVM_CAP_MCE) > 0) {
447         uint64_t mcg_cap;
448         int banks;
449 
450         if (kvm_get_mce_cap_supported(env->kvm_state, &mcg_cap, &banks)) {
451             perror("kvm_get_mce_cap_supported FAILED");
452         } else {
453             if (banks > MCE_BANKS_DEF)
454                 banks = MCE_BANKS_DEF;
455             mcg_cap &= MCE_CAP_DEF;
456             mcg_cap |= banks;
457             if (kvm_setup_mce(env, &mcg_cap)) {
458                 perror("kvm_setup_mce FAILED");
459             } else {
460                 env->mcg_cap = mcg_cap;
461             }
462         }
463     }
464 #endif
465 
466     return kvm_vcpu_ioctl(env, KVM_SET_CPUID2, &cpuid_data);
467 }
468 
469 static void kvm_clear_vapic(CPUState *env)
470 {
471 #ifdef KVM_SET_VAPIC_ADDR
472     struct kvm_vapic_addr va = {
473         .vapic_addr = 0,
474     };
475 
476     kvm_vcpu_ioctl(env, KVM_SET_VAPIC_ADDR, &va);
477 #endif
478 }
479 
480 void kvm_arch_reset_vcpu(CPUState *env)
481 {
482     kvm_clear_vapic(env);
483     env->exception_injected = -1;
484     env->interrupt_injected = -1;
485     env->xcr0 = 1;
486     if (kvm_irqchip_in_kernel()) {
487         env->mp_state = cpu_is_bsp(env) ? KVM_MP_STATE_RUNNABLE :
488                                           KVM_MP_STATE_UNINITIALIZED;
489     } else {
490         env->mp_state = KVM_MP_STATE_RUNNABLE;
491     }
492 }
493 
494 
495 static int kvm_get_supported_msrs(KVMState *s)
496 {
497     static int kvm_supported_msrs;
498     int ret = 0;
499 
500     /* first time */
501     if (kvm_supported_msrs == 0) {
502         struct kvm_msr_list msr_list, *kvm_msr_list;
503 
504         kvm_supported_msrs = -1;
505 
506         /* Obtain MSR list from KVM.  These are the MSRs that we must
507          * save/restore */
508         msr_list.nmsrs = 0;
509         ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, &msr_list);
510         if (ret < 0 && ret != -E2BIG) {
511             return ret;
512         }
513         /* Old kernel modules had a bug and could write beyond the provided
514            memory. Allocate a safe minimum of 1K. */
515         kvm_msr_list = qemu_mallocz(MAX(1024, sizeof(msr_list) +
516                                               msr_list.nmsrs *
517                                               sizeof(msr_list.indices[0])));
518 
519         kvm_msr_list->nmsrs = msr_list.nmsrs;
520         ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
521         if (ret >= 0) {
522             int i;
523 
524             for (i = 0; i < kvm_msr_list->nmsrs; i++) {
525                 if (kvm_msr_list->indices[i] == MSR_STAR) {
526                     has_msr_star = true;
527                     continue;
528                 }
529                 if (kvm_msr_list->indices[i] == MSR_VM_HSAVE_PA) {
530                     has_msr_hsave_pa = true;
531                     continue;
532                 }
533             }
534         }
535 
536         free(kvm_msr_list);
537     }
538 
539     return ret;
540 }
541 
542 #ifdef OBSOLETE_KVM_IMPL
543 
544 int kvm_arch_init(KVMState *s)
545 {
546     uint64_t identity_base = 0xfffbc000;
547     int ret;
548     struct utsname utsname;
549 
550     ret = kvm_get_supported_msrs(s);
551     if (ret < 0) {
552         return ret;
553     }
554 
555     uname(&utsname);
556     lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0;
557 
558     /*
559      * On older Intel CPUs, KVM uses vm86 mode to emulate 16-bit code directly.
560      * In order to use vm86 mode, an EPT identity map and a TSS are needed.
561      * Since these must be part of guest physical memory, we need to allocate
562      * them, both by setting their start addresses in the kernel and by
563      * creating a corresponding e820 entry. We need 4 pages before the BIOS.
564      *
565      * Older KVM versions may not support setting the identity map base. In
566      * that case we need to stick with the default, i.e. a 256K maximum BIOS
567      * size.
568      */
569 #ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR
570     if (kvm_check_extension(s, KVM_CAP_SET_IDENTITY_MAP_ADDR)) {
571         /* Allows up to 16M BIOSes. */
572         identity_base = 0xfeffc000;
573 
574         ret = kvm_vm_ioctl(s, KVM_SET_IDENTITY_MAP_ADDR, &identity_base);
575         if (ret < 0) {
576             return ret;
577         }
578     }
579 #endif
580     /* Set TSS base one page after EPT identity map. */
581     ret = kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, identity_base + 0x1000);
582     if (ret < 0) {
583         return ret;
584     }
585 
586     /* Tell fw_cfg to notify the BIOS to reserve the range. */
587     ret = e820_add_entry(identity_base, 0x4000, E820_RESERVED);
588     if (ret < 0) {
589         fprintf(stderr, "e820_add_entry() table is full\n");
590         return ret;
591     }
592 
593     return 0;
594 }
595 
596 #endif
597 
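/*
 * Segment conversion helpers: set_v8086_seg() forces the fixed ring-3,
 * writable-data attributes that vm86 mode expects, while set_seg()/get_seg()
 * translate between QEMU's cached descriptor flags and the kvm_segment
 * bitfields.
 */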
598 static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
599 {
600     lhs->selector = rhs->selector;
601     lhs->base = rhs->base;
602     lhs->limit = rhs->limit;
603     lhs->type = 3;
604     lhs->present = 1;
605     lhs->dpl = 3;
606     lhs->db = 0;
607     lhs->s = 1;
608     lhs->l = 0;
609     lhs->g = 0;
610     lhs->avl = 0;
611     lhs->unusable = 0;
612 }
613 
614 static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
615 {
616     unsigned flags = rhs->flags;
617     lhs->selector = rhs->selector;
618     lhs->base = rhs->base;
619     lhs->limit = rhs->limit;
620     lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
621     lhs->present = (flags & DESC_P_MASK) != 0;
622     lhs->dpl = (flags >> DESC_DPL_SHIFT) & 3;
623     lhs->db = (flags >> DESC_B_SHIFT) & 1;
624     lhs->s = (flags & DESC_S_MASK) != 0;
625     lhs->l = (flags >> DESC_L_SHIFT) & 1;
626     lhs->g = (flags & DESC_G_MASK) != 0;
627     lhs->avl = (flags & DESC_AVL_MASK) != 0;
628     lhs->unusable = 0;
629 }
630 
631 static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
632 {
633     lhs->selector = rhs->selector;
634     lhs->base = rhs->base;
635     lhs->limit = rhs->limit;
636     lhs->flags = (rhs->type << DESC_TYPE_SHIFT) |
637                  (rhs->present * DESC_P_MASK) |
638                  (rhs->dpl << DESC_DPL_SHIFT) |
639                  (rhs->db << DESC_B_SHIFT) |
640                  (rhs->s * DESC_S_MASK) |
641                  (rhs->l << DESC_L_SHIFT) |
642                  (rhs->g * DESC_G_MASK) |
643                  (rhs->avl * DESC_AVL_MASK);
644 }
645 
646 
647 static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set)
648 {
649     if (set) {
650         *kvm_reg = *qemu_reg;
651     } else {
652         *qemu_reg = *kvm_reg;
653     }
654 }
655 
656 static int kvm_getput_regs(CPUState *env, int set)
657 {
658     struct kvm_regs regs;
659     int ret = 0;
660 
661     if (!set) {
662         ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
663         if (ret < 0) {
664             return ret;
665         }
666     }
667 
668     kvm_getput_reg(&regs.rax, &env->regs[R_EAX], set);
669     kvm_getput_reg(&regs.rbx, &env->regs[R_EBX], set);
670     kvm_getput_reg(&regs.rcx, &env->regs[R_ECX], set);
671     kvm_getput_reg(&regs.rdx, &env->regs[R_EDX], set);
672     kvm_getput_reg(&regs.rsi, &env->regs[R_ESI], set);
673     kvm_getput_reg(&regs.rdi, &env->regs[R_EDI], set);
674     kvm_getput_reg(&regs.rsp, &env->regs[R_ESP], set);
675     kvm_getput_reg(&regs.rbp, &env->regs[R_EBP], set);
676 #ifdef TARGET_X86_64
677     kvm_getput_reg(&regs.r8, &env->regs[8], set);
678     kvm_getput_reg(&regs.r9, &env->regs[9], set);
679     kvm_getput_reg(&regs.r10, &env->regs[10], set);
680     kvm_getput_reg(&regs.r11, &env->regs[11], set);
681     kvm_getput_reg(&regs.r12, &env->regs[12], set);
682     kvm_getput_reg(&regs.r13, &env->regs[13], set);
683     kvm_getput_reg(&regs.r14, &env->regs[14], set);
684     kvm_getput_reg(&regs.r15, &env->regs[15], set);
685 #endif
686 
687     kvm_getput_reg(&regs.rflags, &env->eflags, set);
688     kvm_getput_reg(&regs.rip, &env->eip, set);
689 
690     if (set) {
691         ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
692     }
693 
694     return ret;
695 }
696 
697 static int kvm_put_fpu(CPUState *env)
698 {
699     struct kvm_fpu fpu;
700     int i;
701 
702     memset(&fpu, 0, sizeof fpu);
703     fpu.fsw = env->fpus & ~(7 << 11);
704     fpu.fsw |= (env->fpstt & 7) << 11;
705     fpu.fcw = env->fpuc;
706     for (i = 0; i < 8; ++i) {
707         fpu.ftwx |= (!env->fptags[i]) << i;
708     }
709     memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
710     memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
711     fpu.mxcsr = env->mxcsr;
712 
713     return kvm_vcpu_ioctl(env, KVM_SET_FPU, &fpu);
714 }
715 
716 #ifdef KVM_CAP_XSAVE
717 #define XSAVE_CWD_RIP     2
718 #define XSAVE_CWD_RDP     4
719 #define XSAVE_MXCSR       6
720 #define XSAVE_ST_SPACE    8
721 #define XSAVE_XMM_SPACE   40
722 #define XSAVE_XSTATE_BV   128
723 #define XSAVE_YMMH_SPACE  144
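/*
 * The offsets above are in 32-bit words within kvm_xsave.region[] and mirror
 * the hardware XSAVE area: the legacy FXSAVE image occupies bytes 0-511, the
 * XSTATE_BV header starts at byte 512, and the AVX high halves at byte 576.
 */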
724 #endif
725 
726 static int kvm_put_xsave(CPUState *env)
727 {
728 #ifdef KVM_CAP_XSAVE
729     int i, r;
730     struct kvm_xsave* xsave;
731     uint16_t cwd, swd, twd, fop;
732 
733     if (!kvm_has_xsave()) {
734         return kvm_put_fpu(env);
735     }
736 
737     xsave = qemu_memalign(4096, sizeof(struct kvm_xsave));
738     memset(xsave, 0, sizeof(struct kvm_xsave));
739     cwd = swd = twd = fop = 0;
740     swd = env->fpus & ~(7 << 11);
741     swd |= (env->fpstt & 7) << 11;
742     cwd = env->fpuc;
743     for (i = 0; i < 8; ++i) {
744         twd |= (!env->fptags[i]) << i;
745     }
746     xsave->region[0] = (uint32_t)(swd << 16) + cwd;
747     xsave->region[1] = (uint32_t)(fop << 16) + twd;
748     memcpy(&xsave->region[XSAVE_ST_SPACE], env->fpregs,
749             sizeof env->fpregs);
750     memcpy(&xsave->region[XSAVE_XMM_SPACE], env->xmm_regs,
751             sizeof env->xmm_regs);
752     xsave->region[XSAVE_MXCSR] = env->mxcsr;
753     *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV] = env->xstate_bv;
754     memcpy(&xsave->region[XSAVE_YMMH_SPACE], env->ymmh_regs,
755             sizeof env->ymmh_regs);
756     r = kvm_vcpu_ioctl(env, KVM_SET_XSAVE, xsave);
757     qemu_free(xsave);
758     return r;
759 #else
760     return kvm_put_fpu(env);
761 #endif
762 }
763 
764 static int kvm_put_xcrs(CPUState *env)
765 {
766 #ifdef KVM_CAP_XCRS
767     struct kvm_xcrs xcrs;
768 
769     if (!kvm_has_xcrs()) {
770         return 0;
771     }
772 
773     xcrs.nr_xcrs = 1;
774     xcrs.flags = 0;
775     xcrs.xcrs[0].xcr = 0;
776     xcrs.xcrs[0].value = env->xcr0;
777     return kvm_vcpu_ioctl(env, KVM_SET_XCRS, &xcrs);
778 #else
779     return 0;
780 #endif
781 }
782 
783 static int kvm_put_sregs(CPUState *env)
784 {
785     struct kvm_sregs sregs;
786 
787     memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));
788     if (env->interrupt_injected >= 0) {
789         sregs.interrupt_bitmap[env->interrupt_injected / 64] |=
790                 (uint64_t)1 << (env->interrupt_injected % 64);
791     }
792 
793     if ((env->eflags & VM_MASK)) {
794         set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
795         set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
796         set_v8086_seg(&sregs.es, &env->segs[R_ES]);
797         set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
798         set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
799         set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
800     } else {
801         set_seg(&sregs.cs, &env->segs[R_CS]);
802         set_seg(&sregs.ds, &env->segs[R_DS]);
803         set_seg(&sregs.es, &env->segs[R_ES]);
804         set_seg(&sregs.fs, &env->segs[R_FS]);
805         set_seg(&sregs.gs, &env->segs[R_GS]);
806         set_seg(&sregs.ss, &env->segs[R_SS]);
807     }
808 
809     set_seg(&sregs.tr, &env->tr);
810     set_seg(&sregs.ldt, &env->ldt);
811 
812     sregs.idt.limit = env->idt.limit;
813     sregs.idt.base = env->idt.base;
814     sregs.gdt.limit = env->gdt.limit;
815     sregs.gdt.base = env->gdt.base;
816 
817     sregs.cr0 = env->cr[0];
818     sregs.cr2 = env->cr[2];
819     sregs.cr3 = env->cr[3];
820     sregs.cr4 = env->cr[4];
821 
822     sregs.cr8 = cpu_get_apic_tpr(env->apic_state);
823     sregs.apic_base = cpu_get_apic_base(env->apic_state);
824 
825     sregs.efer = env->efer;
826 
827     return kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
828 }
829 
830 static void kvm_msr_entry_set(struct kvm_msr_entry *entry,
831                               uint32_t index, uint64_t value)
832 {
833     entry->index = index;
834     entry->data = value;
835 }
836 
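/*
 * kvm_put_msrs() batches all MSR writes into a single KVM_SET_MSRS call; the
 * anonymous struct below places entries[] directly behind the kvm_msrs
 * header, matching the flexible-array layout the ioctl expects.
 */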
837 static int kvm_put_msrs(CPUState *env, int level)
838 {
839     struct {
840         struct kvm_msrs info;
841         struct kvm_msr_entry entries[100];
842     } msr_data;
843     struct kvm_msr_entry *msrs = msr_data.entries;
844     int n = 0;
845 
846     kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
847     kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
848     kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
849     if (has_msr_star) {
850         kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
851     }
852     if (has_msr_hsave_pa) {
853         kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave);
854     }
855 #ifdef TARGET_X86_64
856     if (lm_capable_kernel) {
857         kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
858         kvm_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
859         kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
860         kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
861     }
862 #endif
863     if (level == KVM_PUT_FULL_STATE) {
864         /*
865          * KVM cannot yet synchronize the TSC values of multiple VCPUs on
866          * writeback. Until this is fixed, we only write the offset to SMP
867          * guests after migration, desynchronizing the VCPUs, but avoiding
868          * huge jump-backs that would occur without any writeback at all.
869          */
870         if (smp_cpus == 1 || env->tsc != 0) {
871             kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
872         }
873     }
874     /*
875      * The following paravirtual MSRs have side effects on the guest or are
876      * too heavy for normal writeback. Limit them to reset or full state
877      * updates.
878      */
879     if (level >= KVM_PUT_RESET_STATE) {
880         kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME,
881                           env->system_time_msr);
882         kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
883 #if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ASYNC_PF)
884         if (has_msr_async_pf_en) {
885             kvm_msr_entry_set(&msrs[n++], MSR_KVM_ASYNC_PF_EN,
886                               env->async_pf_en_msr);
887         }
888 #endif
889     }
890 #ifdef KVM_CAP_MCE
891     if (env->mcg_cap) {
892         int i;
893 
894         if (level == KVM_PUT_RESET_STATE) {
895             kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
896         } else if (level == KVM_PUT_FULL_STATE) {
897             kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
898             kvm_msr_entry_set(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl);
899             for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) {
900                 kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]);
901             }
902         }
903     }
904 #endif
905 
906     msr_data.info.nmsrs = n;
907 
908     return kvm_vcpu_ioctl(env, KVM_SET_MSRS, &msr_data);
909 
910 }
911 
912 static int kvm_get_fpu(CPUState *env)
913 {
914     struct kvm_fpu fpu;
915     int i, ret;
916 
917     ret = kvm_vcpu_ioctl(env, KVM_GET_FPU, &fpu);
918     if (ret < 0) {
919         return ret;
920     }
921 
922     env->fpstt = (fpu.fsw >> 11) & 7;
923     env->fpus = fpu.fsw;
924     env->fpuc = fpu.fcw;
925     for (i = 0; i < 8; ++i) {
926         env->fptags[i] = !((fpu.ftwx >> i) & 1);
927     }
928     memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
929     memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);
930     env->mxcsr = fpu.mxcsr;
931 
932     return 0;
933 }
934 
935 static int kvm_get_xsave(CPUState *env)
936 {
937 #ifdef KVM_CAP_XSAVE
938     struct kvm_xsave* xsave;
939     int ret, i;
940     uint16_t cwd, swd, twd, fop;
941 
942     if (!kvm_has_xsave()) {
943         return kvm_get_fpu(env);
944     }
945 
946     xsave = qemu_memalign(4096, sizeof(struct kvm_xsave));
947     ret = kvm_vcpu_ioctl(env, KVM_GET_XSAVE, xsave);
948     if (ret < 0) {
949         qemu_free(xsave);
950         return ret;
951     }
952 
953     cwd = (uint16_t)xsave->region[0];
954     swd = (uint16_t)(xsave->region[0] >> 16);
955     twd = (uint16_t)xsave->region[1];
956     fop = (uint16_t)(xsave->region[1] >> 16);
957     env->fpstt = (swd >> 11) & 7;
958     env->fpus = swd;
959     env->fpuc = cwd;
960     for (i = 0; i < 8; ++i) {
961         env->fptags[i] = !((twd >> i) & 1);
962     }
963     env->mxcsr = xsave->region[XSAVE_MXCSR];
964     memcpy(env->fpregs, &xsave->region[XSAVE_ST_SPACE],
965             sizeof env->fpregs);
966     memcpy(env->xmm_regs, &xsave->region[XSAVE_XMM_SPACE],
967             sizeof env->xmm_regs);
968     env->xstate_bv = *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV];
969     memcpy(env->ymmh_regs, &xsave->region[XSAVE_YMMH_SPACE],
970             sizeof env->ymmh_regs);
971     qemu_free(xsave);
972     return 0;
973 #else
974     return kvm_get_fpu(env);
975 #endif
976 }
977 
978 static int kvm_get_xcrs(CPUState *env)
979 {
980 #ifdef KVM_CAP_XCRS
981     int i, ret;
982     struct kvm_xcrs xcrs;
983 
984     if (!kvm_has_xcrs()) {
985         return 0;
986     }
987 
988     ret = kvm_vcpu_ioctl(env, KVM_GET_XCRS, &xcrs);
989     if (ret < 0) {
990         return ret;
991     }
992 
993     for (i = 0; i < xcrs.nr_xcrs; i++) {
994         /* Only support xcr0 now */
995         if (xcrs.xcrs[0].xcr == 0) {
996             env->xcr0 = xcrs.xcrs[0].value;
997             break;
998         }
999     }
1000     return 0;
1001 #else
1002     return 0;
1003 #endif
1004 }
1005 
1006 static int kvm_get_sregs(CPUState *env)
1007 {
1008     struct kvm_sregs sregs;
1009     uint32_t hflags;
1010     int bit, i, ret;
1011 
1012     ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
1013     if (ret < 0) {
1014         return ret;
1015     }
1016 
1017     /* There can only be one pending IRQ set in the bitmap at a time, so try
1018        to find it and save its number instead (-1 for none). */
1019     env->interrupt_injected = -1;
1020     for (i = 0; i < ARRAY_SIZE(sregs.interrupt_bitmap); i++) {
1021         if (sregs.interrupt_bitmap[i]) {
1022             bit = ctz64(sregs.interrupt_bitmap[i]);
1023             env->interrupt_injected = i * 64 + bit;
1024             break;
1025         }
1026     }
1027 
1028     get_seg(&env->segs[R_CS], &sregs.cs);
1029     get_seg(&env->segs[R_DS], &sregs.ds);
1030     get_seg(&env->segs[R_ES], &sregs.es);
1031     get_seg(&env->segs[R_FS], &sregs.fs);
1032     get_seg(&env->segs[R_GS], &sregs.gs);
1033     get_seg(&env->segs[R_SS], &sregs.ss);
1034 
1035     get_seg(&env->tr, &sregs.tr);
1036     get_seg(&env->ldt, &sregs.ldt);
1037 
1038     env->idt.limit = sregs.idt.limit;
1039     env->idt.base = sregs.idt.base;
1040     env->gdt.limit = sregs.gdt.limit;
1041     env->gdt.base = sregs.gdt.base;
1042 
1043     env->cr[0] = sregs.cr0;
1044     env->cr[2] = sregs.cr2;
1045     env->cr[3] = sregs.cr3;
1046     env->cr[4] = sregs.cr4;
1047 
1048     cpu_set_apic_base(env->apic_state, sregs.apic_base);
1049 
1050     env->efer = sregs.efer;
1051     //cpu_set_apic_tpr(env->apic_state, sregs.cr8);
1052 
1053 #define HFLAG_COPY_MASK \
1054     ~( HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
1055        HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
1056        HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
1057        HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
1058 
1059     hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
1060     hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
1061     hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
1062                 (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
1063     hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
1064     hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
1065                 (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);
1066 
1067     if (env->efer & MSR_EFER_LMA) {
1068         hflags |= HF_LMA_MASK;
1069     }
1070 
1071     if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
1072         hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
1073     } else {
1074         hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
1075                     (DESC_B_SHIFT - HF_CS32_SHIFT);
1076         hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
1077                     (DESC_B_SHIFT - HF_SS32_SHIFT);
1078         if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK) ||
1079             !(hflags & HF_CS32_MASK)) {
1080             hflags |= HF_ADDSEG_MASK;
1081         } else {
1082             hflags |= ((env->segs[R_DS].base | env->segs[R_ES].base |
1083                         env->segs[R_SS].base) != 0) << HF_ADDSEG_SHIFT;
1084         }
1085     }
1086     env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
1087 
1088     return 0;
1089 }
1090 
1091 static int kvm_get_msrs(CPUState *env)
1092 {
1093     struct {
1094         struct kvm_msrs info;
1095         struct kvm_msr_entry entries[100];
1096     } msr_data;
1097     struct kvm_msr_entry *msrs = msr_data.entries;
1098     int ret, i, n;
1099 
1100     n = 0;
1101     msrs[n++].index = MSR_IA32_SYSENTER_CS;
1102     msrs[n++].index = MSR_IA32_SYSENTER_ESP;
1103     msrs[n++].index = MSR_IA32_SYSENTER_EIP;
1104     if (has_msr_star) {
1105         msrs[n++].index = MSR_STAR;
1106     }
1107     if (has_msr_hsave_pa) {
1108         msrs[n++].index = MSR_VM_HSAVE_PA;
1109     }
1110     msrs[n++].index = MSR_IA32_TSC;
1111 #ifdef TARGET_X86_64
1112     if (lm_capable_kernel) {
1113         msrs[n++].index = MSR_CSTAR;
1114         msrs[n++].index = MSR_KERNELGSBASE;
1115         msrs[n++].index = MSR_FMASK;
1116         msrs[n++].index = MSR_LSTAR;
1117     }
1118 #endif
1119     msrs[n++].index = MSR_KVM_SYSTEM_TIME;
1120     msrs[n++].index = MSR_KVM_WALL_CLOCK;
1121 #if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ASYNC_PF)
1122     if (has_msr_async_pf_en) {
1123         msrs[n++].index = MSR_KVM_ASYNC_PF_EN;
1124     }
1125 #endif
1126 
1127 #ifdef KVM_CAP_MCE
1128     if (env->mcg_cap) {
1129         msrs[n++].index = MSR_MCG_STATUS;
1130         msrs[n++].index = MSR_MCG_CTL;
1131         for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) {
1132             msrs[n++].index = MSR_MC0_CTL + i;
1133         }
1134     }
1135 #endif
1136 
1137     msr_data.info.nmsrs = n;
1138     ret = kvm_vcpu_ioctl(env, KVM_GET_MSRS, &msr_data);
1139     if (ret < 0) {
1140         return ret;
1141     }
1142 
1143     for (i = 0; i < ret; i++) {
1144         switch (msrs[i].index) {
1145         case MSR_IA32_SYSENTER_CS:
1146             env->sysenter_cs = msrs[i].data;
1147             break;
1148         case MSR_IA32_SYSENTER_ESP:
1149             env->sysenter_esp = msrs[i].data;
1150             break;
1151         case MSR_IA32_SYSENTER_EIP:
1152             env->sysenter_eip = msrs[i].data;
1153             break;
1154         case MSR_STAR:
1155             env->star = msrs[i].data;
1156             break;
1157 #ifdef TARGET_X86_64
1158         case MSR_CSTAR:
1159             env->cstar = msrs[i].data;
1160             break;
1161         case MSR_KERNELGSBASE:
1162             env->kernelgsbase = msrs[i].data;
1163             break;
1164         case MSR_FMASK:
1165             env->fmask = msrs[i].data;
1166             break;
1167         case MSR_LSTAR:
1168             env->lstar = msrs[i].data;
1169             break;
1170 #endif
1171         case MSR_IA32_TSC:
1172             env->tsc = msrs[i].data;
1173             break;
1174         case MSR_VM_HSAVE_PA:
1175             env->vm_hsave = msrs[i].data;
1176             break;
1177         case MSR_KVM_SYSTEM_TIME:
1178             env->system_time_msr = msrs[i].data;
1179             break;
1180         case MSR_KVM_WALL_CLOCK:
1181             env->wall_clock_msr = msrs[i].data;
1182             break;
1183 #ifdef KVM_CAP_MCE
1184         case MSR_MCG_STATUS:
1185             env->mcg_status = msrs[i].data;
1186             break;
1187         case MSR_MCG_CTL:
1188             env->mcg_ctl = msrs[i].data;
1189             break;
1190 #endif
1191         default:
1192 #ifdef KVM_CAP_MCE
1193             if (msrs[i].index >= MSR_MC0_CTL &&
1194                 msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) {
1195                 env->mce_banks[msrs[i].index - MSR_MC0_CTL] = msrs[i].data;
1196             }
1197 #endif
1198             break;
1199 #if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ASYNC_PF)
1200         case MSR_KVM_ASYNC_PF_EN:
1201             env->async_pf_en_msr = msrs[i].data;
1202             break;
1203 #endif
1204         }
1205     }
1206 
1207     return 0;
1208 }
1209 
1210 #ifdef OBSOLETE_KVM_IMPL
1211 static int kvm_put_mp_state(CPUState *env)
1212 {
1213     struct kvm_mp_state mp_state = { .mp_state = env->mp_state };
1214 
1215     return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, &mp_state);
1216 }
1217 
1218 static int kvm_get_mp_state(CPUState *env)
1219 {
1220     struct kvm_mp_state mp_state;
1221     int ret;
1222 
1223     ret = kvm_vcpu_ioctl(env, KVM_GET_MP_STATE, &mp_state);
1224     if (ret < 0) {
1225         return ret;
1226     }
1227     env->mp_state = mp_state.mp_state;
1228     if (kvm_irqchip_in_kernel()) {
1229         env->halted = (mp_state.mp_state == KVM_MP_STATE_HALTED);
1230     }
1231     return 0;
1232 }
1233 #endif
1234 
1235 static int kvm_put_vcpu_events(CPUState *env, int level)
1236 {
1237 #ifdef KVM_CAP_VCPU_EVENTS
1238     struct kvm_vcpu_events events;
1239 
1240     if (!kvm_has_vcpu_events()) {
1241         return 0;
1242     }
1243 
1244     events.exception.injected = (env->exception_injected >= 0);
1245     events.exception.nr = env->exception_injected;
1246     events.exception.has_error_code = env->has_error_code;
1247     events.exception.error_code = env->error_code;
1248 
1249     events.interrupt.injected = (env->interrupt_injected >= 0);
1250     events.interrupt.nr = env->interrupt_injected;
1251     events.interrupt.soft = env->soft_interrupt;
1252 
1253     events.nmi.injected = env->nmi_injected;
1254     events.nmi.pending = env->nmi_pending;
1255     events.nmi.masked = !!(env->hflags2 & HF2_NMI_MASK);
1256 
1257     events.sipi_vector = env->sipi_vector;
1258 
1259     events.flags = 0;
1260     if (level >= KVM_PUT_RESET_STATE) {
1261         events.flags |=
1262             KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR;
1263     }
1264 
1265     return kvm_vcpu_ioctl(env, KVM_SET_VCPU_EVENTS, &events);
1266 #else
1267     return 0;
1268 #endif
1269 }
1270 
1271 static int kvm_get_vcpu_events(CPUState *env)
1272 {
1273 #ifdef KVM_CAP_VCPU_EVENTS
1274     struct kvm_vcpu_events events;
1275     int ret;
1276 
1277     if (!kvm_has_vcpu_events()) {
1278         return 0;
1279     }
1280 
1281     ret = kvm_vcpu_ioctl(env, KVM_GET_VCPU_EVENTS, &events);
1282     if (ret < 0) {
1283        return ret;
1284     }
1285     env->exception_injected =
1286        events.exception.injected ? events.exception.nr : -1;
1287     env->has_error_code = events.exception.has_error_code;
1288     env->error_code = events.exception.error_code;
1289 
1290     env->interrupt_injected =
1291         events.interrupt.injected ? events.interrupt.nr : -1;
1292     env->soft_interrupt = events.interrupt.soft;
1293 
1294     env->nmi_injected = events.nmi.injected;
1295     env->nmi_pending = events.nmi.pending;
1296     if (events.nmi.masked) {
1297         env->hflags2 |= HF2_NMI_MASK;
1298     } else {
1299         env->hflags2 &= ~HF2_NMI_MASK;
1300     }
1301 
1302     env->sipi_vector = events.sipi_vector;
1303 #endif
1304 
1305     return 0;
1306 }
1307 
1308 static int kvm_guest_debug_workarounds(CPUState *env)
1309 {
1310     int ret = 0;
1311 #ifdef KVM_CAP_SET_GUEST_DEBUG
1312     unsigned long reinject_trap = 0;
1313 
1314     if (!kvm_has_vcpu_events()) {
1315         if (env->exception_injected == 1) {
1316             reinject_trap = KVM_GUESTDBG_INJECT_DB;
1317         } else if (env->exception_injected == 3) {
1318             reinject_trap = KVM_GUESTDBG_INJECT_BP;
1319         }
1320         env->exception_injected = -1;
1321     }
1322 
1323     /*
1324      * Kernels before KVM_CAP_X86_ROBUST_SINGLESTEP overwrote flags.TF
1325      * injected via SET_GUEST_DEBUG while updating GP regs. Work around this
1326      * by updating the debug state once again if single-stepping is on.
1327      * Another reason to call kvm_update_guest_debug here is a pending debug
1328      * trap raise by the guest. On kernels without SET_VCPU_EVENTS we have to
1329      * reinject them via SET_GUEST_DEBUG.
1330      */
1331     if (reinject_trap ||
1332         (!kvm_has_robust_singlestep() && env->singlestep_enabled)) {
1333         ret = kvm_update_guest_debug(env, reinject_trap);
1334     }
1335 #endif /* KVM_CAP_SET_GUEST_DEBUG */
1336     return ret;
1337 }
1338 
1339 static int kvm_put_debugregs(CPUState *env)
1340 {
1341 #ifdef KVM_CAP_DEBUGREGS
1342     struct kvm_debugregs dbgregs;
1343     int i;
1344 
1345     if (!kvm_has_debugregs()) {
1346         return 0;
1347     }
1348 
1349     for (i = 0; i < 4; i++) {
1350         dbgregs.db[i] = env->dr[i];
1351     }
1352     dbgregs.dr6 = env->dr[6];
1353     dbgregs.dr7 = env->dr[7];
1354     dbgregs.flags = 0;
1355 
1356     return kvm_vcpu_ioctl(env, KVM_SET_DEBUGREGS, &dbgregs);
1357 #else
1358     return 0;
1359 #endif
1360 }
1361 
1362 static int kvm_get_debugregs(CPUState *env)
1363 {
1364 #ifdef KVM_CAP_DEBUGREGS
1365     struct kvm_debugregs dbgregs;
1366     int i, ret;
1367 
1368     if (!kvm_has_debugregs()) {
1369         return 0;
1370     }
1371 
1372     ret = kvm_vcpu_ioctl(env, KVM_GET_DEBUGREGS, &dbgregs);
1373     if (ret < 0) {
1374         return ret;
1375     }
1376     for (i = 0; i < 4; i++) {
1377         env->dr[i] = dbgregs.db[i];
1378     }
1379     env->dr[4] = env->dr[6] = dbgregs.dr6;
1380     env->dr[5] = env->dr[7] = dbgregs.dr7;
1381 #endif
1382 
1383     return 0;
1384 }
1385 
1386 #ifdef OBSOLETE_KVM_IMPL
1387 int kvm_arch_put_registers(CPUState *env, int level)
1388 {
1389     int ret;
1390 
1391     assert(cpu_is_stopped(env) || qemu_cpu_self(env));
1392 
1393     ret = kvm_getput_regs(env, 1);
1394     if (ret < 0) {
1395         return ret;
1396     }
1397     ret = kvm_put_xsave(env);
1398     if (ret < 0) {
1399         return ret;
1400     }
1401     ret = kvm_put_xcrs(env);
1402     if (ret < 0) {
1403         return ret;
1404     }
1405     ret = kvm_put_sregs(env);
1406     if (ret < 0) {
1407         return ret;
1408     }
1409     ret = kvm_put_msrs(env, level);
1410     if (ret < 0) {
1411         return ret;
1412     }
1413     if (level >= KVM_PUT_RESET_STATE) {
1414         ret = kvm_put_mp_state(env);
1415         if (ret < 0) {
1416             return ret;
1417         }
1418     }
1419     ret = kvm_put_vcpu_events(env, level);
1420     if (ret < 0) {
1421         return ret;
1422     }
1423     ret = kvm_put_debugregs(env);
1424     if (ret < 0) {
1425         return ret;
1426     }
1427     /* must be last */
1428     ret = kvm_guest_debug_workarounds(env);
1429     if (ret < 0) {
1430         return ret;
1431     }
1432     return 0;
1433 }
1434 
1435 int kvm_arch_get_registers(CPUState *env)
1436 {
1437     int ret;
1438 
1439     assert(cpu_is_stopped(env) || qemu_cpu_self(env));
1440 
1441     ret = kvm_getput_regs(env, 0);
1442     if (ret < 0) {
1443         return ret;
1444     }
1445     ret = kvm_get_xsave(env);
1446     if (ret < 0) {
1447         return ret;
1448     }
1449     ret = kvm_get_xcrs(env);
1450     if (ret < 0) {
1451         return ret;
1452     }
1453     ret = kvm_get_sregs(env);
1454     if (ret < 0) {
1455         return ret;
1456     }
1457     ret = kvm_get_msrs(env);
1458     if (ret < 0) {
1459         return ret;
1460     }
1461     ret = kvm_get_mp_state(env);
1462     if (ret < 0) {
1463         return ret;
1464     }
1465     ret = kvm_get_vcpu_events(env);
1466     if (ret < 0) {
1467         return ret;
1468     }
1469     ret = kvm_get_debugregs(env);
1470     if (ret < 0) {
1471         return ret;
1472     }
1473     return 0;
1474 }
1475 
1476 int kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
1477 {
1478     /* Inject NMI */
1479     if (env->interrupt_request & CPU_INTERRUPT_NMI) {
1480         env->interrupt_request &= ~CPU_INTERRUPT_NMI;
1481         DPRINTF("injected NMI\n");
1482         kvm_vcpu_ioctl(env, KVM_NMI);
1483     }
1484 
1485     /* Try to inject an interrupt if the guest can accept it */
1486     if (run->ready_for_interrupt_injection &&
1487         (env->interrupt_request & CPU_INTERRUPT_HARD) &&
1488         (env->eflags & IF_MASK)) {
1489         int irq;
1490 
1491         env->interrupt_request &= ~CPU_INTERRUPT_HARD;
1492         irq = cpu_get_pic_interrupt(env);
1493         if (irq >= 0) {
1494             struct kvm_interrupt intr;
1495             intr.irq = irq;
1496             /* FIXME: errors */
1497             DPRINTF("injected interrupt %d\n", irq);
1498             kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr);
1499         }
1500     }
1501 
1502     /* If we have an interrupt but the guest is not ready to receive an
1503      * interrupt, request an interrupt window exit.  This will
1504      * cause a return to userspace as soon as the guest is ready to
1505      * receive interrupts. */
1506     if ((env->interrupt_request & CPU_INTERRUPT_HARD)) {
1507         run->request_interrupt_window = 1;
1508     } else {
1509         run->request_interrupt_window = 0;
1510     }
1511 
1512     DPRINTF("setting tpr\n");
1513     run->cr8 = cpu_get_apic_tpr(env->apic_state);
1514 
1515     return 0;
1516 }
1517 #endif
1518 
1519 int kvm_arch_post_run(CPUState *env, struct kvm_run *run)
1520 {
1521     if (run->if_flag) {
1522         env->eflags |= IF_MASK;
1523     } else {
1524         env->eflags &= ~IF_MASK;
1525     }
1526     cpu_set_apic_tpr(env->apic_state, run->cr8);
1527     cpu_set_apic_base(env->apic_state, run->apic_base);
1528 
1529     return 0;
1530 }
1531 
1532 #ifdef OBSOLETE_KVM_IMPL
1533 
1534 int kvm_arch_process_irqchip_events(CPUState *env)
1535 {
1536     if (env->interrupt_request & CPU_INTERRUPT_INIT) {
1537         kvm_cpu_synchronize_state(env);
1538         do_cpu_init(env);
1539         env->exception_index = EXCP_HALTED;
1540     }
1541 
1542     if (env->interrupt_request & CPU_INTERRUPT_SIPI) {
1543         kvm_cpu_synchronize_state(env);
1544         do_cpu_sipi(env);
1545     }
1546 
1547     return env->halted;
1548 }
1549 
1550 static int kvm_handle_halt(CPUState *env)
1551 {
1552     if (!((env->interrupt_request & CPU_INTERRUPT_HARD) &&
1553           (env->eflags & IF_MASK)) &&
1554         !(env->interrupt_request & CPU_INTERRUPT_NMI)) {
1555         env->halted = 1;
1556         env->exception_index = EXCP_HLT;
1557         return 0;
1558     }
1559 
1560     return 1;
1561 }
1562 
1563 static bool host_supports_vmx(void)
1564 {
1565     uint32_t ecx, unused;
1566 
1567     host_cpuid(1, 0, &unused, &unused, &ecx, &unused);
1568     return ecx & CPUID_EXT_VMX;
1569 }
1570 
1571 #define VMX_INVALID_GUEST_STATE 0x80000021
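/*
 * VMX basic exit reason 0x21 is "VM-entry failure due to invalid guest
 * state"; bit 31 marks it as an entry failure, which is how the value shows
 * up in run->fail_entry.hardware_entry_failure_reason.
 */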
1572 
1573 int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run)
1574 {
1575     uint64_t code;
1576     int ret = 0;
1577 
1578     switch (run->exit_reason) {
1579     case KVM_EXIT_HLT:
1580         DPRINTF("handle_hlt\n");
1581         ret = kvm_handle_halt(env);
1582         break;
1583     case KVM_EXIT_SET_TPR:
1584         ret = 1;
1585         break;
1586     case KVM_EXIT_FAIL_ENTRY:
1587         code = run->fail_entry.hardware_entry_failure_reason;
1588         fprintf(stderr, "KVM: entry failed, hardware error 0x%" PRIx64 "\n",
1589                 code);
1590         if (host_supports_vmx() && code == VMX_INVALID_GUEST_STATE) {
1591             fprintf(stderr,
1592                     "\nIf you're running a guest on an Intel machine without "
1593                         "unrestricted mode\n"
1594                     "support, the failure is most likely due to the guest "
1595                         "entering an invalid\n"
1596                     "state for Intel VT. For example, the guest may be running "
1597                         "in big real mode\n"
1598                     "which is not supported on older Intel processors."
1599                         "\n\n");
1600         }
1601         ret = -1;
1602         break;
1603     case KVM_EXIT_EXCEPTION:
1604         fprintf(stderr, "KVM: exception %d exit (error code 0x%x)\n",
1605                 run->ex.exception, run->ex.error_code);
1606         ret = -1;
1607         break;
1608     default:
1609         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1610         ret = -1;
1611         break;
1612     }
1613 
1614     return ret;
1615 }
1616 #endif
1617 
1618 #ifdef KVM_CAP_SET_GUEST_DEBUG
1619 int kvm_arch_insert_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp)
1620 {
1621     static const uint8_t int3 = 0xcc;
1622 
1623     if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) ||
1624         cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&int3, 1, 1)) {
1625         return -EINVAL;
1626     }
1627     return 0;
1628 }
1629 
1630 int kvm_arch_remove_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp)
1631 {
1632     uint8_t int3;
1633 
1634     if (cpu_memory_rw_debug(env, bp->pc, &int3, 1, 0) || int3 != 0xcc ||
1635         cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) {
1636         return -EINVAL;
1637     }
1638     return 0;
1639 }
1640 
1641 static struct {
1642     target_ulong addr;
1643     int len;
1644     int type;
1645 } hw_breakpoint[4];
1646 
1647 static int nb_hw_breakpoint;
1648 
1649 static int find_hw_breakpoint(target_ulong addr, int len, int type)
1650 {
1651     int n;
1652 
1653     for (n = 0; n < nb_hw_breakpoint; n++) {
1654         if (hw_breakpoint[n].addr == addr && hw_breakpoint[n].type == type &&
1655             (hw_breakpoint[n].len == len || len == -1)) {
1656             return n;
1657         }
1658     }
1659     return -1;
1660 }
1661 
1662 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1663                                   target_ulong len, int type)
1664 {
1665     switch (type) {
1666     case GDB_BREAKPOINT_HW:
1667         len = 1;
1668         break;
1669     case GDB_WATCHPOINT_WRITE:
1670     case GDB_WATCHPOINT_ACCESS:
1671         switch (len) {
1672         case 1:
1673             break;
1674         case 2:
1675         case 4:
1676         case 8:
1677             if (addr & (len - 1)) {
1678                 return -EINVAL;
1679             }
1680             break;
1681         default:
1682             return -EINVAL;
1683         }
1684         break;
1685     default:
1686         return -ENOSYS;
1687     }
1688 
1689     if (nb_hw_breakpoint == 4) {
1690         return -ENOBUFS;
1691     }
1692     if (find_hw_breakpoint(addr, len, type) >= 0) {
1693         return -EEXIST;
1694     }
1695     hw_breakpoint[nb_hw_breakpoint].addr = addr;
1696     hw_breakpoint[nb_hw_breakpoint].len = len;
1697     hw_breakpoint[nb_hw_breakpoint].type = type;
1698     nb_hw_breakpoint++;
1699 
1700     return 0;
1701 }
1702 
1703 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1704                                   target_ulong len, int type)
1705 {
1706     int n;
1707 
1708     n = find_hw_breakpoint(addr, (type == GDB_BREAKPOINT_HW) ? 1 : len, type);
1709     if (n < 0) {
1710         return -ENOENT;
1711     }
1712     nb_hw_breakpoint--;
1713     hw_breakpoint[n] = hw_breakpoint[nb_hw_breakpoint];
1714 
1715     return 0;
1716 }
1717 
1718 void kvm_arch_remove_all_hw_breakpoints(void)
1719 {
1720     nb_hw_breakpoint = 0;
1721 }
1722 
1723 static CPUWatchpoint hw_watchpoint;
1724 
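/*
 * Decode a #DB exit: DR6 bit 14 (BS) indicates a single-step trap, bits 0-3
 * identify which hardware breakpoint fired, and the matching DR7 type field
 * distinguishes execution, write and access breaks.  Anything we do not
 * recognise is re-injected into the guest.
 */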
1725 int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info)
1726 {
1727     int handle = 0;
1728     int n;
1729 
1730     if (arch_info->exception == 1) {
1731         if (arch_info->dr6 & (1 << 14)) {
1732             if (cpu_single_env->singlestep_enabled) {
1733                 handle = 1;
1734             }
1735         } else {
1736             for (n = 0; n < 4; n++) {
1737                 if (arch_info->dr6 & (1 << n)) {
1738                     switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) {
1739                     case 0x0:
1740                         handle = 1;
1741                         break;
1742                     case 0x1:
1743                         handle = 1;
1744                         cpu_single_env->watchpoint_hit = &hw_watchpoint;
1745                         hw_watchpoint.vaddr = hw_breakpoint[n].addr;
1746                         hw_watchpoint.flags = BP_MEM_WRITE;
1747                         break;
1748                     case 0x3:
1749                         handle = 1;
1750                         cpu_single_env->watchpoint_hit = &hw_watchpoint;
1751                         hw_watchpoint.vaddr = hw_breakpoint[n].addr;
1752                         hw_watchpoint.flags = BP_MEM_ACCESS;
1753                         break;
1754                     }
1755                 }
1756             }
1757         }
1758     } else if (kvm_find_sw_breakpoint(cpu_single_env, arch_info->pc)) {
1759         handle = 1;
1760     }
1761     if (!handle) {
1762         cpu_synchronize_state(cpu_single_env);
1763         assert(cpu_single_env->exception_injected == -1);
1764 
1765         cpu_single_env->exception_injected = arch_info->exception;
1766         cpu_single_env->has_error_code = 0;
1767     }
1768 
1769     return handle;
1770 }
1771 
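/*
 * Program the debug state that KVM loads before re-entering the vCPU.
 * debugreg[0..3] carry the breakpoint addresses and debugreg[7] is a DR7
 * image: (2 << (n * 2)) sets the global-enable bit Gn for slot n, and the
 * R/Wn and LENn fields for slot n sit at bits 16+4n and 18+4n (the
 * len_code table maps 1/2/4/8 bytes to the DR7 LEN encodings 0/1/3/2).
 * The 0x0600 seed appears to set DR7's exact-breakpoint-enable and
 * reserved must-be-one bits.
 */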
1772 void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg)
1773 {
1774     const uint8_t type_code[] = {
1775         [GDB_BREAKPOINT_HW] = 0x0,
1776         [GDB_WATCHPOINT_WRITE] = 0x1,
1777         [GDB_WATCHPOINT_ACCESS] = 0x3
1778     };
1779     const uint8_t len_code[] = {
1780         [1] = 0x0, [2] = 0x1, [4] = 0x3, [8] = 0x2
1781     };
1782     int n;
1783 
1784     if (kvm_sw_breakpoints_active(env)) {
1785         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1786     }
1787     if (nb_hw_breakpoint > 0) {
1788         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1789         dbg->arch.debugreg[7] = 0x0600;
1790         for (n = 0; n < nb_hw_breakpoint; n++) {
1791             dbg->arch.debugreg[n] = hw_breakpoint[n].addr;
1792             dbg->arch.debugreg[7] |= (2 << (n * 2)) |
1793                 (type_code[hw_breakpoint[n].type] << (16 + n*4)) |
1794                 ((uint32_t)len_code[hw_breakpoint[n].len] << (18 + n*4));
1795         }
1796     }
1797 }
1798 #endif /* KVM_CAP_SET_GUEST_DEBUG */
1799 
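/*
 * Decide whether an emulation failure should stop the VM: return true when
 * the guest is in real mode (CR0.PE clear) or running at CPL != 3, i.e.
 * whenever the failure did not occur in guest user mode.
 */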
1800 bool kvm_arch_stop_on_emulation_error(CPUState *env)
1801 {
1802     return !(env->cr[0] & CR0_PE_MASK) ||
1803            ((env->segs[R_CS].selector  & 3) != 3);
1804 }
1805 
1806 static void hardware_memory_error(void)
1807 {
1808     fprintf(stderr, "Hardware memory error!\n");
1809     exit(1);
1810 }
1811 
1812 #ifdef KVM_CAP_MCE
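/*
 * Recent x86 processors broadcast machine checks to every logical CPU.
 * When cpu_x86_support_mca_broadcast() says the guest CPU model behaves
 * that way, mimic it by injecting a dummy VAL|UC record (with RIPV set so
 * the other vCPUs can resume) into every vCPU except the one that took
 * the real error.
 */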
1813 static void kvm_mce_broadcast_rest(CPUState *env)
1814 {
1815     struct kvm_x86_mce mce = {
1816         .bank = 1,
1817         .status = MCI_STATUS_VAL | MCI_STATUS_UC,
1818         .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV,
1819         .addr = 0,
1820         .misc = 0,
1821     };
1822     CPUState *cenv;
1823 
1824     /* Broadcast MCA signal for processor version 06H_EH and above */
1825     if (cpu_x86_support_mca_broadcast(env)) {
1826         for (cenv = first_cpu; cenv != NULL; cenv = cenv->next_cpu) {
1827             if (cenv == env) {
1828                 continue;
1829             }
1830             kvm_inject_x86_mce_on(cenv, &mce, ABORT_ON_ERROR);
1831         }
1832     }
1833 }
1834 
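/*
 * Fake an SRAR (Software Recoverable Action Required) data-load UCR event.
 * Per the Intel SDM, MCACOD 0x134 is the data-load error code; EIPV is set
 * because the trapped instruction is the one that consumed the poison, and
 * MCi_MISC encodes a physical address mode with a recoverable-address LSB
 * of 0xc, i.e. the address is valid down to 4 KiB page granularity.
 */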
1835 static void kvm_mce_inj_srar_dataload(CPUState *env, target_phys_addr_t paddr)
1836 {
1837     struct kvm_x86_mce mce = {
1838         .bank = 9,
1839         .status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
1840                   | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
1841                   | MCI_STATUS_AR | 0x134,
1842         .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV,
1843         .addr = paddr,
1844         .misc = (MCM_ADDR_PHYS << 6) | 0xc,
1845     };
1846     int r;
1847 
1848     r = kvm_set_mce(env, &mce);
1849     if (r < 0) {
1850         fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno));
1851         abort();
1852     }
1853     kvm_mce_broadcast_rest(env);
1854 }
1855 
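/*
 * Fake an SRAO (Software Recoverable Action Optional) memory-scrubbing UCR
 * event: per the Intel SDM, MCACOD 0xc0 is the memory-scrub error code, and
 * RIPV is set because nothing actually consumed the bad data, so the guest
 * may simply continue.
 */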
1856 static void kvm_mce_inj_srao_memscrub(CPUState *env, target_phys_addr_t paddr)
1857 {
1858     struct kvm_x86_mce mce = {
1859         .bank = 9,
1860         .status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
1861                   | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
1862                   | 0xc0,
1863         .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV,
1864         .addr = paddr,
1865         .misc = (MCM_ADDR_PHYS << 6) | 0xc,
1866     };
1867     int r;
1868 
1869     r = kvm_set_mce(env, &mce);
1870     if (r < 0) {
1871         fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno));
1872         abort();
1873     }
1874     kvm_mce_broadcast_rest(env);
1875 }
1876 
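/*
 * Same memory-scrubbing SRAO record as above, but delivered through
 * kvm_inject_x86_mce_on() with ABORT_ON_ERROR instead of a direct
 * kvm_set_mce() call; kvm_on_sigbus() below uses this variant when the
 * SIGBUS was not taken on a vCPU thread.
 */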
1877 static void kvm_mce_inj_srao_memscrub2(CPUState *env, target_phys_addr_t paddr)
1878 {
1879     struct kvm_x86_mce mce = {
1880         .bank = 9,
1881         .status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
1882                   | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
1883                   | 0xc0,
1884         .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV,
1885         .addr = paddr,
1886         .misc = (MCM_ADDR_PHYS << 6) | 0xc,
1887     };
1888 
1889     kvm_inject_x86_mce_on(env, &mce, ABORT_ON_ERROR);
1890     kvm_mce_broadcast_rest(env);
1891 }
1892 
1893 #endif
1894 
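/*
 * SIGBUS handler for a vCPU thread.  BUS_MCEERR_AR means the vCPU consumed
 * poisoned memory synchronously (action required); BUS_MCEERR_AO means the
 * error was merely reported by a background scrub (action optional).  When
 * the vCPU's MCG_CAP has MCG_SER_P (software error recovery) set and the
 * faulting address belongs to guest RAM, the error is forwarded as a
 * simulated MCE; if it hit QEMU's own memory, an AO error is ignored and
 * an AR error is fatal.
 */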
1895 int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
1896 {
1897 #if defined(KVM_CAP_MCE)
1898     void *vaddr;
1899     ram_addr_t ram_addr;
1900     target_phys_addr_t paddr;
1901 
1902     if ((env->mcg_cap & MCG_SER_P) && addr
1903         && (code == BUS_MCEERR_AR
1904             || code == BUS_MCEERR_AO)) {
1905         vaddr = (void *)addr;
1906         if (qemu_ram_addr_from_host(vaddr, &ram_addr) ||
1907             !kvm_physical_memory_addr_from_ram(env->kvm_state, ram_addr, &paddr)) {
1908             fprintf(stderr, "Hardware memory error for memory used by "
1909                     "QEMU itself instead of guest system!\n");
1910             /* Hope we are lucky for AO MCE */
1911             if (code == BUS_MCEERR_AO) {
1912                 return 0;
1913             } else {
1914                 hardware_memory_error();
1915             }
1916         }
1917 
1918         if (code == BUS_MCEERR_AR) {
1919             /* Fake an Intel architectural Data Load SRAR UCR */
1920             kvm_mce_inj_srar_dataload(env, paddr);
1921         } else {
1922             /*
1923              * If there is an MCE exception being processed, ignore
1924              * this SRAO MCE
1925              */
1926             if (!kvm_mce_in_progress(env)) {
1927                 /* Fake an Intel architectural Memory scrubbing UCR */
1928                 kvm_mce_inj_srao_memscrub(env, paddr);
1929             }
1930         }
1931     } else
1932 #endif
1933     {
1934         if (code == BUS_MCEERR_AO) {
1935             return 0;
1936         } else if (code == BUS_MCEERR_AR) {
1937             hardware_memory_error();
1938         } else {
1939             return 1;
1940         }
1941     }
1942     return 0;
1943 }
1944 
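/*
 * SIGBUS handler for non-vCPU contexts (typically the main/IO thread).
 * Only action-optional errors can be forwarded here; they are attributed
 * to first_cpu because no particular vCPU touched the failing page.
 */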
1945 int kvm_on_sigbus(int code, void *addr)
1946 {
1947 #if defined(KVM_CAP_MCE)
1948     if ((first_cpu->mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) {
1949         void *vaddr;
1950         ram_addr_t ram_addr;
1951         target_phys_addr_t paddr;
1952 
1953         /* Hope we are lucky for AO MCE */
1954         vaddr = addr;
1955         if (qemu_ram_addr_from_host(vaddr, &ram_addr) ||
1956             !kvm_physical_memory_addr_from_ram(first_cpu->kvm_state, ram_addr, &paddr)) {
1957             fprintf(stderr, "Hardware memory error for memory used by "
1958                     "QEMU itself instead of guest system!: %p\n", addr);
1959             return 0;
1960         }
1961         kvm_mce_inj_srao_memscrub2(first_cpu, paddr);
1962     } else
1963 #endif
1964     {
1965         if (code == BUS_MCEERR_AO) {
1966             return 0;
1967         } else if (code == BUS_MCEERR_AR) {
1968             hardware_memory_error();
1969         } else {
1970             return 1;
1971         }
1972     }
1973     return 0;
1974 }
1975 
1976 #include "qemu-kvm-x86.c"
1977