xref: /illumos-kvm-cmd/tcg/i386/tcg-target.c (revision 68396ea9)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
#ifndef NDEBUG
/* Human-readable register names, indexed by TCG register number, used
   only by debug dumps of generated code.  */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif
35 
/* Order in which the register allocator tries registers.  The callee-saved
   registers (%rbp, %rbx, %r12-%r15 / %ebx, %esi, %edi, %ebp) are listed
   first; the function-argument and result registers come last so that
   long-lived values tend to avoid them.  */
static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
};
63 
/* Registers used to pass integer arguments to helper calls: the x86_64
   System V sequence on 64-bit hosts, and on i386 the regparm sequence
   (see tcg_target_get_call_iarg_regs_count for how many are used).  */
static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
    TCG_REG_R8,
    TCG_REG_R9,
#else
    TCG_REG_EAX,
    TCG_REG_EDX,
    TCG_REG_ECX
#endif
};
78 
/* Registers holding call results: low part in %eax, high part (for a
   two-word result on 32-bit hosts) in %edx.  */
static const int tcg_target_call_oarg_regs[2] = {
    TCG_REG_EAX,
    TCG_REG_EDX
};
83 
/* Code-buffer address that exit_tb returns to.  NOTE(review): assigned
   outside this chunk, presumably when the prologue/epilogue is emitted.  */
static uint8_t *tb_ret_addr;
85 
/* Resolve a relocation previously recorded at CODE_PTR.  Only the two
   PC-relative flavours this backend emits are handled; an unknown type
   or an out-of-range displacement is fatal.  */
static void patch_reloc(uint8_t *code_ptr, int type,
                        tcg_target_long value, tcg_target_long addend)
{
    tcg_target_long disp = value + addend - (uintptr_t)code_ptr;

    if (type == R_386_PC32) {
        if (disp != (int32_t)disp) {
            tcg_abort();
        }
        *(uint32_t *)code_ptr = disp;
    } else if (type == R_386_PC8) {
        if (disp != (int8_t)disp) {
            tcg_abort();
        }
        *(uint8_t *)code_ptr = disp;
    } else {
        tcg_abort();
    }
}
109 
110 /* maximum number of register used for input function arguments */
tcg_target_get_call_iarg_regs_count(int flags)111 static inline int tcg_target_get_call_iarg_regs_count(int flags)
112 {
113     if (TCG_TARGET_REG_BITS == 64) {
114         return 6;
115     }
116 
117     flags &= TCG_CALL_TYPE_MASK;
118     switch(flags) {
119     case TCG_CALL_TYPE_STD:
120         return 0;
121     case TCG_CALL_TYPE_REGPARM_1:
122     case TCG_CALL_TYPE_REGPARM_2:
123     case TCG_CALL_TYPE_REGPARM:
124         return flags - TCG_CALL_TYPE_REGPARM_1 + 1;
125     default:
126         tcg_abort();
127     }
128 }
129 
/* parse target specific constraints */
/* Consume one constraint letter from *pct_str and record its meaning in
   CT.  Returns 0 on success, -1 for an unknown letter.  */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch(ct_str[0]) {
    case 'a':   /* exactly %eax/%rax */
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':   /* exactly %ebx/%rbx */
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':   /* exactly %ecx/%rcx (shift counts) */
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':   /* exactly %edx/%rdx */
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':   /* exactly %esi/%rsi */
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':   /* exactly %edi/%rdi */
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        /* Any register usable as a byte operand: all 16 on x86_64, but
           only %eax-%ebx (register numbers 0-3) on i386.  */
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'r':   /* any general register */
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;

        /* qemu_ld/st address constraint */
    case 'L':
        /* Any general register except those the qemu_ld/st expansion
           itself needs (the first two call-argument registers).  */
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
        }
        break;

    case 'e':   /* constant that fits a sign-extended 32-bit immediate */
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':   /* constant that fits a zero-extended 32-bit immediate */
        ct->ct |= TCG_CT_CONST_U32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}
206 
207 /* test if a constant matches the constraint */
tcg_target_const_match(tcg_target_long val,const TCGArgConstraint * arg_ct)208 static inline int tcg_target_const_match(tcg_target_long val,
209                                          const TCGArgConstraint *arg_ct)
210 {
211     int ct = arg_ct->ct;
212     if (ct & TCG_CT_CONST) {
213         return 1;
214     }
215     if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
216         return 1;
217     }
218     if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
219         return 1;
220     }
221     return 0;
222 }
223 
/* Only the low 3 bits of a register number fit in a ModRM/SIB field;
   on x86_64 the fourth bit travels in the REX prefix instead.  */
#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)	((x) & 7)
#else
# define LOWREGMASK(x)	(x)
#endif

/* Flags ORed into an opcode value, interpreted by tcg_out_opc.  */
#define P_EXT		0x100		/* 0x0f opcode prefix */
#define P_DATA16	0x200		/* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32	0x400		/* 0x67 opcode prefix */
# define P_REXW		0x800		/* Set REX.W = 1 */
# define P_REXB_R	0x1000		/* REG field as byte register */
# define P_REXB_RM	0x2000		/* R/M field as byte register */
#else
# define P_ADDR32	0
# define P_REXW		0
# define P_REXB_R	0
# define P_REXB_RM	0
#endif

/* Primary opcode bytes, named after the Intel mnemonic and operand
   encoding (Ev/Gv/Iz/Ib etc. as in the SDM opcode tables).  */
#define OPC_ARITH_EvIz	(0x81)
#define OPC_ARITH_EvIb	(0x83)
#define OPC_ARITH_GvEv	(0x03)		/* ... plus (ARITH_FOO << 3) */
#define OPC_ADD_GvEv	(OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP	(0xc8 | P_EXT)
#define OPC_CALL_Jz	(0xe8)
#define OPC_CMP_GvEv	(OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32	(0x48)
#define OPC_IMUL_GvEv	(0xaf | P_EXT)
#define OPC_IMUL_GvEvIb	(0x6b)
#define OPC_IMUL_GvEvIz	(0x69)
#define OPC_INC_r32	(0x40)
#define OPC_JCC_long	(0x80 | P_EXT)	/* ... plus condition code */
#define OPC_JCC_short	(0x70)		/* ... plus condition code */
#define OPC_JMP_long	(0xe9)
#define OPC_JMP_short	(0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */
#define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */
#define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
#define OPC_MOVL_EvIz	(0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVSBL	(0xbe | P_EXT)
#define OPC_MOVSWL	(0xbf | P_EXT)
#define OPC_MOVSLQ	(0x63 | P_REXW)
#define OPC_MOVZBL	(0xb6 | P_EXT)
#define OPC_MOVZWL	(0xb7 | P_EXT)
#define OPC_POP_r32	(0x58)
#define OPC_PUSH_r32	(0x50)
#define OPC_PUSH_Iv	(0x68)
#define OPC_PUSH_Ib	(0x6a)
#define OPC_RET		(0xc3)
#define OPC_SETCC	(0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1	(0xd1)
#define OPC_SHIFT_Ib	(0xc1)
#define OPC_SHIFT_cl	(0xd3)
#define OPC_TESTL	(0x85)
#define OPC_XCHG_ax_r32	(0x90)

#define OPC_GRP3_Ev	(0xf7)
#define OPC_GRP5	(0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev	0
#define EXT5_DEC_Ev	1
#define EXT5_CALLN_Ev	2
#define EXT5_JMPN_Ev	4

/* Condition codes to be added to OPC_JCC_{long,short}.  */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf

/* Map TCG comparison conditions onto x86 condition codes; signed TCG
   conditions use the signed jcc forms, unsigned use above/below.  */
static const uint8_t tcg_cond_to_jcc[10] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};
349 
#if TCG_TARGET_REG_BITS == 64
/* Emit the prefix bytes requested by the P_* flags ORed into OPC
   (0x66, 0x67, REX, 0x0f escape), then the opcode byte itself.  R, RM
   and X are the register numbers headed for the ModRM reg, ModRM r/m
   and SIB index fields; only their high bits matter here, to build the
   REX prefix.  */
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation.  */
        assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

    rex = 0;
    rex |= (opc & P_REXW) >> 8;		/* REX.W */
    rex |= (r & 8) >> 1;		/* REX.R */
    rex |= (x & 8) >> 2;		/* REX.X */
    rex |= (rm & 8) >> 3;		/* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        /* 0x40 is the REX prefix base; the flag bits above 0xf are
           discarded by the cast.  */
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
#else
/* 32-bit variant: no REX prefixes exist, only 0x66 and the 0x0f escape.  */
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them.  */
#define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc)
#endif
403 
/* Emit OPC with a register-direct ModRM byte: mod = 11, reg = R, rm = RM.  */
static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    int modrm = 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm);

    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, modrm);
}
409 
/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   We handle either RM and INDEX missing with a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift,
                                     tcg_target_long offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
            /* 5 = opcode + ModRM + disp32; ~rm adds the size of any
               trailing immediate (see the comment above).  */
            tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
            tcg_target_long disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                /* mod=00, rm=101 means rip-relative in 64-bit mode.  */
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                /* ModRM escape to SIB, then SIB with no base, no index.  */
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            tcg_abort();
        } else {
            /* Absolute address.  */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    /* Trailing displacement, if any.  */
    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}
497 
/* A simplification of the above with no index or shift.  */
/* Emit OPC accessing memory at RM + OFFSET (or an absolute address when
   RM is negative).  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, tcg_target_long offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}
504 
505 /* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
tgen_arithr(TCGContext * s,int subop,int dest,int src)506 static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
507 {
508     /* Propagate an opcode prefix, such as P_REXW.  */
509     int ext = subop & ~0x7;
510     subop &= 0x7;
511 
512     tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
513 }
514 
/* Emit a register-to-register move of the given TYPE; a move onto
   itself is elided entirely.  */
static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg)
{
    if (arg == ret) {
        return;
    }
    tcg_out_modrm(s, OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0),
                  ret, arg);
}
522 
/* Load constant ARG into register RET using the shortest encoding.  */
static void tcg_out_movi(TCGContext *s, TCGType type,
                         int ret, tcg_target_long arg)
{
    if (arg == 0) {
        /* xor r,r is the shortest way to zero a register.  Note that
           unlike a mov-immediate this clobbers the flags.  */
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        /* 32-bit mov-immediate; on x86_64 this zero-extends into the
           high half of the destination.  */
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
    } else if (arg == (int32_t)arg) {
        /* Sign-extending 64-bit mov of a 32-bit immediate.  */
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
    } else {
        /* Full 64-bit immediate.  The double shift avoids an undefined
           shift-by-32 when tcg_target_long is only 32 bits wide.  */
        tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        tcg_out32(s, arg >> 31 >> 1);
    }
}
541 
/* Push an immediate onto the stack, using the 8-bit form when the value
   fits.  Values outside the signed 32-bit range cannot be encoded.  */
static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        /* push imm8 (sign-extended) */
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
        return;
    }
    if (val != (int32_t)val) {
        tcg_abort();
    }
    /* push imm32 */
    tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
    tcg_out32(s, val);
}
554 
/* Emit "push reg"; the register number is encoded in the opcode byte.  */
static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}
559 
/* Emit "pop reg"; the register number is encoded in the opcode byte.  */
static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}
564 
/* Load RET from memory at ARG1 + ARG2, 32 or 64 bits wide per TYPE.  */
static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
                              int arg1, tcg_target_long arg2)
{
    int rexw = (type == TCG_TYPE_I64 ? P_REXW : 0);

    tcg_out_modrm_offset(s, OPC_MOVL_GvEv + rexw, ret, arg1, arg2);
}
571 
/* Store ARG to memory at ARG1 + ARG2, 32 or 64 bits wide per TYPE.  */
static inline void tcg_out_st(TCGContext *s, TCGType type, int arg,
                              int arg1, tcg_target_long arg2)
{
    int rexw = (type == TCG_TYPE_I64 ? P_REXW : 0);

    tcg_out_modrm_offset(s, OPC_MOVL_EvGv + rexw, arg, arg1, arg2);
}
578 
/* Shift or rotate REG by the constant COUNT.  SUBOPC is one of the
   SHIFT_* codes, possibly with an opcode prefix (e.g. P_DATA16) ORed in.  */
static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    int prefix = subopc & ~0x7;         /* propagated prefix bits */
    int op = subopc & 0x7;              /* the SHIFT_* code proper */

    if (count == 1) {
        /* Shift-by-one has its own, immediate-free encoding.  */
        tcg_out_modrm(s, OPC_SHIFT_1 + prefix, op, reg);
        return;
    }
    tcg_out_modrm(s, OPC_SHIFT_Ib + prefix, op, reg);
    tcg_out8(s, count);
}
592 
/* Emit "bswap reg" (32-bit byte swap, register encoded in the opcode).  */
static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}
597 
/* Swap the two bytes of the low 16 bits of REG: rolw $8, %reg.  */
static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}
602 
/* Zero-extend the low byte of SRC into DEST.  */
static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    /* On i386 only registers 0-3 (%eax-%ebx) have low-byte encodings.  */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}
609 
/* Sign-extend the low byte of SRC into DEST; REXW (P_REXW or 0) selects
   a 64-bit rather than 32-bit destination.  */
static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    /* On i386 only registers 0-3 (%eax-%ebx) have low-byte encodings.  */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}
616 
/* Zero-extend the low 16 bits of SRC into DEST.  */
static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}
622 
/* Sign-extend the low 16 bits of SRC into DEST; REXW (P_REXW or 0)
   selects a 64-bit rather than 32-bit destination.  */
static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}
628 
/* Zero-extend the low 32 bits of SRC into DEST.  */
static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}
634 
/* Sign-extend the low 32 bits of SRC into DEST (movslq, 64-bit only).  */
static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}
639 
/* Emit "bswap reg" with REX.W (64-bit byte swap).  */
static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}
644 
/* Generate r0 = r0 OP val with an immediate operand, choosing the
   shortest encoding.  C is an ARITH_* code, optionally with P_REXW ORed
   in.  A non-zero CF forbids the INC/DEC shortcut, presumably because
   the caller needs the carry flag, which INC/DEC do not update —
   NOTE(review): confirm against callers.  */
static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        /* Split the P_REXW prefix bit from the ARITH_* code.  */
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        /* add 1 / sub -1 increments; add -1 / sub 1 decrements.  */
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        /* Masks matching a zero-extension can use the shorter movz
           forms instead of an AND with a wide immediate.  */
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    /* Sign-extended 8-bit immediate form.  */
    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    /* 32-bit immediate form; with REX.W the immediate is sign-extended,
       so a 64-bit operation requires the value to fit in int32.  */
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    /* No encoding exists for a 64-bit immediate.  */
    tcg_abort();
}
705 
/* Add the constant VAL to REG (host-pointer width); zero is a no-op.  */
static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val == 0) {
        return;
    }
    tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
}
712 
/* Use SMALL != 0 to force a short forward branch.  */
/* Emit a jump (OPC == -1) or conditional jump (OPC == JCC_*) to
   LABEL_INDEX, choosing between the 2-byte short and the long forms.
   Unresolved labels get a relocation patched later by patch_reloc.  */
static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
{
    int32_t val, val1;
    TCGLabel *l = &s->labels[label_index];

    if (l->has_value) {
        val = l->u.value - (tcg_target_long)s->code_ptr;
        /* val1: displacement relative to the end of a 2-byte short form.  */
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                /* Caller demanded a short branch but it doesn't reach.  */
                tcg_abort();
            }
            if (opc == -1) {
                /* jmp rel32: 5 bytes total.  */
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                /* 0f 8x rel32: 6 bytes total.  */
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        /* Forward reference, short form: reserve one displacement byte.  */
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
        s->code_ptr += 1;
    } else {
        /* Forward reference, long form: reserve four displacement bytes.  */
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
        s->code_ptr += 4;
    }
}
759 
/* Emit a flag-setting comparison of ARG1 against ARG2 (an immediate
   when CONST_ARG2 is non-zero).  REXW is P_REXW or 0.  */
static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (!const_arg2) {
        /* cmp reg, reg */
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    } else if (arg2 == 0) {
        /* Comparison against zero: "test r, r" is shorter.  */
        tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
    } else {
        /* cmp reg, imm */
        tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
    }
}
774 
/* Compare-and-branch on 32-bit operands: cmp/test then a conditional
   jump to LABEL_INDEX.  SMALL forces the short branch form.  */
static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
782 
#if TCG_TARGET_REG_BITS == 64
/* Compare-and-branch on 64-bit operands (x86_64 only).  */
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle cross basic blocks temporaries */
/* Branch on a 64-bit comparison of register pairs on a 32-bit host.
   args[0]/args[1] hold the low/high words of the first operand,
   args[2]/args[3] of the second; args[4] is the condition and args[5]
   the target label.  Ordered conditions compare the high words with
   the signed/unsigned condition, then the low words unsigned.  */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    int label_next;
    label_next = gen_new_label();
    switch(args[4]) {
    case TCG_COND_EQ:
        /* If the low words differ, fall through past the high-word test.  */
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_NE:
        /* Unequal if either word pair differs.  */
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         args[5], small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
}
#endif
874 
/* Materialize the 0/1 result of a 32-bit comparison in DEST.
   ARG2 is an immediate when CONST_ARG2 is set.  */
static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    int jcc = tcg_cond_to_jcc[cond];

    /* Compare, then SETcc into the low byte of DEST and zero-extend
       the rest of the register.  */
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | jcc, 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
882 
883 #if TCG_TARGET_REG_BITS == 64
/* Materialize the 0/1 result of a 64-bit comparison in DEST.
   ARG2 is an immediate when CONST_ARG2 is set.  */
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    int jcc = tcg_cond_to_jcc[cond];

    /* Compare with REX.W, then SETcc into the low byte of DEST and
       zero-extend the rest of the register.  */
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | jcc, 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
891 #else
/* 32-bit host: set args[0] to the 0/1 result of comparing the 64-bit
   pair {args[2]:args[1]} against {args[4]:args[3]} with the condition
   in args[5].  Implemented on top of tcg_out_brcond2.  */
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    /* new_args holds {al, ah, bl, bh, cond}; slot 5 receives the
       branch label, matching the brcond2 argument layout.  */
    TCGArg new_args[6];
    int label_true, label_over;

    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_true;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code.  */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        /* Invert the condition (slot 4) so the branch skips the
           increment when the original condition is false.  */
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_over;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
    }
}
933 #endif
934 
/* Emit a direct call (CALL non-zero) or jump to the absolute address
   DEST.  The 5-byte rel32 form is used when the displacement fits in
   32 bits; otherwise DEST is materialized in R10 and an indirect
   call/jump through it is emitted.  */
static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
{
    /* Displacement is relative to the end of the 5-byte rel32 insn.  */
    tcg_target_long rel = dest - ((tcg_target_long)s->code_ptr + 5);

    if (rel == (int32_t)rel) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, rel);
        return;
    }

    /* Out of rel32 range (only reachable on 64-bit hosts): go
       indirect through the scratch register R10.  */
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
    tcg_out_modrm(s, OPC_GRP5, call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev,
                  TCG_REG_R10);
}
948 
/* Emit a call to the absolute address DEST.  */
static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
{
    tcg_out_branch(s, 1, dest);
}
953 
/* Emit a jump to the absolute address DEST.  */
static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
{
    tcg_out_branch(s, 0, dest);
}
958 
#if defined(CONFIG_SOFTMMU)

#include "../../softmmu_defs.h"

/* Slow-path load helpers for the software MMU, indexed by log2 of
   the access size (0 = byte ... 3 = quad).  */
static void *qemu_ld_helpers[4] = {
    __ldb_mmu,
    __ldw_mmu,
    __ldl_mmu,
    __ldq_mmu,
};

/* Slow-path store helpers, same indexing as above.  */
static void *qemu_st_helpers[4] = {
    __stb_mmu,
    __stw_mmu,
    __stl_mmu,
    __stq_mmu,
};
976 
977 /* Perform the TLB load and compare.
978 
979    Inputs:
   ADDRLO_IDX contains the index into ARGS of the low part of the
   address; the high part of the address is at ADDRLO_IDX+1.
982 
983    MEM_INDEX and S_BITS are the memory context and log2 size of the load.
984 
985    WHICH is the offset into the CPUTLBEntry structure of the slot to read.
986    This should be offsetof addr_read or addr_write.
987 
988    Outputs:
989    LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
990    positions of the displacements of forward jumps to the TLB miss case.
991 
992    First argument register is loaded with the low part of the address.
993    In the TLB hit case, it has been adjusted as indicated by the TLB
994    and so is a host address.  In the TLB miss case, it continues to
995    hold a guest address.
996 
997    Second argument register is clobbered.  */
998 
/* Inline the TLB lookup: index the TLB with the guest address, compare
   the entry's tag against the page-aligned address, and on a hit turn
   the first call-argument register into a host address.  See the block
   comment above for the full contract.  */
static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
                                    int mem_index, int s_bits,
                                    const TCGArg *args,
                                    uint8_t **label_ptr, int which)
{
    const int addrlo = args[addrlo_idx];
    const int r0 = tcg_target_call_iarg_regs[0];
    const int r1 = tcg_target_call_iarg_regs[1];
    TCGType type = TCG_TYPE_I32;
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
        type = TCG_TYPE_I64;
        rexw = P_REXW;
    }

    tcg_out_mov(s, type, r1, addrlo);
    tcg_out_mov(s, type, r0, addrlo);

    /* r1 = TLB index, pre-scaled by the TLB entry size.  */
    tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    /* r0 = page-aligned address; the low (1 << s_bits) - 1 bits are
       kept so that an access crossing the page fails the compare.  */
    tgen_arithi(s, ARITH_AND + rexw, r0,
                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
    tgen_arithi(s, ARITH_AND + rexw, r1,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    /* r1 = &env->tlb_table[mem_index][index] + WHICH (the addr_read
       or addr_write slot).  */
    tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
                             offsetof(CPUState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r1), r0 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);

    /* Reload the unmasked guest address into r0 for both the hit and
       miss paths.  */
    tcg_out_mov(s, type, r0, addrlo);

    /* jne label1 -- displacement byte patched by the caller.  */
    tcg_out8(s, OPC_JCC_short + JCC_JNE);
    label_ptr[0] = s->code_ptr;
    s->code_ptr++;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r1), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);

        /* jne label1 */
        tcg_out8(s, OPC_JCC_short + JCC_JNE);
        label_ptr[1] = s->code_ptr;
        s->code_ptr++;
    }

    /* TLB Hit.  */

    /* add addend(r1), r0 -- converts the guest address in r0 into a
       host address.  */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
                         offsetof(CPUTLBEntry, addend) - which);
}
1056 #endif
1057 
/* Emit the actual memory access for a qemu_ld: load from BASE+OFS into
   DATALO (and DATAHI for 64-bit data on a 32-bit host).  SIZEOP is
   log2 of the access size, with bit 2 set for sign extension.
   Byte-swaps as needed for a big-endian guest.  */
static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    switch (sizeop) {
    case 0:
        tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
        break;
    case 0 | 4:
        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
        break;
    case 1:
        tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
        if (bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case 1 | 4:
        if (bswap) {
            /* Zero-extend, swap the two bytes, then sign-extend.  */
            tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
            tcg_out_rolw_8(s, datalo);
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
        }
        break;
    case 2:
        tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        if (bswap) {
            tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_bswap32(s, datalo);
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
        }
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            /* 32-bit host: load as two words.  For big-endian the
               halves trade places as well as swapping their bytes.  */
            if (bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            /* Order the loads so BASE is not overwritten before the
               second load when it doubles as a data register.  */
            if (base != datalo) {
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
            } else {
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}
1134 
1135 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1136    EAX. It will be useful once fixed registers globals are less
1137    common. */
/* Generate code for a qemu_ld opcode: load guest memory at the address
   in ARGS into args[0] (and args[1] for 64-bit data on 32-bit hosts).
   OPC is log2 of the access size, with bit 2 set for sign extension.
   With softmmu, the TLB lookup is inlined and a miss calls the helper
   from qemu_ld_helpers.  */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits, arg_idx;
    /* [0]/[1]: TLB-miss jcc displacements; [2]: jump over the miss path.  */
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc & 3;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    /* TLB Hit.  */
    tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
                           tcg_target_call_iarg_regs[0], 0, opc);

    /* jmp label2 */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss.  */

    /* label1: patch the forward-branch displacements emitted by
       tcg_out_tlb_load to land here.  */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

    /* XXX: move that code at the end of the TB */
    /* The first argument is already loaded with addrlo.  */
    arg_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
                    args[addrlo_idx + 1]);
    }
    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
                 mem_index);
    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);

    /* The helper returns in EAX (and EDX for 64-bit data on 32-bit
       hosts); widen/move the value into the destination register.  */
    switch(opc) {
    case 0 | 4:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 1 | 4:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 0:
        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
        break;
    case 1:
        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
        break;
    case 2:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax -- avoids clobbering the high half
               before it is moved into place.  */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* label2: */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing.  */

            if (offset != GUEST_BASE) {
                /* GUEST_BASE did not survive truncation to the 32-bit
                   displacement: add it via a scratch register.  */
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
                base = TCG_REG_RDI, offset = 0;
            }
        }

        tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}
1252 
/* Emit the actual memory access for a qemu_st: store DATALO (and
   DATAHI for 64-bit data on a 32-bit host) to BASE+OFS.  SIZEOP is
   log2 of the access size.  Byte-swaps through a scratch register as
   needed for a big-endian guest.  */
static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that the second argument reg is definitely free here.  */
    int scratch = tcg_target_call_iarg_regs[1];

    switch (sizeop) {
    case 0:
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
        break;
    case 1:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        /* 16-bit store via the 0x66 operand-size prefix.  */
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
        break;
    case 2:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
        break;
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
        } else if (bswap) {
            /* 32-bit host, big-endian guest: the swapped high half is
               stored at the lower address.  */
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
        } else {
            tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
        }
        break;
    default:
        tcg_abort();
    }
}
1311 
/* Generate code for a qemu_st opcode: store args[0] (and args[1] for
   64-bit data on 32-bit hosts) to the guest address in ARGS.  OPC is
   log2 of the access size.  With softmmu, the TLB lookup is inlined
   and a miss calls the helper from qemu_st_helpers, passing overflow
   arguments on the stack on 32-bit hosts.  */
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits;
    /* Bytes pushed for the helper call, to undo after it returns.  */
    int stack_adjust;
    /* [0]/[1]: TLB-miss jcc displacements; [2]: jump over the miss path.  */
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB Hit.  */
    tcg_out_qemu_st_direct(s, data_reg, data_reg2,
                           tcg_target_call_iarg_regs[0], 0, opc);

    /* jmp label2 */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss.  */

    /* label1: patch the forward-branch displacements emitted by
       tcg_out_tlb_load to land here.  */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

    /* XXX: move that code at the end of the TB */
    /* Marshal the remaining helper arguments; the first argument
       register already holds the (low part of the) address.  */
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    TCG_REG_RSI, data_reg);
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
        stack_adjust = 0;
    } else if (TARGET_LONG_BITS == 32) {
        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg);
        if (opc == 3) {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2);
            tcg_out_pushi(s, mem_index);
            stack_adjust = 4;
        } else {
            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
            stack_adjust = 0;
        }
    } else {
        if (opc == 3) {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
            tcg_out_pushi(s, mem_index);
            tcg_out_push(s, data_reg2);
            tcg_out_push(s, data_reg);
            stack_adjust = 12;
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
            /* Narrow the stored value to its declared width.  */
            switch(opc) {
            case 0:
                tcg_out_ext8u(s, TCG_REG_ECX, data_reg);
                break;
            case 1:
                tcg_out_ext16u(s, TCG_REG_ECX, data_reg);
                break;
            case 2:
                tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg);
                break;
            }
            tcg_out_pushi(s, mem_index);
            stack_adjust = 4;
        }
    }

    tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);

    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
        /* Pop and discard.  This is 2 bytes smaller than the add.  */
        tcg_out_pop(s, TCG_REG_ECX);
    } else if (stack_adjust != 0) {
        tcg_out_addi(s, TCG_REG_ESP, stack_adjust);
    }

    /* label2: */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing.  */

            if (offset != GUEST_BASE) {
                /* GUEST_BASE did not survive truncation to the 32-bit
                   displacement: add it via a scratch register.  */
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
                base = TCG_REG_RDI, offset = 0;
            }
        }

        tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}
1429 
/* Emit host machine code for a single TCG opcode.  ARGS holds the
   operands; CONST_ARGS flags which of them are compile-time constants.
   The OP_32_64 macro makes one case label handle both the _i32 and
   (on 64-bit hosts) the _i64 variant of an op, setting the REX.W
   prefix for the latter.  */
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                              const TCGArg *args, const int *const_args)
{
    int c, rexw = 0;

#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i64): \
            rexw = P_REXW; /* FALLTHRU */    \
        case glue(glue(INDEX_op_, x), _i32)
#else
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i32)
#endif

    switch(opc) {
    case INDEX_op_exit_tb:
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
        tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
        break;
    case INDEX_op_goto_tb:
        if (s->tb_jmp_offset) {
            /* direct jump method */
            tcg_out8(s, OPC_JMP_long); /* jmp im */
            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
            tcg_out32(s, 0);
        } else {
            /* indirect jump method */
            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                 (tcg_target_long)(s->tb_next + args[0]));
        }
        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
        break;
    case INDEX_op_call:
        if (const_args[0]) {
            tcg_out_calli(s, args[0]);
        } else {
            /* call *reg */
            tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
        }
        break;
    case INDEX_op_jmp:
        if (const_args[0]) {
            tcg_out_jmp(s, args[0]);
        } else {
            /* jmp *reg */
            tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
        }
        break;
    case INDEX_op_br:
        tcg_out_jxx(s, JCC_JMP, args[0], 0);
        break;
    case INDEX_op_movi_i32:
        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
        break;
    OP_32_64(ld8u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld8s):
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16s):
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_ld32u_i64:
#endif
    case INDEX_op_ld_i32:
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(st8):
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
                             args[0], args[1], args[2]);
        break;
    OP_32_64(st16):
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
                             args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_st32_i64:
#endif
    case INDEX_op_st_i32:
        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(add):
        /* For 3-operand addition, use LEA.  */
        if (args[0] != args[1]) {
            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;

            if (const_args[2]) {
                /* Fold the constant into the LEA displacement; index
                   register -1 means "no index".  */
                c3 = a2, a2 = -1;
            } else if (a0 == a2) {
                /* Watch out for dest = src + dest, since we've removed
                   the matching constraint on the add.  */
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
                break;
            }

            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
            break;
        }
        c = ARITH_ADD;
        goto gen_arith;
    OP_32_64(sub):
        c = ARITH_SUB;
        goto gen_arith;
    OP_32_64(and):
        c = ARITH_AND;
        goto gen_arith;
    OP_32_64(or):
        c = ARITH_OR;
        goto gen_arith;
    OP_32_64(xor):
        c = ARITH_XOR;
        goto gen_arith;
    gen_arith:
        if (const_args[2]) {
            tgen_arithi(s, c + rexw, args[0], args[2], 0);
        } else {
            tgen_arithr(s, c + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(mul):
        if (const_args[2]) {
            int32_t val;
            val = args[2];
            if (val == (int8_t)val) {
                /* imm8 form is 3 bytes shorter than imm32.  */
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
                tcg_out8(s, val);
            } else {
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
                tcg_out32(s, val);
            }
        } else {
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(div2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
        break;
    OP_32_64(divu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
        break;

    OP_32_64(shl):
        c = SHIFT_SHL;
        goto gen_shift;
    OP_32_64(shr):
        c = SHIFT_SHR;
        goto gen_shift;
    OP_32_64(sar):
        c = SHIFT_SAR;
        goto gen_shift;
    OP_32_64(rotl):
        c = SHIFT_ROL;
        goto gen_shift;
    OP_32_64(rotr):
        c = SHIFT_ROR;
        goto gen_shift;
    gen_shift:
        if (const_args[2]) {
            tcg_out_shifti(s, c + rexw, args[0], args[2]);
        } else {
            /* Variable count form; OPC_SHIFT_cl takes the count in
               %cl (see the "c" constraint in x86_op_defs).  */
            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
        }
        break;

    case INDEX_op_brcond_i32:
        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i32:
        tcg_out_setcond32(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;

    OP_32_64(bswap16):
        tcg_out_rolw_8(s, args[0]);
        break;
    OP_32_64(bswap32):
        tcg_out_bswap32(s, args[0]);
        break;

    OP_32_64(neg):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
        break;
    OP_32_64(not):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
        break;

    OP_32_64(ext8s):
        tcg_out_ext8s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext16s):
        tcg_out_ext16s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext8u):
        tcg_out_ext8u(s, args[0], args[1]);
        break;
    OP_32_64(ext16u):
        tcg_out_ext16u(s, args[0], args[1]);
        break;

    /* Guest memory accesses; the low 2 bits of the size argument are
       log2 of the access size, bit 2 requests sign extension.  */
    case INDEX_op_qemu_ld8u:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld8s:
        tcg_out_qemu_ld(s, args, 0 | 4);
        break;
    case INDEX_op_qemu_ld16u:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_ld16s:
        tcg_out_qemu_ld(s, args, 1 | 4);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_qemu_ld32u:
#endif
    case INDEX_op_qemu_ld32:
        tcg_out_qemu_ld(s, args, 2);
        break;
    case INDEX_op_qemu_ld64:
        tcg_out_qemu_ld(s, args, 3);
        break;

    case INDEX_op_qemu_st8:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st16:
        tcg_out_qemu_st(s, args, 1);
        break;
    case INDEX_op_qemu_st32:
        tcg_out_qemu_st(s, args, 2);
        break;
    case INDEX_op_qemu_st64:
        tcg_out_qemu_st(s, args, 3);
        break;

#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args, 0);
        break;
    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
        break;
    case INDEX_op_mulu2_i32:
        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
        break;
    case INDEX_op_add2_i32:
        /* Double-word add: low halves with ADD, high with ADC.  */
        if (const_args[4]) {
            tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_ADD, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_ADC, args[1], args[5]);
        }
        break;
    case INDEX_op_sub2_i32:
        /* Double-word subtract: low halves with SUB, high with SBB.  */
        if (const_args[4]) {
            tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_SUB, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_SBB, args[1], args[5]);
        }
        break;
#else /* TCG_TARGET_REG_BITS == 64 */
    case INDEX_op_movi_i64:
        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_qemu_ld32s:
        tcg_out_qemu_ld(s, args, 2 | 4);
        break;

    case INDEX_op_brcond_i64:
        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i64:
        tcg_out_setcond64(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, args[0], args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, args[0], args[1]);
        break;
#endif

    default:
        tcg_abort();
    }

#undef OP_32_64
}
1754 
/*
 * Operand-constraint table: one entry per TCG opcode this backend
 * implements, giving a constraint string for each operand (outputs
 * first, then inputs, per the TCG constraint convention).
 *
 * Constraint letters, per the standard TCG syntax:
 *   "r"        any allocatable register
 *   "i"        an immediate operand is also acceptable
 *   "0","1"    output must share a register with input 0 / input 1
 * The remaining letters ("q", "a", "d", "c", "b", "e", "L", "Z") name
 * target-specific register subclasses / immediate restrictions;
 * NOTE(review): presumably decoded by this backend's
 * target_parse_constraint(), which is not visible in this chunk.
 *
 * The table is terminated by the { -1 } sentinel entry.
 */
static const TCGTargetOpDef x86_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_call, { "ri" } },
    { INDEX_op_jmp, { "ri" } },
    { INDEX_op_br, { } },
    { INDEX_op_mov_i32, { "r", "r" } },
    { INDEX_op_movi_i32, { "r" } },
    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "q", "r" } },
    { INDEX_op_st16_i32, { "r", "r" } },
    { INDEX_op_st_i32, { "r", "r" } },

    { INDEX_op_add_i32, { "r", "r", "ri" } },
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
    /* NOTE(review): "a"/"d" presumably pin EAX/EDX, matching the x86
       div instruction's fixed register usage -- confirm against
       target_parse_constraint.  */
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_and_i32, { "r", "0", "ri" } },
    { INDEX_op_or_i32, { "r", "0", "ri" } },
    { INDEX_op_xor_i32, { "r", "0", "ri" } },

    /* NOTE(review): "c" presumably pins the shift count to ECX, since
       x86 variable shifts take the count in CL.  */
    { INDEX_op_shl_i32, { "r", "0", "ci" } },
    { INDEX_op_shr_i32, { "r", "0", "ci" } },
    { INDEX_op_sar_i32, { "r", "0", "ci" } },
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },

    { INDEX_op_brcond_i32, { "r", "ri" } },

    { INDEX_op_bswap16_i32, { "r", "0" } },
    { INDEX_op_bswap32_i32, { "r", "0" } },

    { INDEX_op_neg_i32, { "r", "0" } },

    { INDEX_op_not_i32, { "r", "0" } },

    { INDEX_op_ext8s_i32, { "r", "q" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "q" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_setcond_i32, { "q", "r", "ri" } },

#if TCG_TARGET_REG_BITS == 32
    /* Double-word (64-bit value on 32-bit host) helpers.  */
    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
#else
    /* 64-bit host: native i64 operations.  */
    { INDEX_op_mov_i64, { "r", "r" } },
    { INDEX_op_movi_i64, { "r" } },
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },
    { INDEX_op_st8_i64, { "r", "r" } },
    { INDEX_op_st16_i64, { "r", "r" } },
    { INDEX_op_st32_i64, { "r", "r" } },
    { INDEX_op_st_i64, { "r", "r" } },

    { INDEX_op_add_i64, { "r", "0", "re" } },
    { INDEX_op_mul_i64, { "r", "0", "re" } },
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_sub_i64, { "r", "0", "re" } },
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
    { INDEX_op_or_i64, { "r", "0", "re" } },
    { INDEX_op_xor_i64, { "r", "0", "re" } },

    { INDEX_op_shl_i64, { "r", "0", "ci" } },
    { INDEX_op_shr_i64, { "r", "0", "ci" } },
    { INDEX_op_sar_i64, { "r", "0", "ci" } },
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },

    { INDEX_op_brcond_i64, { "r", "re" } },
    { INDEX_op_setcond_i64, { "r", "r", "re" } },

    { INDEX_op_bswap16_i64, { "r", "0" } },
    { INDEX_op_bswap32_i64, { "r", "0" } },
    { INDEX_op_bswap64_i64, { "r", "0" } },
    { INDEX_op_neg_i64, { "r", "0" } },
    { INDEX_op_not_i64, { "r", "0" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },
#endif

#if TCG_TARGET_REG_BITS == 64
    /* 64-bit host: the guest address fits in a single "L" operand.  */
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld32u, { "r", "L" } },
    { INDEX_op_qemu_ld32s, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "L" } },

    { INDEX_op_qemu_st8, { "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L" } },
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
    /* 32-bit host, guest address <= 32 bits: one address operand;
       64-bit data values occupy two registers.  */
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L" } },
#else
    /* 32-bit host, 64-bit guest address: two address operands.  */
    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
#endif
    { -1 },  /* sentinel: end of table */
};
1897 
/* Registers saved on entry by tcg_target_qemu_prologue() below and
   restored (in reverse order) in the epilogue, i.e. the registers TCG
   may use freely without clobbering the caller's state.  */
static int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    /* TCG_REG_R14, */ /* Currently used for the global env. */
    TCG_REG_R15,
#else
    /* TCG_REG_EBP, */ /* Currently used for the global env. */
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
#endif
};
1913 
1914 /* Generate global QEMU prologue and epilogue code */
tcg_target_qemu_prologue(TCGContext * s)1915 static void tcg_target_qemu_prologue(TCGContext *s)
1916 {
1917     int i, frame_size, push_size, stack_addend;
1918 
1919     /* TB prologue */
1920 
1921     /* Save all callee saved registers.  */
1922     for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
1923         tcg_out_push(s, tcg_target_callee_save_regs[i]);
1924     }
1925 
1926     /* Reserve some stack space.  */
1927     push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs);
1928     push_size *= TCG_TARGET_REG_BITS / 8;
1929 
1930     frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
1931     frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
1932         ~(TCG_TARGET_STACK_ALIGN - 1);
1933     stack_addend = frame_size - push_size;
1934     tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
1935 
1936     /* jmp *tb.  */
1937     tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[0]);
1938 
1939     /* TB epilogue */
1940     tb_ret_addr = s->code_ptr;
1941 
1942     tcg_out_addi(s, TCG_REG_ESP, stack_addend);
1943 
1944     for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
1945         tcg_out_pop(s, tcg_target_callee_save_regs[i]);
1946     }
1947     tcg_out_opc(s, OPC_RET, 0, 0, 0);
1948 }
1949 
/* One-time backend initialization: declare which host registers the
   allocator may use, which are clobbered by calls, which are reserved,
   and register the opcode constraint table.  */
static void tcg_target_init(TCGContext *s)
{
#if !defined(CONFIG_USER_ONLY)
    /* fail safe */
    if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) {
        tcg_abort();
    }
#endif

    /* All 16 registers are allocatable on a 64-bit host, all 8 on a
       32-bit host.  */
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
    } else {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
    }

    /* Registers not preserved across a helper call: EAX/EDX/ECX always,
       plus the 64-bit calling convention's argument/scratch registers.  */
    tcg_regset_clear(tcg_target_call_clobber_regs);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
    if (TCG_TARGET_REG_BITS == 64) {
        static const int clobbers_64[] = {
            TCG_REG_RDI, TCG_REG_RSI, TCG_REG_R8,
            TCG_REG_R9, TCG_REG_R10, TCG_REG_R11,
        };
        int i;

        for (i = 0; i < ARRAY_SIZE(clobbers_64); i++) {
            tcg_regset_set_reg(tcg_target_call_clobber_regs, clobbers_64[i]);
        }
    }

    /* The stack pointer is never handed to the register allocator.  */
    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ESP);

    tcg_add_target_add_op_defs(x86_op_defs);
}
1983