Back to home page

LXR

 
 

    


File indexing completed on 2025-05-11 08:23:54

0001 /*
0002  *  em86real.S
0003  *
0004  *  Copyright (C) 1998, 1999 Gabriel Paubert, paubert@iram.es
0005  *
0006  *  Modified to compile in RTEMS development environment
0007  *  by Eric Valette
0008  *
0009  *  Copyright (C) 1999 Eric Valette. valette@crf.canon.fr
0010  *
0011  *  The license and distribution terms for this file may be
0012  *  found in the file LICENSE in this distribution or at
0013  *  http://www.rtems.org/license/LICENSE.
0014  */
0015 
0016 /* If the symbol __BOOT__ is defined, a slightly different version is
0017  * generated to be compiled with the -m relocatable option
0018  */
0019 
0020 #ifdef __BOOT__
0021 #include "bootldr.h"
0022 /* It is impossible to gather statistics in the boot version */
0023 #undef EIP_STATS
0024 #endif
0025 
0026 /*
0027  *
0028  * Given the size of this code, it deserves a few comments on how it works,
0029  * and why it was implemented the way it is.
0030  *
0031  * The goal is to have a real mode i486SX emulator to initialize hardware,
0032  * mostly graphics boards, by interpreting ROM BIOSes. The choice of a 486SX
0033  * is logical since this is the lowest processor that PCI ROM BIOSes must run
0034  * on.
0035  *
0036  * The goal of this emulator is not performance, but a small enough memory
0037  * footprint to include it in a bootloader.
0038  *
0039  * It is actually likely to be comparable to a 25MHz 386DX on a 200MHz 603e !
0040  * This is not as serious as it seems since most of the BIOS code performs
0041  * a lot of accesses to I/O and non-cacheable memory spaces. For such
0042  * instructions, the execution time is often dominated by bus accesses.
0043  * Statistics of the code also show that it spends a large fraction of
0044  * the time in loops waiting for vertical retrace or programs one of the
0045  * timers and waits for the count to go down to zero. This type of loop
0046  * runs emulated at the same speed as on 5 GHz Pentium IV++ ;)
0047  *
0048  */
0049 
0050 /*
0051  * Known bugs or differences with a real 486SX (real mode):
0052  * - segment limits are not enforced (too costly)
0053  * - xchg instructions with memory are not locked
0054  * - lock prefixes are not implemented at all
0055  * - long divides implemented but perhaps still buggy
0056  * - miscellaneous system instructions not implemented
0057  *   (some probably cannot be implemented)
0058  * - neither control nor debug registers are implemented for the time being
0059  *   (debug registers are impossible to implement at a reasonable cost)
0060  */
0061 
0062 /* Code options,  put them on the compiler command line */
0063 /* #define EIP_STATS */ /* EIP based profiling */
0064 /* #undef EIP_STATS */
0065 
0066 /*
0067  * Implementation notes:
0068  *
0069  * A) flags emulation.
0070  *
0071  * The most important decisions when it comes to obtain a reasonable speed
0072  * are related to how the EFLAGS register is emulated.
0073  *
0074  *   Note: the code to set up flags is complex, but it is only seldom
0075  * executed since cmp and test instructions use much faster flag evaluation
0076  * paths. For example the overflow flag is almost only needed for pushf and
0077  * int. Comparison results only involve (SF^OF) or (SF^OF)+ZF and the
0078  * implementation is fast in this case.
0079  *
0080  * Rarely used flags: AC, NT and IOPL are kept in a memory EFLAGS image.
0081  * All other flags are either kept explicitly in PPC cr (DF, IF, and TF) or
0082  * lazily evaluated from the state of 4 registers called flags, result, op1,
0083  * op2, and sometimes the cr itself. The emulation has been designed for
0084  * minimal overhead for the common case where the flags are never used. With
0085  * few exceptions, all instructions that set flags leave the result of the
0086  * computation in a register called result, and operands are taken from op1
0087  * and op2 registers. However a few instructions like cmp, test and bit tests
0088  * (bt/btc/btr/bts/bsf/bsr) explicitly set cr bits to short circuit
0089  * condition code evaluation of conditional instructions.
0090  *
0091  * As a very brief summary:
0092  *
0093  * - the result of the last flag setting operation is often either in the
0094  *   result register or in op2 after increment or decrement instructions
0095  *   because result and op1 may be needed to compute the carry.
0096  *
0097  * - compare instructions leave the result of the unsigned comparison
0098  *   in cr4 and of signed comparison in cr6. This means that:
0099  *   - cr4[0]=CF            (short circuit for jc/jnc)
0100  *   - cr4[1]=~(CF+ZF)      (short circuit for ja/jna)
0101  *   - cr6[0]=(OF^SF)       (short circuit for jl/jnl)
0102  *   - cr6[1]=~((SF^OF)+ZF) (short circuit for jg/jng)
0103  *   - cr6[2]=ZF            (short circuit for jz/jnz)
0104  *
0105  * - test instructions set flags in cr6 and clear overflow. This means that:
0106  *   - cr6[0]=SF=(SF^OF)    (short circuit for jl/jnl/js/jns)
0107  *   - cr6[1]=~((SF^OF)+ZF) (short circuit for jg/jng)
0108  *   - cr6[2]=ZF            (short circuit for jz/jnz)
0109  *
0110  * All flags may be lazily evaluated from several values kept in registers:
0111  *
0112  *  Flag:   Depends upon:
0113  *  OF      result, op1, op2, flags[INCDEC_FIELD,SUBTRACTING,OF_STATE_MASK]
0114  *  SF  result, op2, flags[INCDEC_FIELD,RES_SIZE]
0115  *  ZF  result, op2, cr6[2], flags[INCDEC_FIELD,RES_SIZE,ZF_PROTECT]
0116  *  AF  op1, op2, flags[INCDEC_FIELD,SUBTRACTING,CF_IN]
0117  *  PF  result, op2, flags[INCDEC_FIELD]
0118  *  CF  result, op1, flags[CF_STATE_MASK, CF_IN]
0119  *
0120  * The order of the fields in the flags register has been chosen so that a
0121  * single rlwimi is necessary for common instructions that do not affect all
0122  * flags. (See the code for inc/dec emulation).
0123  *
0124  *
0125  * B) opcodes and prefixes.
0126  *
0127  * The register called opcode holds in its low order 8 bits the opcode
0128  * (second byte if the first byte is 0x0f). More precisely it holds the
0129  * last byte fetched before the modrm byte or the immediate operand(s)
0130  * of the instruction, if any. High order 24 bits are zero unless the
0131  * instruction has prefixes. These higher order bits have the following
0132  * meaning:
0133  * 0x80000000   segment override prefix
0134  * 0x00001000   repnz prefix (0xf2)
0135  * 0x00000800   repz prefix (0xf3)
0136  * 0x00000400   address size prefix (0x67)
0137  * 0x00000200   operand size prefix (0x66)
0138  * (bit 0x1000 and 0x800 cannot be set simultaneously)
0139  *
0140  * Therefore if there is a segment override the value will be very
0141  * negative (between 0x80000000 and 0x800016ff), if there is no segment
0142  * override, the value will be between 0 and 0x16ff. The reason for
0143  * this choice will be understood in the next part.
0144  *
0145  * C) addressing mode description tables.
0146  *
0147  * the encoding of the modrm bytes (especially in 16 bit mode) is quite
0148  * complex. Hence a table, indexed by the five useful bits of the modrm
0149  * byte is used to simplify decoding. Here is a description:
0150  *
0151  *  bit mask    meaning
0152  *  0x80000000  use ss as default segment register
0153  *  0x00004000  means that this addressing mode needs a base register
0154  *              (set for all entries except sib and displacement-only)
0155  *  0x00002000  set if preceding is not set
0156  *  0x00001000  set if an sib follows
0157  *  0x00000700  base register to use (16 and 32 bit)
0158  *  0x00000080  set in 32 bit addressing mode table, cleared in 16 bit
0159  *      (so extsb mask,entry; ori mask,mask,0xffff gives a mask)
0160  *  0x00000070  kludge field, possible values are
0161  *      0: 16 bit addressing mode without index
0162  *      10: 32 bit addressing mode
0163  *      60: 16 bit addressing mode with %si as index
0164  *      70: 16 bit addressing mode with %di as index
0165  *
0166  *  This convention leads to the following special values used to check for
0167  * sib present and displacement-only, which happen to be the three lowest
0168  * values in the table (unsigned):
0169  * 0x00003090   sib follows (implies it is a 32 bit mode)
0170  * 0x00002090   32 bit displacement-only
0171  * 0x00002000   16 bit displacement-only
0172  *
0173  * This means that all entries are either very negative in the 0x80002000
0174  * range if the segment defaults to ss or at least 0x2000 if it defaults
0175  * to ds. Combined with the value in opcode this gives the following table:
0176  * opcode   entry       entry>opcode ?  segment to use
0177  * positive positive    yes     ds (default)
0178  * negative positive    yes     overridden by prefix
0179  * positive negative    no      ss
0180  * negative negative    yes     overridden by prefix
0181  *
0182  * Hence a simple comparison allows to check for the need to override
0183  * the current base with ss, i.e., when ss is the default base and the
0184  * instruction has no override prefix.
0185  *
0186  * D) BUGS
0187  *
0188  * This software is obviously bug-free :-). Nevertheless, if you encounter
0189  * an interesting feature, mail me a note, if possible with a detailed
0190  * instruction example showing where and how it fails.
0191  *
0192  */
0193 
0194 /* Now the details of flag evaluation with the necessary macros */
0195 
0196 /* Positions of the x86 EFLAGS bits in PPC bit numbering (31 is the LSB).
0197 Alignment check is togglable so the system believes it is a 486, but
0198 CPUID is not to avoid unnecessary complexities. However, alignment is actually never checked (real mode is CPL 0 anyway). */
0199 #define AC86    13      /* Can only be toggled */
0200 #define VM86    14      /* Not used for now */
0201 #define RF86    15      /* Not emulated precisely */
0202 /* Actually NT and IOPL are kept in memory */
0203 #define NT86    17
0204 #define IOPL86  18      /* Actually 18 and 19 */
0205 #define OF86    20      /* Overflow flag */
0206 #define DF86    21      /* Direction flag */
0207 #define IF86    22      /* Interrupt flag */
0208 #define TF86    23      /* Single step (trap) flag */
0209 #define SF86    24      /* Sign flag */
0210 #define ZF86    25      /* Zero flag */
0211 #define AF86    27      /* Auxiliary carry flag */
0212 #define PF86    29      /* Parity flag */
0213 #define CF86    31      /* Carry flag, least significant bit of eflags */
0214 
0215 /* Where the less important flags are placed in PPC cr (cr bit numbers) */
0216 #define RF  20      /* Suppress trap flag: cr5[0] */
0217 #define DF  21      /* Direction flag: cr5[1] */
0218 #define IF  22      /* Interrupt flag: cr5[2] */
0219 #define TF      23      /* Single step flag: cr5[3] */
0220 
0221 /* Now the flags which are frequently used */
0222 /*
0223  * CF_IN is a copy of the input carry with PPC polarity,
0224  * it is cleared for add, set for sub and cmp,
0225  * equal to the x86 carry for adc and to its complement for sbb.
0226  * It is used to evaluate AF and CF.
0227  */
0228 #define CF_IN       0x80000000
0229 
0230 /* #define GET_CF_IN(dst)   rlwinm dst,flags,1,0x01 */
0231 
0232 /* CF_IN_CR set in flags means that cr4[0] is a copy of carry bit */
0233 #define CF_IN_CR    0x40000000
0234 /* EVAL_CF calls _eval_cf (branch and link) when CF_IN_CR is clear; it clobbers r3 and cr0 */
0235 #define EVAL_CF     andis. r3,flags,(CF_IN_CR)>>16; beql- _eval_cf
0236 
0237 /*
0238  * CF_STATE tells how to compute the carry bit.
0239  * CF_NOTRES16 and CF_NOTRES8 are never set explicitly,
0240  * but they may happen after a cmc instruction.
0241  */
0242 #define CF      16      /* cr4[0] */
0243 #define CF_LOCATION 0x30000000
0244 #define CF_ZERO     0x00000000
0245 #define CF_EXPLICIT 0x00000000
0246 #define CF_COMPLEMENT   0x08000000  /* Indeed a polarity bit */
0247 #define CF_STATE_MASK   (CF_LOCATION|CF_COMPLEMENT)
0248 #define CF_VALUE    0x08000000
0249 #define CF_SET      0x08000000
0250 #define CF_RES32    0x10000000
0251 #define CF_NOTRES32 0x18000000
0252 #define CF_RES16    0x20000000
0253 #define CF_NOTRES16 0x28000000
0254 #define CF_RES8     0x30000000
0255 #define CF_NOTRES8  0x38000000
0256 /* Carry state selected after add/sub of each size; only the 32 bit subtract uses the complemented form */
0257 #define CF_ADDL     CF_RES32
0258 #define CF_SUBL     CF_NOTRES32
0259 #define CF_ADDW     CF_RES16
0260 #define CF_SUBW     CF_RES16
0261 #define CF_ADDB     CF_RES8
0262 #define CF_SUBB     CF_RES8
0263 /* CF_ROTCNT: dst = CF_LOCATION field of flags moved to bits 0x18 (0, 8, 16 or 24). CF_POL and CF_POL_INSERT copy the CF_COMPLEMENT bit of flags to PPC bit pos of dst. */
0264 #define CF_ROTCNT(dst)  rlwinm dst,flags,7,0x18
0265 #define CF_POL(dst,pos) rlwinm dst,flags,(36-pos)%32,pos,pos
0266 #define CF_POL_INSERT(dst,pos)  \
0267             rlwimi dst,flags,(36-pos)%32,pos,pos
0268 #define RES2CF(dst) rlwinm dst,result,8,7,15 /* dst = result rotated left by 8, PPC bits 7..15 kept */
0269 
0270 /*
0271  * OF_STATE tells how to compute the overflow bit. When the low order bit
0272  * is set (OF_EXPLICIT), it means that OF is the exclusive or of the
0273  * two other bits. For the reason of this choice, see rotate instructions.
0274  */
0275 #define OF      1       /* Only after EVAL_OF */
0276 #define OF_STATE_MASK   0x07000000
0277 #define OF_INCDEC   0x00000000
0278 #define OF_EXPLICIT 0x01000000
0279 #define OF_ZERO     0x01000000
0280 #define OF_VALUE        0x04000000
0281 #define OF_SET          0x04000000
0282 #define OF_ONE      0x05000000
0283 #define OF_XOR      0x06000000
0284 #define OF_ARITHL   0x06000000
0285 #define OF_ARITHW   0x02000000
0286 #define OF_ARITHB   0x04000000
0287 /* EVAL_OF skips the call to _eval_of only when bit 0x01000000 of flags is set and 0x02000000 is clear, then tests OF_VALUE so that cr0 reflects it; clobbers r3 and cr0. Presumably _eval_of leaves a valid OF_VALUE in flags - TODO confirm. */
0288 #define EVAL_OF     rlwinm. r3,flags,6,0,1; bngl+ _eval_of; andis. r3,flags,OF_VALUE>>16
0289 
0290 /* See _eval_of to see how this can be used: dst = OF_STATE field of flags times 4 */
0291 #define OF_ROTCNT(dst)  rlwinm dst,flags,10,0x1c
0292 
0293 /*
0294  * SIGNED_IN_CR means that cr6 is set as after a signed compare:
0295  * - cr6[0] is SF^OF for jl/jnl/setl/setnl...
0296  * - cr6[1] is ~((SF^OF)+ZF) for jg/jng/setg/setng...
0297  * - cr6[2] is ZF (ZF_IN_CR is always set if this bit is set)
0298  */
0299 #define SLT     24      /* cr6[0], signed less than */
0300 #define SGT     25      /* cr6[1], signed greater than */
0301 #define SIGNED_IN_CR    0x00800000
0302 /* Each EVAL_xxx macro below tests one xxx_IN_CR bit of flags and calls the named helper (branch and link) when it is clear; r3 and cr0 are clobbered */
0303 #define EVAL_SIGNED andis. r3,flags,SIGNED_IN_CR>>16; beql- _eval_signed
0304 
0305 /*
0306  * Above in CR means that cr4 is set as after an unsigned compare:
0307  * - cr4[0] is CF (CF_IN_CR is also set)
0308  * - cr4[1] is ~(CF+ZF) (ZF_IN_CR is also set)
0309  */
0310 #define ABOVE       17      /* cr4[1] */
0311 #define ABOVE_IN_CR 0x00400000
0312 
0313 #define EVAL_ABOVE  andis. r3,flags,ABOVE_IN_CR>>16; beql- _eval_above
0314 
0315 /* SF_IN_CR means cr6[0] is a copy of SF. It implies ZF_IN_CR is also set */
0316 #define SF      24      /* cr6[0] */
0317 #define SF_IN_CR    0x00200000
0318 
0319 #define EVAL_SF     andis. r3,flags,SF_IN_CR>>16; beql- _eval_sf_zf
0320 
0321 /* ZF_IN_CR means cr6[2] is a copy of ZF. */
0322 #define ZF  26      /* cr6[2] */
0323 #define ZF_IN_CR    0x00100000
0324 /* EVAL_ZF shares its helper with EVAL_SF. ZF2ZF86 inserts bit ZF of s into bit ZF86 of d; ZF862ZF copies bit ZF86 of reg into its own bit ZF. */
0325 #define EVAL_ZF     andis. r3,flags,ZF_IN_CR>>16; beql- _eval_sf_zf
0326 #define ZF2ZF86(s,d)    rlwimi d,s,ZF-ZF86,ZF86,ZF86
0327 #define ZF862ZF(reg)    rlwimi reg,reg,32+ZF86-ZF,ZF,ZF
0328 
0329 /*
0330  * ZF_PROTECT means cr6[2] is the only valid value for ZF. This is necessary
0331  * because some infrequent instructions may leave SF and ZF in an apparently
0332  * inconsistent state (both set): sahf, popf and the few (not implemented)
0333  * instructions that only affect ZF.
0334  */
0335 #define ZF_PROTECT  0x00080000
0336 
0337 /* The parity is always evaluated when it is needed */
0338 #define PF      0       /* Only after EVAL_PF */
0339 #define EVAL_PF     bl _eval_pf
0340 
0341 /* This field gives the shift amount to use to evaluate SF
0342    and ZF when ZF_PROTECT is not set */
0343 #define RES_SIZE_MASK   0x00060000
0344 #define RESL        0x00000000
0345 #define RESW        0x00040000
0346 #define RESB        0x00060000
0347 /* RES_SHIFT: dst = 0, 16 or 24 for RESL, RESW and RESB respectively */
0348 #define RES_SHIFT(dst)  rlwinm dst,flags,18,0x18
0349 
0350 /* SUBTRACTING is set if the last flag setting instruction was sub/sbb/cmp,
0351    used to evaluate OF and AF */
0352 #define SUBTRACTING 0x00010000
0353 /* GET_ADDSUB: dst = 1 if SUBTRACTING is set in flags, 0 otherwise */
0354 #define GET_ADDSUB(dst) rlwinm dst,flags,16,0x01
0355 
0356 /* rotate (rcl/rcr/rol/ror) affect CF and OF but not other flags */
0357 #define ROTATE_MASK (CF_IN_CR|CF_STATE_MASK|ABOVE_IN_CR|OF_STATE_MASK|SIGNED_IN_CR)
0358 #define ROTATE_FLAGS    rlwimi flags,one,24,ROTATE_MASK
0359 /* ROTATE_FLAGS sets the ROTATE_MASK field of flags to OF_EXPLICIT (one rotated left by 24) and clears the other bits of that field */
0360 /*
0361  * INCDEC_FIELD has at most one bit set when the last flag setting instruction
0362  * was either inc or dec (which do not affect the carry). When one of these
0363  * bits is set, it affects the way OF, SF, ZF, AF, and PF are evaluated.
0364  */
0365 #define INCDEC_FIELD    0x0000ff00
0366 /* Bit numbers inside INCDEC_FIELD, one per operation and operand size */
0367 #define DECB_SHIFT  8
0368 #define INCB_SHIFT  9
0369 #define DECW_SHIFT  10
0370 #define INCW_SHIFT  11
0371 #define DECL_SHIFT  14
0372 #define INCL_SHIFT  15
0373 /* INCDEC_MASK covers every field of flags rewritten by inc/dec; the carry related fields are left out */
0374 #define INCDEC_MASK (OF_STATE_MASK|SIGNED_IN_CR|ABOVE_IN_CR|SF_IN_CR|\
0375             ZF_IN_CR|ZF_PROTECT|RES_SIZE_MASK|SUBTRACTING|\
0376             INCDEC_FIELD)
0377 /* Operations to perform to tell where the flags are after inc or dec: a single INCDEC_FIELD bit is inserted, all other INCDEC_MASK bits are cleared */
0378 #define INC_FLAGS(BWL)  rlwimi flags,one,INC##BWL##_SHIFT,INCDEC_MASK
0379 #define DEC_FLAGS(BWL)  rlwimi flags,one,DEC##BWL##_SHIFT,INCDEC_MASK
0380 
0381 /* How the flags are set after arithmetic operations. Every FLAGS_xxx macro expands to a fully parenthesized constant so that it can safely be combined with other operators such as >> */
0382 #define FLAGS_ADD(BWL)  (CF_ADD##BWL|OF_ARITH##BWL|RES##BWL)
0383 #define FLAGS_SBB(BWL)  (CF_SUB##BWL|OF_ARITH##BWL|RES##BWL|SUBTRACTING)
0384 #define FLAGS_SUB(BWL)  (FLAGS_SBB(BWL)|CF_IN)
0385 #define FLAGS_CMP(BWL)  (FLAGS_SUB(BWL)|ZF_IN_CR|CF_IN_CR|SIGNED_IN_CR|ABOVE_IN_CR)
0386 
0387 /* How the flags are set after logical operations */
0388 #define FLAGS_LOG(BWL)  (CF_ZERO|OF_ZERO|RES##BWL)
0389 #define FLAGS_TEST(BWL) (FLAGS_LOG(BWL)|ZF_IN_CR|SIGNED_IN_CR|SF_IN_CR)
0390 
0391 /*  How the flags are set after bt/btc/btr/bts. */
0392 #define FLAGS_BTEST (CF_IN_CR|CF_ADDL|OF_ZERO|RESL)
0393 
0394 /*  How the flags are set after bsf/bsr. */
0395 #define FLAGS_BSRCH(WL) (CF_ZERO|OF_ZERO|RES##WL|ZF_IN_CR)
0396 
0397 /* How the flags are set after logical right shifts */
0398 #define FLAGS_SHR(BWL)  (CF_EXPLICIT|OF_ARITH##BWL|RES##BWL)
0399 
0400 /* How the flags are set after double length shifts */
0401 #define FLAGS_DBLSH(WL) (CF_EXPLICIT|OF_ARITH##WL|RES##WL)
0402 
0403 /* How the flags are set after multiplies */
0404 #define FLAGS_MUL   (CF_EXPLICIT|OF_EXPLICIT)
0405 /* SET_FLAGS loads the upper half of flags and clears the lower half (lis); ADD_FLAGS adds to the upper half (addis). Only the upper 16 bits of fl are used. */
0406 #define SET_FLAGS(fl)   lis flags,(fl)>>16
0407 #define ADD_FLAGS(fl)   addis flags,flags,(fl)>>16
0408 
0409 /*
0410  * We are always off by one when compared with Intel's eip, this shortens
0411  * code by allowing to load next byte with lbzu x,1(eip). The register
0412  * called eip actually contains csbase+eip, and thus should be called lip
0413  * for linear ip.
0414  */
0415 
0416 /*
0417  * Reason codes passed to the C part of the emulator, this includes all
0418  * instructions which may change the current code segment. These definitions
0419  * will soon go into a separate include file. Codes 0 to 255 correspond
0420  * directly to the interrupt/trap that has to be generated.
0421  */
0422 
0423 #define code_divide_err 0
0424 #define code_trap   1   /* Single step, raised by the trap: path */
0425 #define code_int3   3
0426 #define code_into   4
0427 #define code_bound  5
0428 #define code_ud     6
0429 #define code_dna    7   /* FPU not available */
0430 
0431 #define code_iretw  256 /* Interrupt returns */
0432 #define code_iretl  257
0433 #define code_lcallw 258 /* Far calls and jumps */
0434 #define code_lcalll 259
0435 #define code_ljmpw  260
0436 #define code_ljmpl  261
0437 #define code_lretw  262 /* Far returns */
0438 #define code_lretl  263
0439 #define code_softint    264 /* int $xx */
0440 #define code_lock   265 /* Lock prefix */
0441 /* Codes 1024 to 2047 are used for I/O port access instructions:
0442  - The three LSB define the port size (1, 2 or 4)
0443  - bit of weight 512 means out if set, in if clear
0444  - bit of weight 256 means ins/outs if set, in/out if clear
0445  - bit of weight 128 means use 32 bit addresses if set, 16 bit if clear
0446    (only used for ins/outs instructions, always clear for in/out)
0447  Note: these expand to unparenthesized sums, parenthesize them inside larger expressions. */
0448 #define code_inb    1024+1
0449 #define code_inw    1024+2
0450 #define code_inl    1024+4
0451 #define code_outb   1024+512+1
0452 #define code_outw   1024+512+2
0453 #define code_outl   1024+512+4
0454 #define code_insb_a16   1024+256+1
0455 #define code_insw_a16   1024+256+2
0456 #define code_insl_a16   1024+256+4
0457 #define code_outsb_a16  1024+512+256+1
0458 #define code_outsw_a16  1024+512+256+2
0459 #define code_outsl_a16  1024+512+256+4
0460 #define code_insb_a32   1024+256+128+1
0461 #define code_insw_a32   1024+256+128+2
0462 #define code_insl_a32   1024+256+128+4
0463 #define code_outsb_a32  1024+512+256+128+1
0464 #define code_outsw_a32  1024+512+256+128+2
0465 #define code_outsl_a32  1024+512+256+128+4
0466 
0467 #define state 31
0468 /* r31 (state) is a pointer to a structure describing the emulated x86
0469 processor, its layout is the following:
0470 
0471 first the general purpose registers, they are in little endian byte order
0472 
0473 offset  name
0474 
0475    0    eax/ax/al
0476    1    ah
0477    4    ecx/cx/cl
0478    5    ch
0479    8    edx/dx/dl
0480    9    dh
0481   12    ebx/bx/bl
0482   13    bh
0483   16    esp/sp
0484   20    ebp/bp
0485   24    esi/si
0486   28    edi/di
0487 */
0488 /* Byte offsets of the registers from state; no CH, DL, DH, BL or BH names are defined in this list */
0489 #define AL  0
0490 #define AX  0
0491 #define EAX 0
0492 #define AH  1
0493 #define CL  4
0494 #define CX  4
0495 #define ECX 4
0496 #define DX  8
0497 #define EDX 8
0498 #define BX  12
0499 #define EBX 12
0500 #define SP  16
0501 #define ESP 16
0502 #define BP  20
0503 #define EBP 20
0504 #define SI  24
0505 #define ESI 24
0506 #define DI  28
0507 #define EDI 28
0508 
0509 /*
0510 then the rest of the machine state, big endian !
0511 
0512 offset     name
0513 
0514   32       essel     segment register selectors (values)
0515   36       cssel
0516   40       sssel
0517   44       dssel
0518   48       fssel
0519   52       gssel
0520   56       eipimg    true eip (register named eip is csbase+eip)
0521   60       eflags    eip and eflags only valid when C code running !
0522   64       esbase    segment registers bases
0523   68       csbase
0524   72       ssbase
0525   76       dsbase
0526   80       fsbase
0527   84       gsbase
0528   88       iobase    For I/O instructions, I/O space virtual base
0529   92       ioperm    I/O permission bitmap pointer
0530   96       reason    Reason code when calling external emulator
0531  100       nexteip   eip past instruction for external emulator
0532  104       parm1     parameter for external emulator
0533  108       parm2     parameter for external emulator
0534  112       _opcode   current opcode register for external emulator
0535  116       _base     segment register base for external emulator
0536  120       _offset   instruction operand offset
0537  More internal state was dumped here for debugging in first versions
0538 
0539  128       vbase     where the 1Mb memory is mapped
0540  132       cntimg    instruction counter
0541  136                 scratch
0542  192       eipstat   array of 32k unsigned long pairs for eip stats
0543 */
0544 
0545 #define essel   32      /* Segment register selectors (values) */
0546 #define cssel   36
0547 #define sssel   40
0548 #define dssel   44
0549 #define fssel   48
0550 #define gssel   52
0551 #define eipimg  56      /* True eip (register named eip is csbase+eip) */
0552 #define eflags  60      /* Only valid when C code is running */
0553 #define esbase  64      /* Segment register bases */
0554 #define csbase  68
0555 #define ssbase  72
0556 #define dsbase  76
0557 #define fsbase  80
0558 #define gsbase  84
0559 #define iobase  88      /* I/O space virtual base */
0560 #define ioperm  92      /* I/O permission bitmap pointer */
0561 #define reason  96      /* Reason code when calling external emulator */
0562 #define nexteip 100     /* eip past instruction for external emulator */
0563 #define parm1   104     /* Parameters for external emulator */
0564 #define parm2   108
0565 #define _opcode 112     /* Current opcode register for external emulator */
0566 #define _base   116     /* Segment register base for external emulator */
0567 #define _offset 120     /* Instruction operand offset */
0568 #define vbase   128     /* Where the 1Mb memory is mapped */
0569 #define cntimg  132     /* Instruction counter */
0570 #ifdef EIP_STATS
0571 #define eipstat 192     /* Array of 32k unsigned long pairs for eip stats */
0572 #endif
0573 /* Global registers (the numbers are PPC general purpose register numbers) */
0574 
0575 /* Some segment register bases are permanently kept in registers since they
0576 are often used: these are csb, esb and ssb because they are
0577 required for jumps, string instructions, and pushes/pops/calls/rets.
0578 dsbase is not kept in a register but loaded from memory to allow somewhat
0579 more parallelism in the main emulation loop.
0580 */
0581 
0582 #define one 30      /* Constant one, so pervasive */
0583 #define ssb 29      /* ss segment base */
0584 #define csb 28      /* cs segment base */
0585 #define esb 27      /* es segment base */
0586 #define eip 26      /* That one is indeed csbase+(e)ip-1 */
0587 #define result  25      /* For the use of result, op1, op2 */
0588 #define op1 24      /* see the section on flag emulation */
0589 #define op2 23
0590 #define opbase  22      /* default opcode table */
0591 #define flags   21      /* See earlier description */
0592 #define opcode  20      /* Opcode */
0593 #define opreg   19      /* Opcode extension/register number */
0594 /* base is reloaded with the base of the ds segment at the beginning of
0595 every instruction, it is modified by segment override prefixes, when
0596 the default base segment is ss, or when the modrm byte specifies a
0597 register operand */
0598 #define base    18      /* Instruction's operand segment base */
0599 #define offset  17      /* Instruction's memory operand offset */
0600 /* used to address a table telling how to decode the addressing mode
0601 specified by the modrm byte */
0602 #define adbase  16      /* addressing mode table */
0603 /* Following registers are used only as dedicated temporaries during decoding,
0604 they are free for use during emulation */
0605 /*
0606  * ceip (current eip) is only in use when we call the external emulator for
0607  * instructions that fault. Note that it is forbidden to change flags before
0608  * the check for the fault happens (divide by zero...) ! ceip is also used
0609  * when measuring timing.
0610  */
0611 #define ceip 15
0612 
0613 /* A register used to measure timing information (when enabled) */
0614 #ifdef EIP_STATS
0615 #define tstamp 14
0616 #endif
0617 
0618 #define count 12        /* Instruction counter; r12 is not among the registers saved by em86_enter, it is reloaded from cntimg at restart */
0619 
0620 #define r0 0
0621 #define r1 1            /* PPC Stack pointer. */
0622 #define r3 3
0623 #define r4 4
0624 #define r5 5
0625 #define r6 6
0626 #define r7 7
0627 
0628 /* Macros to read code stream; eip points one byte before the next byte to fetch and is advanced by each macro */
0629 #define NEXTBYTE(dest) lbzu dest,1(eip) /* byte load with update of eip */
0630 #define NEXTWORD(dest) lhbrx dest,eip,one; la eip,2(eip) /* byte reversed 16 bit load from eip+1 */
0631 #define NEXTDWORD(dest) lwbrx dest,eip,one; la eip,4(eip) /* byte reversed 32 bit load from eip+1 */
0632 #define NEXT    b nop /* fetch and dispatch the next opcode */
0633 #define GOTNEXT b gotopcode /* dispatch the opcode already loaded in the opcode register */
0634 
0635 #ifdef __BOOT__
0636         START_GOT
0637         GOT_ENTRY(_jtables)
0638         GOT_ENTRY(jtab_www)
0639         GOT_ENTRY(adtable)
0640         END_GOT
0641 #else
0642         .text
0643 #endif
0644         .align 2
0645         .global em86_enter              # entry point, r3 = pointer to the emulated CPU structure
0646         .type em86_enter,@function
0647 em86_enter: stwu r1,-96(r1)         # allocate stack
0648         mflr r0                         # return address
0649         stmw 14,24(r1)                  # save r14 to r31 at 24..95(r1)
0650         mfcr r4
0651         stw r0,100(r1)                  # return address goes at old r1 + 4
0652         mr state,r3                     # keep the argument in r31
0653         stw r4,20(r1)                   # save cr
0654 #ifdef __BOOT__
0655 /* We need this since r30 is the default GOT pointer */
0656 #define r30 30
0657         GET_GOT
0658 /* The relocation of these tables is explicit, this could be done
0659  * automatically with fixups but would add more than 8kb in the fixup tables.
0660  * The loop below adds r4 to every word from _jtables up to and including
0661  * the word at _endjtables. */
0661         lwz r3,GOT(_jtables)
0662         lwz r4,_endjtables-_jtables(r3) # word stored at _endjtables
0663         sub. r4,r3,r4                   # r4 = address from the GOT minus that word
0664         beq+ 1f                         # zero difference, tables need no adjustment
0665         li r0,((_endjtables-_jtables)>>2)+1
0666         addi r3,r3,-4                   # bias for the update form load below
0667         mtctr r0                        # number of words to adjust
0668 0:      lwzu r5,4(r3)
0669         add r5,r5,r4
0670         stw r5,0(r3)
0671         bdnz 0b
0672 1:      lwz adbase,GOT(adtable)
0673         lwz opbase,GOT(jtab_www)
0674 /* Now r30 is only used as constant 1 */
0675 #undef r30
0676         li one,1            # pervasive constant
0677 #else
0678         lis opbase,jtab_www@ha
0679         lis adbase,adtable@ha
0680         li one,1            # pervasive constant
0681         addi opbase,opbase,jtab_www@l
0682         addi adbase,adbase,adtable@l
0683 #ifdef EIP_STATS
0684         li ceip,0
0685         mftb tstamp                     # initial time base sample
0686 #endif
0687 #endif
0688 /* We branch back here when calling an external function tells us to resume. The lazy flag registers are rebuilt from the eflags image and the segment bases, eip and count are reloaded from the CPU structure. */
0689 restart:    lwz r3,eflags(state)
0690         lis flags,(OF_EXPLICIT|ZF_IN_CR|ZF_PROTECT|SF_IN_CR)>>16
0691         lwz csb,csbase(state)
0692         extsb result,r3         # SF/PF
0693         rlwinm op1,r3,31,0x08       # AF
0694         lwz eip,eipimg(state)
0695         ZF862ZF(r3)         # cr6
0696         addi op2,op1,0          # AF
0697         lwz ssb,ssbase(state)
0698         rlwimi flags,r3,15,OF_VALUE # OF
0699         rlwimi r3,r3,32+RF86-RF,RF,RF   # RF
0700         lwz esb,esbase(state)
0701         ori result,result,0xfb      # PF
0702         mtcrf 0x06,r3           # RF/DF/IF/TF/SF/ZF
0703         lbzux opcode,eip,csb            # eip = csb + eip image, fetch the first opcode byte
0704         rlwimi flags,r3,27,CF_VALUE # CF
0705         xori result,result,0xff     # PF
0706         lwz count,cntimg(state)
0707         GOTNEXT             # start the emulator
0708 
0709 /* Now return: undo the em86_enter prologue, r3 is left untouched for the caller */
0710 exit:       lwz r0,100(r1)              # saved return address
0711         lwz r4,20(r1)                   # saved cr
0712         mtlr r0
0713         lmw 14,24(r1)                   # restore r14 to r31
0714         mtcr r4
0715         addi r1,r1,96                   # release the frame
0716         blr
0717 
0718 trap:       crmove 0,RF                 # single step requested: remember RF in cr bit 0
0719         crclr RF                        # RF only suppresses one trap
0720         bt- 0,resume                    # RF was set, execute the instruction normally
0721         sub ceip,eip,csb
0722         li r3,code_trap
0723 complex:    addi eip,eip,1              # call the C part, reason code in r3, parameters in r4 and r5
0724         stw r3,reason(state)
0725         sub eip,eip,csb                 # eip past the bytes fetched so far, without the cs base
0726         stw op1,240(state)              # NOTE(review): 240..252 are unnamed offsets; with EIP_STATS they fall inside the eipstat array which starts at 192 - confirm intended
0727         stw op2,244(state)
0728         stw result,248(state)
0729         stw flags,252(state)
0730         stw r4,parm1(state)
0731         stw r5,parm2(state)
0732         stw opcode,_opcode(state)
0733         bl _eval_flags                  # presumably returns the eflags image in r3, which is stored below - TODO confirm
0734         stw base,_base(state)
0735         stw eip,nexteip(state)
0736         stw r3,eflags(state)
0737         mr r3,state                     # argument of em86_trap
0738         stw offset,_offset(state)
0739         stw ceip,eipimg(state)
0740         stw count,cntimg(state)
0741         bl em86_trap
0742         cmpwi r3,0
0743         bne exit                        # non zero: leave the emulator with r3 as return value
0744         b restart                       # zero: reload the CPU structure and continue
0745 
0746 /* Main loop */
0747 /*
0748  * The two LSB of each entry in the main table mean the following:
0749  * 00: indirect opcode: modrm follows and the three middle bits are an
0750  *     opcode extension. The entry points to another jump table.
0751  * 01: direct instruction, branch directly to the routine.
0752  * 10: modrm specifies byte size memory and register operands.
0753  * 11: modrm specifies word/long memory and register operands.
0754  *
0755  *  The modrm byte, if present, is always loaded in r7.
0756  *
0757  * Note: most "mr x,y" instructions have been replaced by "addi x,y,0" since
0758  * the latter can be executed in the second integer unit on 603e.
0759  */
0760 
0761 /*
0762  * This code is a very good example of absolutely unmaintainable code.
0763  * It was actually much easier to write than it is to understand !
0764  * If my computations are right, the maximum path length from fetching
0765  * the opcode to exiting to the actual instruction execution is
0766  * 46 instructions (for non-prefixed, single byte opcode instructions).
0767  *
0768  */
0769         .align 5
0770 #ifdef EIP_STATS
0771 nop:        NEXTBYTE(opcode)
0772 gotopcode:  slwi r3,opcode,2            # byte offset in the opcode table
0773         bt- TF,trap                     # single step flag set
0774 resume:     lwzx r4,opbase,r3           # table entry
0775         addi r5,state,eipstat+4
0776         clrlslwi r6,ceip,17,3           # (ceip of the previous instruction modulo 32k) times 8
0777         mtctr r4
0778         lwzux r7,r5,r6                  # second word of the pair, r5 now points to it
0779         slwi. r0,r4,30      # two lsb of table entry
0780         sub r7,r7,tstamp                # add the time base ticks elapsed since the last sample
0781         lwz r6,-4(r5)                   # first word of the pair, incremented by one
0782         mftb tstamp
0783         addi r6,r6,1
0784         sub ceip,eip,csb                # ceip of the instruction about to be executed
0785         stw r6,-4(r5)
0786         add r7,r7,tstamp
0787         lwz base,dsbase(state)
0788         stw r7,0(r5)
0789 #else
0790 nop:        NEXTBYTE(opcode)
0791 gotopcode:  slwi r3,opcode,2            # byte offset in the opcode table
0792         bt- TF,trap                     # single step flag set
0793 resume:     lwzx r4,opbase,r3           # table entry
0794         sub ceip,eip,csb
0795         mtctr r4
0796         slwi. r0,r4,30      # two lsb of table entry
0797         lwz base,dsbase(state)          # default segment base is ds
0798         addi count,count,1
0799 #endif
0800         bgtctr-         # for instructions without modrm
0801 
0802 /* modrm byte present */
0803         NEXTBYTE(r7)        # modrm byte
0804         cmplwi cr1,r7,192               # cr1: less than means a memory operand
0805         rlwinm opreg,r7,31,0x1c         # middle 3 bits of modrm times 4
0806         beq- cr0,8f     # extended opcode
0807 /* modrm with middle 3 bits specifying a register (non prefixed) */
0808         rlwinm r0,r4,3,0x8              # 8 for word/long entries, 0 for byte entries
0809         li r4,0x1c0d
0810         rlwimi opreg,r7,27,0x01         # bit 5 of modrm also goes to bit 0
0811         srw r4,r4,r0                    # mask: 0x1c for word/long, 0x1c0d for byte
0812         and opreg,opreg,r4              # offset of the register operand in the CPU structure
0813         blt cr1,9f
0814 /* modrm with 2 register operands */
0815 1:      rlwinm offset,r7,2,0x1c         # low 3 bits of modrm times 4
0816         addi base,state,0               # the operand lives in the CPU structure
0817         rlwimi offset,r7,30,0x01        # bit 2 of modrm also goes to bit 0
0818         and offset,offset,r4            # same mask as for opreg
0819         bctr
0820 
0821 /* Prefixes: first segment overrides */
/* Each override fetches the following byte into r7, replaces base (except
 * _ds, which keeps the value already in base), sets the 0x80000000 bit in
 * opcode to record that an override is active, and joins the prefixed
 * dispatch at local label 2.
 */
0822         .align 4
0823 _es:        NEXTBYTE(r7); addi base,esb,0
0824         oris opcode,opcode,0x8000; b 2f
0825 _cs:        NEXTBYTE(r7); addi base,csb,0
0826         oris opcode,opcode,0x8000; b 2f
0827 _fs:        NEXTBYTE(r7); lwz base,fsbase(state)
0828         oris opcode,opcode,0x8000; b 2f
0829 _gs:        NEXTBYTE(r7); lwz base,gsbase(state)
0830         oris opcode,opcode,0x8000; b 2f
0831 _ss:        NEXTBYTE(r7); addi base,ssb,0
0832         oris opcode,opcode,0x8000; b 2f
0833 _ds:        NEXTBYTE(r7)
0834         oris opcode,opcode,0x8000; b 2f
0835 
0836 /* Lock (unimplemented) and repeat prefixes */
/* _lock hands over to `complex` with r3 = code_lock. The repeat prefixes
 * write the 0x1800 field of opcode: 0x1000 for repnz, 0x0800 for repz. */
0837 _lock:      li r3,code_lock; b complex
0838 _repnz:     NEXTBYTE(r7); rlwimi opcode,one,12,0x1800; b 2f
0839 _repz:      NEXTBYTE(r7); rlwimi opcode,one,11,0x1800; b 2f
0840 
0841 /* Operand and address size prefixes */
/* The prefix bit (0x200 operand size, 0x400 address size) is set in opcode
 * and the table index r3 is rebuilt from opcode<<2 before joining label 2. */
0842         .align 4
0843 _opsize:    NEXTBYTE(r7); ori opcode,opcode,0x200
0844         rlwinm r3,opcode,2,0x1ffc; b 2f
0845 _adsize:    NEXTBYTE(r7); ori opcode,opcode,0x400
0846         rlwinm r3,opcode,2,0x1ffc; b 2f
0847 
/*
 * Dispatch of prefixed and two byte opcodes.
 * _twobytes fetches the second opcode byte into r7 and adds 0x400 to the
 * table index r3. At label 2, r7 (the byte following the prefix) replaces
 * the low byte of opcode and the 0x3fc field of r3, then the table entry is
 * loaded and tested exactly as in the non prefixed path.
 * Local labels: 3 memory operand, 4/5 displacement only (16/32 bit),
 * 6 opcode extension, 7 opcode extension with register operand.
 */
0848 _twobytes:  NEXTBYTE(r7); addi r3,r3,0x400
0849 2:      rlwimi r3,r7,2,0x3fc
0850         lwzx r4,opbase,r3
0851         rlwimi opcode,r7,0,0xff
0852         mtctr r4
0853         slwi. r0,r4,30
0854         bgtctr-             # direct instruction
0855 /* modrm byte in a prefixed instruction */
0856         NEXTBYTE(r7)            # modrm byte
0857         cmpwi cr1,r7,192
0858         rlwinm opreg,r7,31,0x1c
0859         beq- 6f
0860 /* modrm with middle 3 bits specifying a register (prefixed) */
0861         rlwinm r0,r4,3,0x8
0862         li r4,0x1c0d
0863         rlwimi opreg,r7,27,0x01
0864         srw r4,r4,r0
0865         and opreg,opreg,r4
0866         bnl cr1,1b          # 2 register operands
0867 /* modrm specifying memory with prefix */
/* r3 becomes a sign extended offset derived from the prefix bits of the
 * table index plus the mod field of r7; it selects the addressing mode
 * word relative to adbase (whose 0x1c field receives the r/m field). */
0868 3:      rlwinm r3,r3,27,0xff80
0869         rlwimi adbase,r7,2,0x1c
0870         extsh r3,r3
0871         rlwimi r3,r7,31,0x60
0872         lwzx r4,r3,adbase
0873         cmpwi cr1,r4,0x3090
0874         bnl+ cr1,10f
0875 /* displacement only addressing modes */
/* An addressing mode word of 0x2000 selects a 16 bit displacement, any
 * other value reaching here a 32 bit one. */
0876 4:      cmpwi r4,0x2000
0877         bne 5f
0878         NEXTWORD(offset)
0879         bctr
0880 5:      NEXTDWORD(offset)
0881         bctr
0882 /* modrm with opcode extension (prefixed) */
/* The table entry in r4 is used as the base of a secondary table indexed
 * by the reg field (opreg); the word found there becomes the handler. */
0883 6:      lwzx r4,r4,opreg
0884         mtctr r4
0885         blt cr1,3b
0886 /* modrm with opcode extension and register operand */
0887 7:      rlwinm offset,r7,2,0x1c
0888         addi base,state,0
0889         rlwinm r0,r4,3,0x8
0890         li r4,0x1c0d
0891         rlwimi offset,r7,30,0x01
0892         srw r4,r4,r0
0893         and offset,offset,r4
0894         bctr
0895 /* modrm with opcode extension (non prefixed) */
0896 8:      lwzx r4,r4,opreg
0897         mtctr r4
0898 /* FIXME ? We continue fetching even if the opcode extension is undefined.
0899  * It shouldn't do any harm on real mode emulation anyway, and for ROM
0900  * BIOS emulation, we are supposed to read valid code.
0901  */
0902         bnl cr1,7b
0903 /* modrm specifying memory without prefix */
0904 9:      rlwimi adbase,r7,2,0x1c # memory addressing mode computation
0905         rlwinm r3,r7,31,0x60
0906         lwzx r4,r3,adbase
0907         cmplwi cr1,r4,0x3090
0908         blt- cr1,4b     # displacement only addressing mode
/*
 * Label 10 is shared by the prefixed and non prefixed paths. r4 holds the
 * addressing mode word. cr0 is set from the mod field of r7 moved to the
 * two top bits of r0: equal = no displacement, greater = 8 bit
 * displacement, less = 16/32 bit displacement. cr1 still holds the compare
 * of r4 with 0x3090 (equal: an sib byte follows) until it is reloaded with
 * the compare of the index selector r3 against 8.
 * cr2 compares r4 with opcode (signed); when the result is not "greater"
 * base is replaced by ssb before jumping to the handler.
 */
0909 10:     rlwinm. r0,r7,24,0,1    # three cases distinguished
0910         beq- cr1,15f        # an sib follows
0911         rlwinm r3,r4,30,0x1c    # 16bit/32bit/%si index/%di index
0912         cmpwi cr1,r3,8      # set cr1 as early as possible
0913         rlwinm r6,r4,26,0x1c    # base register
0914         lwbrx offset,state,r6   # load the base register
0915         beq cr0,14f     # no displacement
0916         cmpw cr2,r4,opcode  # check for ss as default base
0917         bgt cr0,12f     # byte offset
0918         beq cr1,11f     # 32 bit displacement
0919         NEXTWORD(r5)        # 16 bit displacement
0920         bgt cr1,13f     # d16(base,index)
0921 /* d16(base) */
0922         add offset,offset,r5
0923         clrlwi offset,offset,16  # wrap to 16 bits
0924         bgtctr cr2
0925         addi base,ssb,0
0926         bctr
0927 /* d32(base) */
0928 11:     NEXTDWORD(r5)
0929         add offset,offset,r5
0930         bgtctr cr2
0931         addi base,ssb,0
0932         bctr
0933 /* 8 bit displacement */
0934 12:     NEXTBYTE(r5)
0935         extsb r5,r5
0936         bgt cr1,13f
0937 /* d8(base) */
/* r6 = sign extended low byte of r4, ORed with 0xffff: the resulting mask
 * is either 0x0000ffff or 0xffffffff and wraps the offset accordingly. */
0938         extsb r6,r4
0939         add offset,offset,r5
0940         ori r6,r6,0xffff
0941         and offset,offset,r6
0942         bgtctr cr2
0943         addi base,ssb,0
0944         bctr
0945 /* d8(base,index) and d16(base,index) share this code ! */
0946 13:     lhbrx r3,state,r3
0947         add offset,offset,r5
0948         add offset,offset,r3
0949         clrlwi offset,offset,16
0950         bgtctr cr2
0951         addi base,ssb,0
0952         bctr
0953 /* no displacement: only indexed modes may use ss as default base */
/* On entry offset holds the base register value, r3 the index selector and
 * cr1 the compare of r3 with 8: equal = 32 bit register indirect,
 * less = 16 bit register indirect, greater = 16 bit (base,index).
 * The 16 bit forms must wrap the effective offset to 16 bits, hence the
 * constant 16 in both clrlwi (a register name there would be taken as a
 * bit count and leave the sum of base and index unwrapped).
 */
0954 14:     beqctr cr1      # 32 bit register indirect
0955         clrlwi offset,offset,16
0956         bltctr cr1      # 16 bit register indirect
0957 /* (base,index) */
0958         lhbrx r3,state,r3   # 16 bit [{bp,bx}+{si,di}]
0959         cmpw cr2,r4,opcode  # check for ss as default base
0960         add offset,offset,r3
0961         clrlwi offset,offset,16 # wrap base+index to 16 bits
0962         bgtctr+ cr2
0963         addi base,ssb,0
0964         bctr
0965 /* sib modes, note that the size of the offset can be known from cr0 */
/*
 * r7 is reloaded with the sib byte: r3 = index register offset,
 * offset = base register offset (later replaced by the register value),
 * and the scale is the two top bits of r7 (srwi r6,r7,6).
 * cr1 "equal" means the index field is ESP, i.e. no index.
 * r4 is rebuilt as 0xc0000000 when the base register offset is 16 or 20
 * and 0 otherwise, then compared (signed) with opcode to choose between
 * the current base and ssb.
 * NOTE(review): for the other base registers r4 is 0, and opcode looks
 * non negative unless a segment override set its top bit, so the compare
 * would not be "greater" and ssb would be selected - confirm against the
 * definition of opcode and the 16 bit addressing tables.
 */
0966 15:     NEXTBYTE(r7)            # get sib
0967         rlwinm r3,r7,31,0x1c        # index
0968         rlwinm offset,r7,2,0x1c     # base
0969         cmpwi cr1,r3,ESP        # has index ?
0970         bne cr0,18f         # base+d8/d32
0971         cmpwi offset,EBP
0972         beq 17f             # d32(,index,scale)
0973         xori r4,one,0xcc01      # build 0x0000cc00
0974         rlwnm r4,r4,offset,0,1      # 0 or 0xc0000000
0975         lwbrx offset,state,offset
0976         cmpw cr2,r4,opcode      # use ss ?
0977         beq- cr1,16f            # no index
0978 /* (base,index,scale) */
0979         lwbrx r3,state,r3
0980         srwi r6,r7,6
0981         slw r3,r3,r6
0982         add offset,offset,r3
0983         bgtctr cr2
0984         addi base,ssb,0
0985         bctr
0986 /* (base), in practice only (%esp) is coded this way */
0987 16:     bgtctr cr2
0988         addi base,ssb,0
0989         bctr
0990 /* d32(,index,scale) */
0991 17:     NEXTDWORD(offset)
0992         beqctr- cr1         # no index: very unlikely
0993         lwbrx r3,state,r3
0994         srwi r6,r7,6
0995         slw r3,r3,r6
0996         add offset,offset,r3
0997         bctr
0998 /* 8 or 32 bit displacement */
0999 18:     xori r4,one,0xcc01      # build 0x0000cc00
1000         rlwnm r4,r4,offset,0,1      # 0 or 0xc0000000
1001         lwbrx offset,state,offset
1002         cmpw cr2,r4,opcode      # use ss ?
1003         bgt cr0,20f         # 8 bit offset
1004 /* 32 bit displacement */
1005         NEXTDWORD(r5)
1006         beq- cr1,21f
1007 /* d(base,index,scale) */
/* On entry: offset = base register value, r3 = index register offset,
 * r5 = displacement, r7 = sib byte, cr2 = ss default test.
 * The index is shifted by the scale field (two top bits of the sib byte)
 * exactly as in the (base,index,scale) and d32(,index,scale) paths; r6 is
 * a scratch register. */
1008 19:     lwbrx r3,state,r3; srwi r6,r7,6     # index value, scale
1009         slw r3,r3,r6; add offset,offset,r5  # scaled index, base+disp
1010         add offset,offset,r3
1011         bgtctr cr2
1012         addi base,ssb,0
1013         bctr
1014 /* 8 bit displacement */
/* r5 receives the sign extended byte; when the sib has an index (cr1 not
 * equal) the common d(base,index,scale) code at 19 finishes the job. */
1015 20:     NEXTBYTE(r5)
1016         extsb r5,r5
1017         bne+ cr1,19b
1018 /* d(base), in practice base is %esp  */
1019 21:     add offset,offset,r5
1020         bgtctr- cr2
1021         addi base,ssb,0     # ss is the default segment
1022         bctr
1023 
1024 /*
1025  *  Flag evaluation subroutines: they have not been written for performance
1026  * since they are not often used in practice. The rule of the game was to
1027  * write them with as few branches as possible.
1028  * The first routines evaluate either one or 2 (ZF and SF simultaneously)
1029  * flags and do not use r0 and r7.
1030  * The more complex routines (_eval_above, _eval_signed and _eval_flags)
1031  * call the former ones, using r0 as a return address save register and
1032  * r7 as a safe temporary.
1033  */
1034 
1035 /*
1036  * _eval_sf_zf evaluates simultaneously SF and ZF unless ZF is already valid
1037  * and protected because it is possible, although it is exceptional, to have
1038  * SF and ZF set at the same time after a few instructions which may leave the
1039  * flags in this apparently inconsistent state: sahf, popf, iret and the few
1040  * (for now unimplemented) instructions which only affect ZF (lar, lsl, arpl,
1041  * cmpxchg8b). This also solves the obscure case of ZF set and PF clear.
1042  * On return: SF=cr6[0], ZF=cr6[2].
1043  */
1044 
/* Clobbers r3, r4, r5 and cr0; updates the *_IN_CR bits of flags.
 * The value tested is either result shifted by RES_SHIFT or op2 shifted by
 * a count derived from INCDEC_FIELD, selected without branching through
 * the all ones/all zeroes mask in r5. */
1045 _eval_sf_zf:    andis. r5,flags,ZF_PROTECT>>16
1046         rlwinm r3,flags,0,INCDEC_FIELD
1047         RES_SHIFT(r4)
1048         cntlzw r3,r3
1049         slw r4,result,r4
1050         srwi r5,r3,5            # ? use result : use op1
1051         rlwinm r3,r3,2,0x18
1052         oris flags,flags,(SF_IN_CR|SIGNED_IN_CR|ZF_IN_CR)>>16
1053         neg r5,r5           # mux result/op2
1054         slw r3,op2,r3
1055         and r4,r4,r5
1056         andc r3,r3,r5
1057         xoris flags,flags,(SIGNED_IN_CR)>>16
1058         bne- 1f             # 12 instructions between set
1059         or r3,r3,r4         # and test, good for folding
1060         cmpwi cr6,r3,0      # sets SF and ZF together in cr6
1061         blr
/* ZF_PROTECT is set: leave cr6[2] alone and only copy the sign (cr0[0]) */
1062 1:      or. r3,r3,r4
1063         crmove SF,0
1064         blr
1065 
1066 /*
1067  * _eval_cf may be called at any time, no other flag is affected.
1068  * On return: CF=cr4[0], r3= CF ? 0x100:0 = CF<<8.
1069  */
/* Clobbers r3, r4, r5, xer[ca] and cr4. Marks CF_IN_CR valid and
 * ABOVE_IN_CR invalid in flags. */
1070 _eval_cf:   addc r3,flags,flags     # CF_IN to xer[ca]
1071         RES2CF(r4)          # get 8 or 16 bit carry
1072         subfe r3,result,op1     # generate PPC carry for
1073         CF_ROTCNT(r5)           # preceding operation
1074         addze r3,r4         # put carry into LSB
1075         CF_POL(r4,23)           # polarity & 0x100
1076         oris flags,flags,(CF_IN_CR|ABOVE_IN_CR)>>16
1077         rlwnm r3,r3,r5,23,23        # shift carry there
1078         xor r3,r3,r4            # CF <<8
1079         xoris flags,flags,(ABOVE_IN_CR)>>16
1080         cmplw cr4,one,r3        # sets cr4[0]
1081         blr
1082 
1083 /*
1084  * eval_of returns the overflow flag in OF_STATE field, which will be
1085  * either 001 (OF clear) or 101 (OF set), it is only called when the two
1086  * low order bits of OF_STATE are not 01 (otherwise it will work but
1087  * it is an elaborate variant of a nop with a few registers destroyed)
1088  * The code multiplexes several sources in a branchless way, was fun to write.
1089  */
/* Clobbers r3, r4, r5, r6; the result is written into the OF_STATE_MASK
 * and OF_VALUE fields of flags. Does not touch any condition register
 * field (no record form instruction is used). */
1090 _eval_of:   GET_ADDSUB(r4)          # 0(add)/1(sub)
1091         rlwinm r3,flags,0,INCDEC_FIELD
1092         neg r4,r4           # 0(add)/-1(sub)
1093         eqv r5,result,op1       # result[]==op1[] (bit by bit)
1094         cntlzw r3,r3            # inc/dec
1095         xor r4,r4,op2           # true sign of op2
1096         oris r5,r5,0x0808       # bits to clear
1097         clrlwi r6,r3,31         # 0(inc)/1(dec)
1098         eqv r4,r4,op1           # op1[]==op2[] (bit by bit)
1099         add r6,op2,r6           # add 1 if dec
1100         rlwinm r3,r3,2,0x18     # incdec_shift
1101         andc r4,r4,r5           # arithmetic overflow
1102         slw r3,r6,r3            # shifted inc/dec result
1103         addis r3,r3,0x8000      # compare with 0x80000000
1104         ori r4,r4,0x0808        # bits to set
1105         cntlzw r3,r3            # 32 if inc/dec overflow
1106         OF_ROTCNT(r6)
1107         rlwimi r4,r3,18,0x00800000  # insert inc/dec overflow
1108         rlwimi flags,one,24,OF_STATE_MASK
1109         rlwnm r3,r4,r6,8,8      # get field
1110         rlwimi flags,r3,3,OF_VALUE  # insert OF
1111         blr
1112 
1113 /*
1114  * _eval_pf will always be called when needed (complex but infrequent),
1115  * there are a few quirks for a branchless solution.
1116  * On return: PF=cr0[0], PF=MSB(r3)
1117  */
/* Clobbers r3, r4, r5, r6 and cr0.
 * The low byte is folded to a nibble (x ^ (x>>4)) & 15 which is used as a
 * rotate count on 0x96690000: bit n (counted from the MSB) of 0x9669 is
 * set exactly when n has an even number of one bits, so the MSB after the
 * rotation is the x86 parity flag. Both candidates (from op2 for inc/dec,
 * from result otherwise) are computed and one is masked away. */
1118 _eval_pf:   rlwinm r3,flags,0,INCDEC_FIELD
1119         rotrwi r4,op2,4         # from inc/dec
1120         rotrwi r5,result,4      # from result
1121         cntlzw r3,r3            # use result if 32
1122         xor r4,r4,op2
1123         xor r5,r5,result
1124         rlwinm r3,r3,26,0,0     # 32 becomes 0x80000000
1125         clrlwi r4,r4,28
1126         lis r6,0x9669           # constant to shift
1127         clrlwi r5,r5,28
1128         rlwnm r4,r6,r4,0,0      # parity from inc/dec
1129         rlwnm r5,r6,r5,0,0      # parity from result
1130         andc r4,r4,r3           # select which one
1131         and r5,r5,r3
1132         add. r3,r4,r5           # and test to simplify
1133         blr             # returns in r3 and cr0 set.
1134 
1135 /*
1136  * _eval_af will always be called when needed (complex but infrequent):
1137  * - if after inc, af is set when 4 low order bits of op1 are 0
1138  * - if after dec, af is set when 4 low order bits of op1 are 1
1139  *   (or 0 after adding 1 as implemented here)
1140  * - if after add/sub/adc/sbb/cmp af is set from sum of 4 LSB of op1
1141  *    and 4 LSB of op2 (eventually complemented) plus carry in.
1142  * - other instructions leave AF undefined so the returned value is irrelevant.
1143  * Returned value must be masked with 0x10, since all other bits are undefined.
1144  * This branchless code is perhaps not the most efficient, but quite parallel.
1145  */
/* Returns the candidate AF in bit 0x10 of r3. Clobbers r3, r4, r5, r6 and
 * xer[ca]; no condition register field is modified. The first addc is
 * only there to move the carry-in into xer[ca] for the later adde, its
 * result in r4 is overwritten. */
1146 _eval_af:   rlwinm r3,flags,0,INCDEC_FIELD
1147         clrlwi r5,op2,28        # 4 LSB of op2
1148         addc r4,flags,flags     # carry_in
1149         GET_ADDSUB(r6)
1150         cntlzw r3,r3            # if inc/dec 16..23 else 32
1151         neg r6,r6           # add/sub
1152         clrlwi r4,r3,31         # if dec 1 else 0
1153         xor r5,r5,r6            # conditionally complement
1154         clrlwi r6,op1,28        # 4 LSB of op1
1155         add r4,op2,r4           # op2+(dec ? 1 : 0)
1156         clrlwi r4,r4,28         # 4 LSB of op2+(dec ? 1 : 0)
1157         adde r5,r6,r5           # op1+cy_in+(op2/~op2)
1158         cntlzw r4,r4            # 28..31 if not AF, 32 if set
1159         andc r5,r5,r3           # masked AF from add/sub...
1160         andc r4,r3,r4           # masked AF from inc/dec
1161         or r3,r4,r5
1162         blr
1163 
1164 /*
1165  * _eval_above will only be called if ABOVE_IN_CR is not set.
1166  * On return: ZF=cr6[2], CF=cr4[0], ABOVE=cr4[1]
1167  */
/* r0 keeps the return address across the nested calls, so the callees must
 * not use r0. ZF and CF are evaluated only when their *_IN_CR bit is clear. */
1168 _eval_above:    andis. r3,flags,ZF_IN_CR>>16
1169         mflr r0
1170         beql+ _eval_sf_zf       # ZF not yet valid in cr6
1171         andis. r3,flags,CF_IN_CR>>16
1172         beql+ _eval_cf          # CF not yet valid in cr4
1173         mtlr r0
1174         oris flags,flags,ABOVE_IN_CR>>16
1175         crnor ABOVE,ZF,CF       # above = !(ZF | CF)
1176         blr
1177 
1178 /* _eval_signed may only be called when signed_in_cr is clear ! */
/* Computes SLT = SF ^ OF and SGT = !(SLT | ZF). r0 holds the return
 * address across the nested calls. _eval_of is called unless the two bits
 * extracted by the rlwinm. below are 01 (the only "greater than" case). */
1179 _eval_signed:   andis. r3,flags,SF_IN_CR>>16
1180         mflr r0
1181         beql+ _eval_sf_zf
1182 /* SF_IN_CR and ZF_IN_CR are set, SIGNED_IN_CR is clear */
1183         rlwinm. r3,flags,5,0,1
1184         xoris flags,flags,(SIGNED_IN_CR|SF_IN_CR)>>16
1185         bngl+ _eval_of
1186         andis. r3,flags,OF_VALUE>>16
1187         mtlr r0
1188         crxor SLT,SF,OF
1189         crnor SGT,SLT,ZF
1190         blr
1191 
/* Builds the complete x86 flags image. The arithmetic flags are gathered
 * in r7 (which starts as 2) by calling the individual evaluation routines;
 * the bits of eflags(state) selected by the mask 0x00047000 are then
 * merged in. Returns the image in r3. r0 holds the return address and r7
 * the partial image across the nested calls; r3-r6 are clobbered.
 * The andis. result in cr0 must survive _eval_af until the beql+. */
1192 _eval_flags:    mflr r0
1193         bl _eval_cf
1194         li r7,2
1195         rlwimi r7,r3,24,CF86,CF86   # 2 if CF clear, 3 if set
1196         bl _eval_pf
1197         andis. r4,flags,SF_IN_CR>>16
1198         rlwimi r7,r3,32+PF-PF86,PF86,PF86
1199         bl _eval_af
1200         rlwimi r7,r3,0,AF86,AF86
1201         beql+ _eval_sf_zf
1202         mfcr r3
1203         rlwinm. r4,flags,5,0,1
1204         rlwimi r7,r3,0,DF86,SF86
1205         ZF2ZF86(r3,r7)
1206         bngl+ _eval_of
1207         mtlr r0
1208         lis r4,0x0004
1209         lwz r3,eflags(state)
1210         addi r4,r4,0x7000       # r4 = 0x00047000
1211         rlwimi r7,flags,17,OF86,OF86
1212         and r3,r3,r4
1213         or r3,r3,r7
1214         blr
1215 
1216 /* Real mode segment load: in r4 = selector, out r3 = vbase(state) +
 * 16 * (r4 & 0xffff). Clobbers r5 only. */
1217 _segment_load:  rlwinm r3,r4,4,0xffff0      # r3 = selector << 4
1218         lwz r5,vbase(state)
1219         add r3,r5,r3
1220         blr
1221 
1222 /* I/O permission check, the hook that allows port virtualization.
1223  * In: r3 = exception code (its value also selects the width mask),
 *     r4 = port number. Returns normally when none of the selected bits
 *     of the ioperm bitmap is set, otherwise leaves through `complex`.
 *     Clobbers r0, r5, r6 and cr0; r3 and r4 are preserved. */
1224 _check_port:    lis r0,0xffff
1225         rlwinm r6,r4,29,0x1fff      # byte index in the bitmap: 0 to 8kB
1226         lwz r5,ioperm(state)
1227         rlwnm r0,r0,r3,0x0f     # width mask: 1, 3, or 0xf
1228         lhbrx r5,r5,r6          # 16 bitmap bits starting at that byte
1229         clrlwi r6,r4,29         # bit position: port modulo 8
1230         slw r0,r0,r6
1231         and. r0,r0,r5
1232         beqlr+                  # access allowed
1233         b complex
1234 /*
1235  * Instructions are in approximate functional order:
1236  * 1) move, exchange, lea, push/pop, pusha/popa
1237  * 2) cbw/cwde/cwd/cdq, zero/sign extending moves, in/out
1238  * 3) arithmetic: add/sub/adc/sbb/cmp/inc/dec/neg
1239  * 4) logical: and/or/xor/test/not/bt/btc/btr/bts/bsf/bsr
1240  * 5) jump, call, ret
1241  * 6) string instructions and xlat
1242  * 7) rotate/shift/mul/div
1243  * 8) segment register, far jumps, calls and rets, interrupts
1244  * 9) miscellaneous (flags, bcd,...)
1245  */
1246 
1247 #define MEM offset,base
1248 #define REG opreg,state
1249 #define SELECTORS 32
1250 #define SELBASES 64
1251 
1252 /* Immediate moves */
/* Register forms: the register number is in the 3 low bits of opcode.
 * For bytes opreg = ((opcode & 3) << 2) | ((opcode >> 2) & 1), for words
 * and longs opreg = (opcode & 7) << 2. The immediate is copied byte for
 * byte (no swap), and lbzu both prefetches the next opcode and advances
 * eip past the instruction before GOTNEXT. */
1253 movb_imm_reg:   rlwinm opreg,opcode,2,28,29; lbz r3,1(eip)
1254         rlwimi opreg,opcode,30,31,31; lbzu opcode,2(eip)
1255         stbx r3,REG; GOTNEXT
1256 
1257 movw_imm_reg:   lhz r3,1(eip); clrlslwi opreg,opcode,29,2; lbzu opcode,3(eip)
1258         sthx r3,REG; GOTNEXT
1259 
1260 movl_imm_reg:   lwz r3,1(eip); clrlslwi opreg,opcode,29,2; lbzu opcode,5(eip)
1261         stwx r3,REG; GOTNEXT
1262 
/* Immediate to modrm operand: the reg field (opreg) must be 0, any other
 * value branches to ud. The immediate follows the already decoded modrm
 * bytes, so it is read at 1(eip). */
1263 movb_imm_mem:   lbz r0,1(eip); cmpwi opreg,0
1264         lbzu opcode,2(eip); bne- ud
1265         stbx r0,MEM; GOTNEXT
1266 
1267 movw_imm_mem:   lhz r0,1(eip); cmpwi opreg,0
1268         lbzu opcode,3(eip); bne- ud
1269         sthx r0,MEM; GOTNEXT
1270 
1271 movl_imm_mem:   lwz r0,1(eip); cmpwi opreg,0
1272         lbzu opcode,5(eip); bne- ud
1273         stwx r0,MEM; GOTNEXT
1274 
1275 /* The special short form moves between memory and al/ax/eax */
/* The address follows the opcode: it is loaded byte reversed from eip+one
 * (16 bit for the _a16 forms, 32 bit for _a32) into offset; base is the
 * segment base left by the decoder. Data is copied without byte swap.
 * NOTE(review): `one` presumably holds the constant 1 - confirm. */
1276 movb_al_a32:    lwbrx offset,eip,one; lbz r0,AL(state); lbzu opcode,5(eip)
1277         stbx r0,MEM; GOTNEXT
1278 
1279 movb_al_a16:    lhbrx offset,eip,one; lbz r0,AL(state); lbzu opcode,3(eip)
1280         stbx r0,MEM; GOTNEXT
1281 
1282 movw_ax_a32:    lwbrx offset,eip,one; lhz r0,AX(state); lbzu opcode,5(eip)
1283         sthx r0,MEM; GOTNEXT
1284 
1285 movw_ax_a16:    lhbrx offset,eip,one; lhz r0,AX(state); lbzu opcode,3(eip)
1286         sthx r0,MEM; GOTNEXT
1287 
1288 movl_eax_a32:   lwbrx offset,eip,one; lwz r0,EAX(state); lbzu opcode,5(eip)
1289         stwx r0,MEM; GOTNEXT
1290 
1291 movl_eax_a16:   lhbrx offset,eip,one; lwz r0,EAX(state); lbzu opcode,3(eip)
1292         stwx r0,MEM; GOTNEXT
1293 
1294 movb_a32_al:    lwbrx offset,eip,one; lbzu opcode,5(eip); lbzx r0,MEM
1295         stb r0,AL(state); GOTNEXT
1296 
1297 movb_a16_al:    lhbrx offset,eip,one; lbzu opcode,3(eip); lbzx r0,MEM
1298         stb r0,AL(state); GOTNEXT
1299 
1300 movw_a32_ax:    lwbrx offset,eip,one; lbzu opcode,5(eip); lhzx r0,MEM
1301         sth r0,AX(state); GOTNEXT
1302 
1303 movw_a16_ax:    lhbrx offset,eip,one; lbzu opcode,3(eip); lhzx r0,MEM
1304         sth r0,AX(state); GOTNEXT
1305 
1306 movl_a32_eax:   lwbrx offset,eip,one; lbzu opcode,5(eip); lwzx r0,MEM
1307         stw r0,EAX(state); GOTNEXT
1308 
1309 movl_a16_eax:   lhbrx offset,eip,one; lbzu opcode,3(eip); lwzx r0,MEM
1310         stw r0,EAX(state); GOTNEXT
1311 
1312 /* General purpose move (all are exactly 4 instructions long) */
/* MEM is the modrm operand (offset,base) and REG the register selected by
 * the reg field (opreg,state). Values are copied without byte swap since
 * both sides use the same byte order. Keep every handler at 4 instructions:
 * the header comment states the fixed length as a property of this group. */
1313         .align 4
1314 movb_reg_mem:   lbzx r0,REG
1315         NEXTBYTE(opcode)
1316         stbx r0,MEM
1317         GOTNEXT
1318 
1319 movw_reg_mem:   lhzx r0,REG
1320         NEXTBYTE(opcode)
1321         sthx r0,MEM
1322         GOTNEXT
1323 
1324 movl_reg_mem:   lwzx r0,REG
1325         NEXTBYTE(opcode)
1326         stwx r0,MEM
1327         GOTNEXT
1328 
1329 movb_mem_reg:   lbzx r0,MEM
1330         NEXTBYTE(opcode)
1331         stbx r0,REG
1332         GOTNEXT
1333 
1334 movw_mem_reg:   lhzx r0,MEM
1335         NEXTBYTE(opcode)
1336         sthx r0,REG
1337         GOTNEXT
1338 
1339 movl_mem_reg:   lwzx r0,MEM
1340         NEXTBYTE(opcode)
1341         stwx r0,REG
1342         GOTNEXT
1343 
1344 /* short form exchange ax/eax with register */
/* The register is taken from the 3 low bits of opcode ((opcode & 7) << 2).
 * Both values are loaded before either store, so exchanging the
 * accumulator with itself leaves it unchanged. */
1345 xchgw_ax_reg:   clrlslwi opreg,opcode,29,2
1346         lhz r3,AX(state)
1347         lhzx r4,REG
1348         sthx r3,REG
1349         sth r4,AX(state)
1350         NEXT
1351 
1352 xchgl_eax_reg:  clrlslwi opreg,opcode,29,2
1353         lwz r3,EAX(state)
1354         lwzx r4,REG
1355         stwx r3,REG
1356         stw r4,EAX(state)
1357         NEXT
1358 
1359 /* General exchange (unlocked!) */
/* Both operands are read before either is written. The memory access is
 * a plain load followed by a plain store: no bus lock is emulated. */
1360 xchgb_reg_mem:  lbzx r3,MEM
1361         lbzx r4,REG
1362         NEXTBYTE(opcode)
1363         stbx r3,REG
1364         stbx r4,MEM
1365         GOTNEXT
1366 
1367 xchgw_reg_mem:  lhzx r3,MEM
1368         lhzx r4,REG
1369         sthx r3,REG
1370         sthx r4,MEM
1371         NEXT
1372 
1373 xchgl_reg_mem:  lwzx r3,MEM
1374         lwzx r4,REG
1375         stwx r3,REG
1376         stwx r4,MEM
1377         NEXT
1378 
1379 /* lea, one of the simplest instructions */
/* The decoder sets base = state when the modrm byte designates a register
 * operand, which is invalid for lea: branch to ud in that case. Otherwise
 * the computed offset (not the segment base) is stored byte reversed into
 * the destination register. */
1380 leaw:       cmpw base,state
1381         beq- ud
1382         sthbrx offset,REG
1383         NEXT
1384 
1385 leal:       cmpw base,state
1386         beq- ud
1387         stwbrx offset,REG
1388         NEXT
1389 
1390 /* Short form pushes and pops */
/* 16 bit stack pushes: SP is read byte reversed, decremented, written
 * back, then wrapped to 16 bits to index from ssb. The register value is
 * read before SP is updated and copied without byte swap. */
1391 pushw_sp_reg:   li r3,SP
1392         lhbrx r4,state,r3
1393         clrlslwi opreg,opcode,29,2
1394         lhzx r0,REG
1395         addi r4,r4,-2
1396         sthbrx r4,state,r3
1397         clrlwi r4,r4,16
1398         sthx r0,ssb,r4
1399         NEXT
1400 
1401 pushl_sp_reg:   li r3,SP
1402         lhbrx r4,state,r3
1403         clrlslwi opreg,opcode,29,2
1404         lwzx r0,REG
1405         addi r4,r4,-4
1406         sthbrx r4,state,r3
1407         clrlwi r4,r4,16
1408         stwx r0,ssb,r4
1409         NEXT
1410 
/* 16 bit stack pops into the register encoded in the 3 low bits of opcode.
 * SP is written back before the destination register so that a pop into
 * sp itself ends up with the popped value. */
1411 popw_sp_reg:    li r3,SP
1412         lhbrx r4,state,r3
1413         clrlslwi opreg,opcode,29,2
1414         lhzx r0,ssb,r4
1415         addi r4,r4,2        # order is important in case of pop sp
1416         sthbrx r4,state,r3
1417         sthx r0,REG
1418         NEXT
1419 
1420 popl_sp_reg:    li r3,SP
1421         lhbrx r4,state,r3
1422         clrlslwi opreg,opcode,29,2
1423         lwzx r0,ssb,r4
1424         addi r4,r4,4
1425         sthbrx r4,state,r3
1426         stwx r0,REG
1427         NEXT
1428 
1429 /* Push immediate */
/* Full size immediates: the 2 or 4 bytes at 1(eip) are copied to the stack
 * without byte swap (code and stack share the x86 byte order). lbzu
 * prefetches the next opcode and advances eip past the immediate. */
1430 pushw_sp_imm:   li r3,SP
1431         lhbrx r4,state,r3
1432         lhz r0,1(eip)
1433         addi r4,r4,-2
1434         sthbrx r4,state,r3
1435         clrlwi r4,r4,16
1436         lbzu opcode,3(eip)
1437         sthx r0,ssb,r4
1438         GOTNEXT
1439 
1440 pushl_sp_imm:   li r3,SP
1441         lhbrx r4,state,r3
1442         lwz r0,1(eip)
1443         addi r4,r4,-4
1444         sthbrx r4,state,r3
1445         clrlwi r4,r4,16
1446         lbzu opcode,5(eip)
1447         stwx r0,ssb,r4
1448         GOTNEXT
1449 
/* Push of a sign extended 8 bit immediate (16 bit stack pointer).
 * Only the single byte at 1(eip) is the immediate: it is loaded with lbz
 * so that extsb extends the immediate itself and not the byte that
 * follows it. The extended value is a native (big endian) integer, so it
 * is stored byte reversed to get the x86 layout on the stack, like the
 * general pushes do. lbzu prefetches the next opcode, advancing eip by 2.
 */
1450 pushw_sp_imm8:  li r3,SP
1451         lhbrx r4,state,r3
1452         lbz r0,1(eip)           # the 8 bit immediate
1453         addi r4,r4,-2
1454         sthbrx r4,state,r3
1455         clrlwi r4,r4,16
1456         lbzu opcode,2(eip)
1457         extsb r0,r0
1458         sthbrx r0,ssb,r4        # little endian on the stack
1459         GOTNEXT
1460 
1461 pushl_sp_imm8:  li r3,SP
1462         lhbrx r4,state,r3
1463         lbz r0,1(eip)           # the 8 bit immediate
1464         addi r4,r4,-4
1465         sthbrx r4,state,r3
1466         clrlwi r4,r4,16
1467         lbzu opcode,2(eip)
1468         extsb r0,r0
1469         stwbrx r0,ssb,r4        # little endian on the stack
1470         GOTNEXT
1471 
1472 /* General push/pop */
/* Push of a modrm operand: the operand is loaded (byte reversed) before SP
 * is modified and stored byte reversed again on the stack. */
1473 pushw_sp:   lhbrx r0,MEM
1474         li r3,SP
1475         lhbrx r4,state,r3
1476         addi r4,r4,-2
1477         sthbrx r4,state,r3
1478         clrlwi r4,r4,16
1479         sthbrx r0,r4,ssb
1480         NEXT
1481 
1482 pushl_sp:   lwbrx r0,MEM
1483         li r3,SP
1484         lhbrx r4,state,r3
1485         addi r4,r4,-4
1486         sthbrx r4,state,r3
1487         clrlwi r4,r4,16
1488         stwbrx r0,r4,ssb
1489         NEXT
1490 
1491 /* pop is an exception with 32 bit addressing modes, it is possible
1492 to calculate wrongly the address when esp is used as base. But 16 bit
1493 addressing modes are safe */
1494 
/* pop of a 16 bit value into a modrm operand, 16 bit addressing.
 * The reg field of the modrm byte (opreg) must be 0, anything else is an
 * undefined opcode. The test uses cmpwi: with cmpw the last operand would
 * name register r0, whose content is unrelated at this point. */
1495 popw_sp_a16:    cmpwi cr1,opreg,0   # first check the opcode
1496         li r3,SP
1497         lhbrx r4,state,r3
1498         bne- cr1,ud
1499         lhzx r0,ssb,r4
1500         addi r4,r4,2
1501         sthx r0,MEM
1502         sthbrx r4,state,r3
1503         NEXT
1504 
/* pop of a 32 bit value into a modrm operand, 16 bit addressing.
 * The reg field of the modrm byte (opreg) must be 0 (compared against the
 * constant with cmpwi, not against register r0), and SP advances by the
 * operand size, i.e. 4 bytes, matching popl_sp_reg. */
1505 popl_sp_a16:    cmpwi cr1,opreg,0
1506         li r3,SP
1507         lhbrx r4,state,r3
1508         bne- cr1,ud
1509         lwzx r0,ssb,r4
1510         addi r4,r4,4        # 32 bit operand
1511         stwx r0,MEM
1512         sthbrx r4,state,r3
1513         NEXT
1514 
1515 /* 32 bit addressing modes for pop not implemented for now. */
1516         .equ popw_sp_a32,unimpl
1517         .equ popl_sp_a32,unimpl
1518 
1519 /* pusha/popa */
/* The 8 general registers are stored 4 bytes apart from state+0; they are
 * pushed in increasing offset order using CTR as loop counter. The SP
 * slot is read inside the loop while the register file still holds the
 * original value: the new SP is written only after the loop. */
1520 pushaw_sp:  li r3,SP
1521         li r0,8
1522         lhbrx r4,r3,state
1523         mtctr r0
1524         addi r5,state,-4
1525 1:      addi r4,r4,-2
1526         lhzu r6,4(r5)
1527         clrlwi r4,r4,16
1528         sthx r6,ssb,r4
1529         bdnz 1b
1530         sthbrx r4,r3,state  # new sp
1531         NEXT
1532 
1533 pushal_sp:  li r3,SP
1534         li r0,8
1535         lhbrx r4,r3,state
1536         mtctr r0
1537         addi r5,state,-4
1538 1:      addi r4,r4,-4
1539         lwzu r6,4(r5)
1540         clrlwi r4,r4,16
1541         stwx r6,ssb,r4
1542         bdnz 1b
1543         sthbrx r4,r3,state  # new sp
1544         NEXT
1545 
/* 16 bit popa: 8 words are popped into the register slots from state+28
 * down to state+0. The word popped into the SP slot is harmless because
 * the final store overwrites it with the updated stack pointer. */
1546 popaw_sp:   li r3,SP
1547         li r0,8
1548         lhbrx r4,state,r3
1549         mtctr r0
1550         addi r5,state,32
1551 1:      lhzx r6,ssb,r4
1552         addi r4,r4,2
1553         sthu r6,-4(r5)
1554         clrlwi r4,r4,16
1555         bdnz 1b
1556         sthbrx r4,r3,state  # updated sp
1557         NEXT
1558 
/* 32 bit popa: pops EDI, ESI, EBP, discards the stacked ESP, then pops
 * EBX, EDX, ECX and EAX (register slots state+28 down to state+0).
 * r0 is a shift register doubled once per pop and tested after the store:
 *   negative  -> plain pop, continue;
 *   positive  -> the register just popped was EBP: step r5 over the ESP
 *                slot of the register file and r4 over the stacked ESP;
 *   zero      -> the register just popped was EAX: done.
 * With the initial value 0xee000000 the successive values are
 * dc, b8, 70 (skip), e0, c0, 80, 00 (exit): 7 pops plus one skipped stack
 * slot, so SP advances by 32. Only the low 16 bits of ESP are updated.
 */
1559 popal_sp:   li r3,SP
1560         lis r0,0xee00       # mask to skip esp
1561         lhbrx r4,state,r3
1562         addi r5,state,32
1563 1:      add. r0,r0,r0
1564         lwzx r6,ssb,r4
1565         addi r4,r4,4
1566         stwu r6,-4(r5)
1567         clrlwi r4,r4,16
1568         blt 1b
1569         addi r5,r5,-4       # skip the ESP slot (harmless on exit)
1570         beq 2f
1571         addi r4,r4,4        # skip the stacked ESP
1572         clrlwi r4,r4,16
1573         b 1b
1574 2:      sthbrx r4,state,r3  # updated sp
1575         NEXT
1576 
1577 /* Moves with zero or sign extension: first the special cases */
/* NOTE(review): the indexed forms below pass AX/EAX in the rA position,
 * which presumably relies on these offsets being 0 (rA = 0 reads as a
 * literal zero in indexed addressing) - confirm against their definition. */
1578 cbw:        lbz r3,AL(state)
1579         extsb r3,r3
1580         sthbrx r3,AX,state
1581         NEXT
1582 
1583 cwde:       lhbrx r3,AX,state
1584         extsh r3,r3
1585         stwbrx r3,EAX,state
1586         NEXT
1587 
/* cwd: the sign of AX is bit 7 of AH; after extsb and the shift the low
 * 16 bits of r3 are all copies of that sign bit. */
1588 cwd:        lbz r3,AH(state)
1589         extsb r3,r3
1590         srwi r3,r3,8            # get sign bits
1591         sth r3,DX(state)
1592         NEXT
1593 
1594 cdq:        lwbrx r3,EAX,state
1595         srawi r3,r3,31
1596         stw r3,EDX(state)       # byte order unimportant !
1597         NEXT
1598 
1599 /* The move with zero or sign extension are special since the source
1600 and destination are not the same size. The register describing the destination
1601 is modified to take this into account. */
1602 
/* For byte sources the decoder has produced a byte register offset in
 * opreg; the rlwimi/rlwinm pair moves bit 0 of opreg to bit 0x10 and keeps
 * only the 0x1c field, turning it into the offset of the corresponding
 * word/long register. The extended value is stored byte reversed. */
1603 movsbw:     lbzx r3,MEM
1604         rlwimi opreg,opreg,4,0x10
1605         extsb r3,r3
1606         rlwinm opreg,opreg,0,0x1c
1607         sthbrx r3,REG
1608         NEXT
1609 
1610 movsbl:     lbzx r3,MEM
1611         rlwimi opreg,opreg,4,0x10
1612         extsb r3,r3
1613         rlwinm opreg,opreg,0,0x1c
1614         stwbrx r3,REG
1615         NEXT
1616 
/* Same size "extension" is a plain move */
1617         .equ movsww, movw_mem_reg
1618 
1619 movswl:     lhbrx r3,MEM
1620         extsh r3,r3
1621         stwbrx r3,REG
1622         NEXT
1623 
1624 movzbw:     lbzx r3,MEM
1625         rlwimi opreg,opreg,4,0x10
1626         rlwinm opreg,opreg,0,0x1c
1627         sthbrx r3,REG
1628         NEXT
1629 
1630 movzbl:     lbzx r3,MEM
1631         rlwimi opreg,opreg,4,0x10
1632         rlwinm opreg,opreg,0,0x1c
1633         stwbrx r3,REG
1634         NEXT
1635 
1636         .equ movzww, movw_mem_reg
1637 
1638 movzwl:     lhbrx r3,MEM
1639         stwbrx r3,REG
1640         NEXT
1641 
1642 /* Byte swapping */
/* A byte reversed load followed by a plain store swaps the register in
 * place; the register number is in the 3 low bits of opcode. */
1643 bswap:      clrlslwi opreg,opcode,29,2  # extract reg from opcode
1644         lwbrx r0,REG
1645         stwx r0,REG
1646         NEXT
1647 
1648 /* Input/output */
/* Input: the port number is either the byte following the opcode or DX.
 * _check_port is called with the access code in r3 and the port in r4; it
 * returns only when the access is permitted and preserves r4. The port is
 * then read at iobase(state)+port, followed by eieio, and the data is
 * copied to AL/AX/EAX without byte swap. */
1649 inb_port_al:    NEXTBYTE(r4)
1650         b 1f
1651 inb_dx_al:  li r4,DX
1652         lhbrx r4,r4,state
1653 1:      li r3,code_inb
1654         bl _check_port
1655         lwz r3,iobase(state)
1656         lbzx r5,r4,r3
1657         eieio
1658         stb r5,AL(state)
1659         NEXT
1660 
1661 inw_port_ax:    NEXTBYTE(r4)
1662         b 1f
1663 inw_dx_ax:  li r4,DX
1664         lhbrx r4,r4,state
1665 1:      li r3,code_inw
1666         bl _check_port
1667         lwz r3,iobase(state)
1668         lhzx r5,r4,r3
1669         eieio
1670         sth r5,AX(state)
1671         NEXT
1672 
1673 inl_port_eax:   NEXTBYTE(r4)
1674         b 1f
1675 inl_dx_eax: li r4,DX
1676         lhbrx r4,r4,state
1677 1:      li r3,code_inl
1678         bl _check_port
1679         lwz r3,iobase(state)
1680         lwzx r5,r4,r3
1681         eieio
1682         stw r5,EAX(state)
1683         NEXT
1684 
/* Byte and word output: port number from the byte following the opcode or
 * from DX, permission check through _check_port (access code in r3, port
 * in r4, r4 preserved), then the accumulator is stored at
 * iobase(state)+port without byte swap, followed by eieio. */
1685 outb_al_port:   NEXTBYTE(r4)
1686         b 1f
1687 outb_al_dx: li r4,DX
1688         lhbrx r4,r4,state
1689 1:      li r3,code_outb
1690         bl _check_port
1691         lwz r3,iobase(state)
1692         lbz r5,AL(state)
1693         stbx r5,r4,r3
1694         eieio
1695         NEXT
1696 
1697 outw_ax_port:   NEXTBYTE(r4)
1698         b 1f
1699 outw_ax_dx: li r4,DX
1700         lhbrx r4,r4,state
1701 1:      li r3,code_outw
1702         bl _check_port
1703         lwz r3,iobase(state)
1704         lhz r5,AX(state)
1705         sthx r5,r4,r3
1706         eieio
1707         NEXT
1708 
/* 32 bit output: port number from the byte following the opcode or from
 * DX, permission check through _check_port (access code in r3, port in
 * r4, r4 preserved). iobase must be loaded into r3, not r4: r4 still
 * holds the port number needed as index of the store, exactly as in the
 * byte and word variants. */
1709 outl_eax_port:  NEXTBYTE(r4)
1710         b 1f
1711 outl_eax_dx:    li r4,DX
1712         lhbrx r4,r4,state
1713 1:      li r3,code_outl
1714         bl _check_port
1715         lwz r3,iobase(state)    # keep the port number in r4
1716         lwz r5,EAX(state)
1717         stwx r5,r4,r3
1718         eieio
1719         NEXT
1720 
1721 /* Macro used for add and sub */
/*
 * ARITH(op,fl) expands to the complete family of handlers for a two
 * operand arithmetic instruction `op`, `fl` being the flag descriptor
 * macro applied to the operand size (B, W or L) and given to SET_FLAGS:
 *   op##{b,w,l}_reg_mem   op1 = modrm operand, op2 = register, result
 *                         stored in the modrm operand;
 *   op##{b,w,l}_mem_reg   op1 = register, op2 = modrm operand, result
 *                         stored in the register;
 *   op##{b,w,l}_imm_{al,ax,eax}  set base/offset to the accumulator and
 *                         fall through into the next form;
 *   op##{b,w,l}_imm       op2 = immediate at 1(eip) of the operand size;
 *   op##{w,l}_imm8        op2 = sign extended byte at 1(eip) (truncated to
 *                         16 bits for the word form).
 * op1, op2 and result are left in their registers for lazy flag
 * evaluation. Word and long operands are loaded and stored byte reversed.
 * In the immediate forms lbzu prefetches the next opcode and advances eip.
 */
1722 #define ARITH(op,fl) \
1723 op##b_reg_mem:  lbzx op1,MEM; SET_FLAGS(fl(B)); lbzx op2,REG; \
1724         op result,op1,op2; \
1725         stbx result,MEM; NEXT; \
1726 op##w_reg_mem:  lhbrx op1,MEM; SET_FLAGS(fl(W)); lhbrx op2,REG; \
1727         op result,op1,op2; \
1728         sthbrx result,MEM; NEXT; \
1729 op##l_reg_mem:  lwbrx op1,MEM; SET_FLAGS(fl(L)); lwbrx op2,REG; \
1730         op result,op1,op2; \
1731         stwbrx result,MEM; NEXT; \
1732 op##b_mem_reg:  lbzx op2,MEM; SET_FLAGS(fl(B)); lbzx op1,REG; \
1733         op result,op1,op2; \
1734         stbx result,REG; NEXT; \
1735 op##w_mem_reg:  lhbrx op2,MEM; SET_FLAGS(fl(W)); lhbrx op1,REG; \
1736         op result,op1,op2; \
1737         sthbrx result,REG; NEXT; \
1738 op##l_mem_reg:  lwbrx op2,MEM; SET_FLAGS(fl(L)); lwbrx op1,REG; \
1739         op result,op1,op2; \
1740         stwbrx result,REG; NEXT; \
1741 op##b_imm_al:   addi base,state,0; li offset,AL; \
1742 op##b_imm:  lbzx op1,MEM; SET_FLAGS(fl(B)); lbz op2,1(eip); \
1743         op result,op1,op2; \
1744         lbzu opcode,2(eip); \
1745         stbx result,MEM; GOTNEXT; \
1746 op##w_imm_ax:   addi base,state,0; li offset,AX; \
1747 op##w_imm:  lhbrx op1,MEM; SET_FLAGS(fl(W)); lhbrx op2,eip,one; \
1748         op result,op1,op2; \
1749         lbzu opcode,3(eip); \
1750         sthbrx result,MEM; GOTNEXT; \
1751 op##w_imm8: lbz op2,1(eip); SET_FLAGS(fl(W)); lhbrx op1,MEM; \
1752         extsb op2,op2; clrlwi op2,op2,16; \
1753         op result,op1,op2; \
1754         lbzu opcode,2(eip); \
1755         sthbrx result,MEM; GOTNEXT; \
1756 op##l_imm_eax:  addi base,state,0; li offset,EAX; \
1757 op##l_imm:  lwbrx op1,MEM; SET_FLAGS(fl(L)); lwbrx op2,eip,one; \
1758         op result,op1,op2; lbzu opcode,5(eip); \
1759         stwbrx result,MEM; GOTNEXT; \
1760 op##l_imm8: lbz op2,1(eip); SET_FLAGS(fl(L)); lwbrx op1,MEM; \
1761         extsb op2,op2; lbzu opcode,2(eip); \
1762         op result,op1,op2; \
1763         stwbrx result,MEM; GOTNEXT
1764 
1765         ARITH(add, FLAGS_ADD)
1766         ARITH(sub, FLAGS_SUB)
1767 
/* adc/sbb wrappers around the PowerPC extended arithmetic instructions;
 * both consume xer[ca], which carryfor##op sets up.  sbb computes
 * op1 - op2 using the PowerPC convention that the carry is the
 * complement of the x86 borrow. */
1768 #define adc(result, op1, op2) adde result,op1,op2
1769 #define sbb(result, op1, op2) subfe result,op2,op1
1770 
/* ARITH_WITH_CARRY(op,fl) expands to the same set of handlers as ARITH
 * (reg_mem, mem_reg, imm, imm_al/ax/eax, imm8) for adc and sbb.
 *
 * Ordering rules followed by every handler:
 *  - op2 is loaded first, then carryfor##op is called, and only then is
 *    op1 loaded: carryfor##op reconstructs the carry of the PRECEDING
 *    instruction from result and op1, so op1 must not be overwritten
 *    before the call.  carryfor##op leaves the carry in xer[ca] and in
 *    the CF_IN bit of flags.
 *  - ADD_FLAGS (not SET_FLAGS) is used after the call so that the CF_IN
 *    bit written to flags by carryfor##op is kept (assumes ADD_FLAGS
 *    merges into flags where SET_FLAGS overwrites it; both are defined
 *    elsewhere).
 *  - op1 is always the destination operand (MEM for reg_mem and imm
 *    forms, REG for mem_reg forms), which matters for sbb since it
 *    computes op1 - op2. */
1771 #define ARITH_WITH_CARRY(op, fl) \
1772 op##b_reg_mem:  lbzx op2,REG; bl carryfor##op; lbzx op1,MEM; \
1773         ADD_FLAGS(fl(B)); op(result, op1, op2); \
1774         stbx result,MEM; NEXT; \
1775 op##w_reg_mem:  lhbrx op2,REG; bl carryfor##op; lhbrx op1,MEM; \
1776         ADD_FLAGS(fl(W)); op(result, op1, op2); \
1777         sthbrx result,MEM; NEXT; \
1778 op##l_reg_mem:  lwbrx op2,REG; bl carryfor##op; lwbrx op1,MEM; \
1779         ADD_FLAGS(fl(L)); op(result, op1, op2); \
1780         stwbrx result,MEM; NEXT; \
1781 op##b_mem_reg:  lbzx op2,MEM; bl carryfor##op; lbzx op1,REG; \
1782         ADD_FLAGS(fl(B)); op(result, op1, op2); \
1783         stbx result,REG; NEXT; \
1784 op##w_mem_reg:  lhbrx op2,MEM; bl carryfor##op; lhbrx op1,REG; \
1785         ADD_FLAGS(fl(W)); op(result, op1, op2); \
1786         sthbrx result,REG; NEXT; \
1787 op##l_mem_reg:  lwbrx op2,MEM; bl carryfor##op; lwbrx op1,REG; \
1788         ADD_FLAGS(fl(L)); op(result, op1, op2); \
1789         stwbrx result,REG; NEXT; \
1790 op##b_imm_al:   addi base,state,0; li offset,AL; \
1791 op##b_imm:  lbz op2,1(eip); bl carryfor##op; lbzx op1,MEM; \
1792         ADD_FLAGS(fl(B)); lbzu opcode,2(eip); op(result, op1, op2); \
1793         stbx result,MEM; GOTNEXT; \
1794 op##w_imm_ax:   addi base,state,0; li offset,AX; \
1795 op##w_imm:  lhbrx op2,eip,one; bl carryfor##op; lhbrx op1,MEM; \
1796         ADD_FLAGS(fl(W)); lbzu opcode,3(eip); op(result, op1, op2); \
1797         sthbrx result,MEM; GOTNEXT; \
1798 op##w_imm8: lbz op2,1(eip); bl carryfor##op; lhbrx op1,MEM; \
1799         extsb op2,op2; ADD_FLAGS(fl(W)); clrlwi op2,op2,16; \
1800         lbzu opcode,2(eip); op(result, op1, op2); \
1801         sthbrx result,MEM; GOTNEXT; \
1802 op##l_imm_eax:  addi base,state,0; li offset,EAX; \
1803 op##l_imm:  lwbrx op2,eip,one; bl carryfor##op; lwbrx op1,MEM; \
1804         ADD_FLAGS(fl(L)); lbzu opcode,5(eip); op(result, op1, op2); \
1805         stwbrx result,MEM; GOTNEXT; \
1806 op##l_imm8: lbz op2,1(eip); bl carryfor##op; lwbrx op1,MEM; \
1807         extsb op2,op2; ADD_FLAGS(fl(L)); lbzu opcode,2(eip); \
1808         op(result, op1, op2); \
1809         stwbrx result,MEM; GOTNEXT
1810 
/* carryforadc: rebuild the x86 carry left by the preceding instruction and
 * hand it to a following adde.
 * In:   flags, result, op1 as left by the preceding instruction.
 * Out:  xer[ca] = x86 CF; flags = that carry alone, placed in the CF_IN bit.
 * Uses: r3, r4, r5 (and lr through the bl that calls it).
 * The 32-bit carry is regenerated with subfe from result and op1, the 8/16
 * bit carry comes from RES2CF; rlwnm moves the relevant one to bit 0x100
 * and CF_POL supplies the polarity correction.  The corrected value has to
 * stay in r3 because both the addic (which sets xer[ca]) and the final
 * rlwinm (which builds flags) consume it. */
1811 carryforadc:    addc r3,flags,flags     # CF_IN to xer[ca]
1812         RES2CF(r4)          # get 8 or 16 bit carry
1813         subfe r3,result,op1     # generate PPC carry for
1814         CF_ROTCNT(r5)           # preceding operation
1815         addze r3,r4         # 32 bit carry in LSB
1816         CF_POL(r4,23)           # polarity
1817         rlwnm r3,r3,r5,0x100        # shift carry there
1818         xor r3,r4,r3            # CF86 ? 0x100 : 0
1819         addic r4,r3,0xffffff00      # set xer[ca]
1820         rlwinm flags,r3,23,CF_IN
1821         blr
1822 
1823         ARITH_WITH_CARRY(adc, FLAGS_ADD)
1824 
1825 /* for sbb the input carry must be the complement of the x86 carry */
/* carryforsbb: same reconstruction as for adc, but the value handed to the
 * following subfe is inverted.
 * In:   flags, result, op1 as left by the preceding instruction.
 * Out:  xer[ca] = complement of x86 CF; flags = that bit alone in CF_IN.
 * Uses: r3, r4, r5.
 * eqv leaves 0xffffffff in r3 when the x86 carry is clear, so adding 1
 * carries out exactly in that case; the same r3 then feeds the rlwinm that
 * builds flags, which is why the eqv result must be written to r3. */
1826 carryforsbb:    addc r3,flags,flags     # CF_IN to xer[ca]
1827         RES2CF(r4)          # 8/16 bit carry from result
1828         subfe r3,result,op1
1829         CF_ROTCNT(r5)
1830         addze r3,r4
1831         CF_POL(r4,23)
1832         rlwnm r3,r3,r5,0x100
1833         eqv r3,r4,r3            # CF86 ? 0xfffffeff:0xffffffff
1834         addic r4,r3,1           # set xer[ca]
1835         rlwinm flags,r3,23,CF_IN    # keep only the carry
1836         blr
1837 
1838         ARITH_WITH_CARRY(sbb, FLAGS_SBB)
1839 
/* CMP r/m,reg in byte, word and long sizes: op1 = MEM, op2 = REG.
 * Nothing is written back.  Each handler leaves
 *   result = op1 - op2,
 *   cr4    = unsigned comparison of op1 and op2 (cmplw),
 *   cr6    = signed comparison (cmpw) of the operands, sign-extended from
 *            8 or 16 bits into r3/r4 for the byte and word forms,
 * and records the flag type with SET_FLAGS(FLAGS_CMP(size)). */
1840 cmpb_reg_mem:   lbzx op1,MEM
1841         SET_FLAGS(FLAGS_CMP(B))
1842         lbzx op2,REG
1843         extsb r3,op1
1844         cmplw cr4,op1,op2
1845         extsb r4,op2
1846         sub result,op1,op2
1847         cmpw cr6,r3,r4
1848         NEXT
1849 
1850 cmpw_reg_mem:   lhbrx op1,MEM
1851         SET_FLAGS(FLAGS_CMP(W))
1852         lhbrx op2,REG
1853         extsh r3,op1
1854         cmplw cr4,op1,op2
1855         extsh r4,op2
1856         sub result,op1,op2
1857         cmpw cr6,r3,r4
1858         NEXT
1859 
1860 cmpl_reg_mem:   lwbrx op1,MEM
1861         SET_FLAGS(FLAGS_CMP(L))
1862         lwbrx op2,REG
1863         cmplw cr4,op1,op2
1864         sub result,op1,op2
1865         cmpw cr6,op1,op2
1866         NEXT
1867 
/* CMP reg,r/m in byte, word and long sizes: op1 = REG, op2 = MEM, i.e. the
 * operand roles are swapped with respect to the _reg_mem handlers.
 * result = op1 - op2, cr4 holds the unsigned and cr6 the signed comparison
 * (on sign-extended copies in r3/r4 for the byte and word forms). */
1868 cmpb_mem_reg:   lbzx op2,MEM
1869         SET_FLAGS(FLAGS_CMP(B))
1870         lbzx op1,REG
1871         extsb r4,op2
1872         cmplw cr4,op1,op2
1873         extsb r3,op1
1874         sub result,op1,op2
1875         cmpw cr6,r3,r4
1876         NEXT
1877 
1878 cmpw_mem_reg:   lhbrx op2,MEM
1879         SET_FLAGS(FLAGS_CMP(W))
1880         lhbrx op1,REG
1881         extsh r4,op2
1882         cmplw cr4,op1,op2
1883         extsh r3,op1
1884         sub result,op1,op2
1885         cmpw cr6,r3,r4
1886         NEXT
1887 
1888 cmpl_mem_reg:   lwbrx op2,MEM
1889         SET_FLAGS(FLAGS_CMP(L))
1890         lwbrx op1,REG
1891         cmpw cr6,op1,op2
1892         sub result,op1,op2
1893         cmplw cr4,op1,op2
1894         NEXT
1895 
/* CMP with an immediate operand: op1 = MEM, op2 = immediate.
 * The _imm_al/_imm_ax/_imm_eax entry points first point base/offset at the
 * accumulator inside the state block and then fall through into the
 * generic handler.  The next opcode byte is fetched with lbzu (advancing
 * eip by 2, 3 or 5 bytes depending on the immediate size) before GOTNEXT.
 * As for the other compares: result = op1 - op2, cr4 = unsigned and
 * cr6 = signed comparison. */
1896 cmpb_imm_al:    addi base,state,0
1897         li offset,AL
1898 cmpb_imm:   lbzx op1,MEM
1899         SET_FLAGS(FLAGS_CMP(B))
1900         lbz op2,1(eip)
1901         extsb r3,op1
1902         cmplw cr4,op1,op2
1903         lbzu opcode,2(eip)
1904         extsb r4,op2
1905         sub result,op1,op2
1906         cmpw cr6,r3,r4
1907         GOTNEXT
1908 
1909 cmpw_imm_ax:    addi base,state,0
1910         li offset,AX
1911 cmpw_imm:   lhbrx op1,MEM
1912         SET_FLAGS(FLAGS_CMP(W))
1913         lhbrx op2,eip,one
1914         extsh r3,op1
1915         cmplw cr4,op1,op2
1916         lbzu opcode,3(eip)
1917         extsh r4,op2
1918         sub result,op1,op2
1919         cmpw cr6,r3,r4
1920         GOTNEXT
1921 
/* Word compare with an 8-bit immediate: r4 keeps the sign-extended value
 * for the signed comparison, op2 its 16-bit truncation for the unsigned
 * comparison and the subtraction. */
1922 cmpw_imm8:  lbz op2,1(eip)
1923         SET_FLAGS(FLAGS_CMP(W))
1924         lhbrx op1,MEM
1925         extsb r4,op2
1926         extsh r3,op1
1927         lbzu opcode,2(eip)
1928         clrlwi op2,r4,16
1929         cmpw cr6,r3,r4
1930         sub result,op1,op2
1931         cmplw cr4,op1,op2
1932         GOTNEXT
1933 
1934 cmpl_imm_eax:   addi base,state,0
1935         li offset,EAX
1936 cmpl_imm:   lwbrx op1,MEM
1937         SET_FLAGS(FLAGS_CMP(L))
1938         lwbrx op2,eip,one
1939         cmpw cr6,op1,op2
1940         lbzu opcode,5(eip)
1941         sub result,op1,op2
1942         cmplw cr4,op1,op2
1943         GOTNEXT
1944 
/* Long compare with an 8-bit immediate sign-extended to 32 bits. */
1945 cmpl_imm8:  lbz op2,1(eip)
1946         SET_FLAGS(FLAGS_CMP(L))
1947         lwbrx op1,MEM
1948         extsb op2,op2
1949         lbzu opcode,2(eip)
1950         cmpw cr6,op1,op2
1951         sub result,op1,op2
1952         cmplw cr4,op1,op2
1953         GOTNEXT
1954 
1955 /* Increment and decrement */
/* Each handler loads the operand into op2, invokes INC_FLAGS(size) (flag
 * bookkeeping macro defined elsewhere), adds 1 and stores the value back
 * with the store matching the load.  The _reg forms derive the register
 * offset from the low three bits of the opcode (clrlslwi ...,29,2 yields
 * 4 * (opcode & 7)). */
1956 incb:       lbzx op2,MEM
1957         INC_FLAGS(B)
1958         addi op2,op2,1
1959         stbx op2,MEM
1960         NEXT
1961 
1962 incw_reg:   clrlslwi opreg,opcode,29,2  # extract reg from opcode
1963         lhbrx op2,REG
1964         INC_FLAGS(W)
1965         addi op2,op2,1
1966         sthbrx op2,REG
1967         NEXT
1968 
1969 incw:       lhbrx op2,MEM
1970         INC_FLAGS(W)
1971         addi op2,op2,1
1972         sthbrx op2,MEM
1973         NEXT
1974 
/* INC r32: the register offset comes from the low three bits of the opcode.
 * The full 32-bit value is loaded, incremented and written back with a
 * 32-bit byte-reversed store, so a carry out of the low 16 bits reaches
 * the upper half of the register. */
1975 incl_reg:   clrlslwi opreg,opcode,29,2  # extract reg from opcode
1976         lwbrx op2,REG
1977         INC_FLAGS(L)
1978         addi op2,op2,1
1979         stwbrx op2,REG          # store all 32 bits
1980         NEXT
1981 
/* INC r/m32 followed by the byte and word DEC handlers.  All of them load
 * the operand into op2, invoke INC_FLAGS/DEC_FLAGS for the operand size,
 * adjust the value by +1 or -1 and store it back with the store matching
 * the load. */
1982 incl:       lwbrx op2,MEM
1983         INC_FLAGS(L)
1984         addi op2,op2,1
1985         stwbrx op2,MEM
1986         NEXT
1987 
1988 decb:       lbzx op2,MEM
1989         DEC_FLAGS(B)
1990         addi op2,op2,-1
1991         stbx op2,MEM
1992         NEXT
1993 
1994 decw_reg:   clrlslwi opreg,opcode,29,2  # extract reg from opcode
1995         lhbrx op2,REG
1996         DEC_FLAGS(W)
1997         addi op2,op2,-1
1998         sthbrx op2,REG
1999         NEXT
2000 
2001 decw:       lhbrx op2,MEM
2002         DEC_FLAGS(W)
2003         addi op2,op2,-1
2004         sthbrx op2,MEM
2005         NEXT
2006 
/* DEC r32: the register offset comes from the low three bits of the opcode.
 * The full 32-bit value is loaded, decremented and written back with a
 * 32-bit byte-reversed store, so a borrow from the low 16 bits reaches
 * the upper half of the register. */
2007 decl_reg:   clrlslwi opreg,opcode,29,2  # extract reg from opcode
2008         lwbrx op2,REG
2009         DEC_FLAGS(L)
2010         addi op2,op2,-1
2011         stwbrx op2,REG          # store all 32 bits
2012         NEXT
2013 
/* DEC r/m32: load, DEC_FLAGS(L), subtract 1, 32-bit store back. */
2014 decl:       lwbrx op2,MEM
2015         DEC_FLAGS(L)
2016         addi op2,op2,-1
2017         stwbrx op2,MEM
2018         NEXT
2019 
/* NEG r/m8, handled as the subtraction 0 - operand: op2 holds the operand,
 * op1 is forced to 0 and result = -op2, with the flag type recorded as
 * FLAGS_SUB(B). */
2020 negb:       lbzx op2,MEM
2021         SET_FLAGS(FLAGS_SUB(B))
2022         neg result,op2
2023         li op1,0
2024         stbx result,MEM
2025         NEXT
2026 
/* NEG r/m16, handled as the subtraction 0 - operand: op2 holds the operand,
 * op1 is forced to 0 and result = -op2 (flag type FLAGS_SUB(W)).  The
 * negated value, i.e. result, is what gets written back, as in the byte
 * and long forms. */
2027 negw:       lhbrx op2,MEM
2028         SET_FLAGS(FLAGS_SUB(W))
2029         neg result,op2
2030         li op1,0
2031         sthbrx result,MEM       # write the negated value back
2032         NEXT
2033 
/* NEG r/m32 as 0 - operand.  subfic is used instead of neg, so xer[ca] is
 * updated by this handler as well. */
2034 negl:       lwbrx op2,MEM
2035         SET_FLAGS(FLAGS_SUB(L))
2036         subfic result,op2,0
2037         li op1,0
2038         stwbrx result,MEM
2039         NEXT
2040 
2041 /* Macro used to generate code for OR/AND/XOR */
/* LOGICAL(op) expands to the same family of handlers as the arithmetic
 * macros: reg_mem (result stored to MEM), mem_reg (result stored to REG),
 * imm, imm_al/ax/eax (base/offset pointed at the accumulator, then fall
 * through into imm) and imm8 (immediate sign-extended with extsb).
 * The flag type is recorded with SET_FLAGS(FLAGS_LOG(size)).  Which of
 * op1/op2 receives the memory operand differs between the byte and the
 * word/long mem_reg handlers; the three operations generated here are
 * commutative, so the stored result is the same either way.
 * NOTE(review): unlike the arithmetic word imm8 form, the word imm8 form
 * here does not clear the upper 16 bits of op2 after extsb - confirm that
 * nothing reading op2 or result later depends on them. */
2042 #define LOGICAL(op) \
2043 op##b_reg_mem:  lbzx op1,MEM; SET_FLAGS(FLAGS_LOG(B)); lbzx op2,REG; \
2044         op result,op1,op2; \
2045         stbx result,MEM; NEXT; \
2046 op##w_reg_mem:  lhbrx op1,MEM; SET_FLAGS(FLAGS_LOG(W)); lhbrx op2,REG; \
2047         op result,op1,op2; \
2048         sthbrx result,MEM; NEXT; \
2049 op##l_reg_mem:  lwbrx op1,MEM; SET_FLAGS(FLAGS_LOG(L)); lwbrx op2,REG; \
2050         op result,op1,op2; \
2051         stwbrx result,MEM; NEXT; \
2052 op##b_mem_reg:  lbzx op1,MEM; SET_FLAGS(FLAGS_LOG(B)); lbzx op2,REG; \
2053         op result,op1,op2; \
2054         stbx result,REG; NEXT; \
2055 op##w_mem_reg:  lhbrx op2,MEM; SET_FLAGS(FLAGS_LOG(W)); lhbrx op1,REG; \
2056         op result,op1,op2; \
2057         sthbrx result,REG; NEXT; \
2058 op##l_mem_reg:  lwbrx op2,MEM; SET_FLAGS(FLAGS_LOG(L)); lwbrx op1,REG; \
2059         op result,op1,op2; \
2060         stwbrx result,REG; NEXT; \
2061 op##b_imm_al:   addi base,state,0; li offset,AL; \
2062 op##b_imm:  lbzx op1,MEM; SET_FLAGS(FLAGS_LOG(B)); lbz op2,1(eip); \
2063         op result,op1,op2; lbzu opcode,2(eip); \
2064         stbx result,MEM; GOTNEXT; \
2065 op##w_imm_ax:   addi base,state,0; li offset,AX; \
2066 op##w_imm:  lhbrx op1,MEM; SET_FLAGS(FLAGS_LOG(W)); lhbrx op2,eip,one; \
2067         op result,op1,op2; lbzu opcode,3(eip); \
2068         sthbrx result,MEM; GOTNEXT; \
2069 op##w_imm8: lbz op2,1(eip); SET_FLAGS(FLAGS_LOG(W)); lhbrx op1,MEM; \
2070         extsb op2,op2; lbzu opcode,2(eip); \
2071         op result,op1,op2; \
2072         sthbrx result,MEM; GOTNEXT; \
2073 op##l_imm_eax:  addi base,state,0; li offset,EAX; \
2074 op##l_imm:  lwbrx op1,MEM; SET_FLAGS(FLAGS_LOG(L)); lwbrx op2,eip,one; \
2075         op result,op1,op2; lbzu opcode,5(eip); \
2076         stwbrx result,MEM; GOTNEXT; \
2077 op##l_imm8: lbz op2,1(eip); SET_FLAGS(FLAGS_LOG(L)); lwbrx op1,MEM; \
2078         extsb op2,op2; lbzu opcode,2(eip); \
2079         op result,op1,op2; \
2080         stwbrx result,MEM; GOTNEXT
2081 
2082         LOGICAL(or)
2083 
2084         LOGICAL(and)
2085 
2086         LOGICAL(xor)
2087 
/* TEST r/m,reg for bytes and words: result = op1 & op2, nothing is stored.
 * cr6 receives the comparison against zero of the result sign-extended
 * from the operand size (copy kept in r3). */
2088 testb_reg_mem:  lbzx op1,MEM
2089         SET_FLAGS(FLAGS_TEST(B))
2090         lbzx op2,REG
2091         and result,op1,op2
2092         extsb r3,result
2093         cmpwi cr6,r3,0
2094         NEXT
2095 
2096 testw_reg_mem:  lhbrx op1,MEM
2097         SET_FLAGS(FLAGS_TEST(W))
2098         lhbrx op2,REG
2099         and result,op1,op2
2100         extsh r3,result
2101         cmpwi cr6,r3,0
2102         NEXT
2103 
/* TEST r/m32,r32: result = op1 & op2 with op1 = MEM and op2 = REG; nothing
 * is stored.  cr6 receives the comparison of the result against zero.
 * The operands are loaded into op1/op2, the registers the and instruction
 * reads, as in the byte and word forms. */
2104 testl_reg_mem:  lwbrx op1,MEM
2105         SET_FLAGS(FLAGS_TEST(L))
2106         lwbrx op2,REG
2107         and result,op1,op2
2108         cmpwi cr6,result,0
2109         NEXT
2110 
/* TEST with an immediate, byte and word sizes.  The _imm_al/_imm_ax entry
 * points select the accumulator in the state block and fall through.
 * result = op1 & op2; cr6 = comparison against zero of the sign-extended
 * result; the next opcode byte is fetched with lbzu before GOTNEXT. */
2111 testb_imm_al:   addi base,state,0
2112         li offset,AL
2113 testb_imm:  lbzx op1,MEM
2114         SET_FLAGS(FLAGS_TEST(B))
2115         lbz op2,1(eip)
2116         and result,op1,op2
2117         lbzu opcode,2(eip)
2118         extsb r3,result
2119         cmpwi cr6,r3,0
2120         GOTNEXT
2121 
2122 testw_imm_ax:   addi base,state,0
2123         li offset,AX
2124 testw_imm:  lhbrx op1,MEM
2125         SET_FLAGS(FLAGS_TEST(W))
2126         lhbrx op2,eip,one
2127         and result,op1,op2
2128         lbzu opcode,3(eip)
2129         extsh r3,result
2130         cmpwi cr6,r3,0
2131         GOTNEXT
2132 
/* TEST r/m32,imm32 (testl_imm_eax selects EAX in the state block and falls
 * through).  result = op1 & op2 where op1 = MEM and op2 = the 32-bit
 * immediate; cr6 = comparison of the result against zero.  The and reads
 * op1/op2, the registers that were just loaded. */
2133 testl_imm_eax:  addi base,state,0
2134         li offset,EAX
2135 testl_imm:  lwbrx op1,MEM
2136         SET_FLAGS(FLAGS_TEST(L))
2137         lwbrx op2,eip,one
2138         and result,op1,op2
2139         lbzu opcode,5(eip)
2140         cmpwi cr6,result,0
2141         GOTNEXT
2142 
2143 /* Not does not affect flags */
/* The operand is complemented in place.  Word and long forms use plain
 * (not byte-reversed) loads and stores: inverting every bit gives the same
 * memory image whatever the byte order. */
2144 notb:       lbzx r3,MEM
2145         xori r3,r3,255
2146         stbx r3,MEM
2147         NEXT
2148 
2149 notw:       lhzx r3,MEM
2150         xori r3,r3,65535
2151         sthx r3,MEM
2152         NEXT
2153 
2154 notl:       lwzx r3,MEM
2155         not r3,r3
2156         stwx r3,MEM
2157         NEXT
2158 
/* BOUND: r4 = index taken from REG, r5 = first and r6 = second value of the
 * pair stored at MEM (offset is advanced by the operand size between the
 * two loads).  All comparisons are signed.  When r5 <= r4 <= r6 control
 * goes to nop, otherwise to complex with r3 = code_bound (both labels are
 * defined elsewhere in the file). */
2159 boundw:     lhbrx r4,REG
2160         li r3,code_bound
2161         lhbrx r5,MEM
2162         addi offset,offset,2
2163         extsh r4,r4
2164         lhbrx r6,MEM
2165         extsh r5,r5
2166         cmpw r4,r5
2167         extsh r6,r6
2168         blt- complex
2169         cmpw r4,r6
2170         ble+ nop
2171         b complex
2172 
2173 boundl:     lwbrx r4,REG
2174         li r3,code_bound
2175         lwbrx r5,MEM
2176         addi offset,offset,4
2177         lwbrx r6,MEM
2178         cmpw r4,r5
2179         blt- complex
2180         cmpw r4,r6
2181         ble+ nop
2182         b complex
2183 
2184 /* Bit test and modify instructions */
2185 
2186 /* Common routine: bit index in op2, returns memory value in r3, mask in op2,
2187 and of mask and value in op1. CF flag is set as with 32 bit add when bit is
2188 non zero since result (which is cleared) will be less than op1, and in cr4,
2189 all other flags are undefined from Intel doc. Here OF and SF are cleared
2190 and ZF is set as a side effect of result being cleared.  */
/* _setup_bitw: common part of the 16-bit bit test instructions.
 * In:   op2 = bit index (16 significant bits), base/offset = operand.
 * Out:  r3 = word containing the bit, op2 = single-bit mask,
 *       op1 = r3 & mask, result = 0, cr4 = unsigned compare of result
 *       and op1.
 * Uses: r4.
 * When the operand is not inside the state block (base != state, i.e. a
 * memory operand) the signed bit index also selects the word: the word
 * index (index >> 4) is converted to a byte displacement (2 bytes per
 * word) before being added to offset. */
2191 _setup_bitw:    cmpw base,state
2192         SET_FLAGS(FLAGS_BTEST)
2193         extsh op2,op2
2194         beq- 1f
2195         srawi r4,op2,4; slwi r4,r4,1    # word index -> byte displacement
2196         add offset,offset,r4
2197 1:      clrlwi op2,op2,28       # true bit index
2198         lhbrx r3,MEM
2199         slw op2,one,op2         # build mask
2200         li result,0         # implicitly sets CF
2201         and op1,r3,op2          # if result<op1
2202         cmplw cr4,result,op1        # sets CF in cr4
2203         blr
2204 
/* _setup_bitl: common part of the 32-bit bit test instructions.
 * In:   op2 = signed 32-bit bit index, base/offset = operand.
 * Out:  r3 = dword containing the bit, op2 = single-bit mask (rotlw only
 *       uses the low five bits of the index), op1 = r3 & mask, result = 0,
 *       cr4 = unsigned compare of result and op1.
 * Uses: r4.
 * For a memory operand (base != state) the dword index (index >> 5) is
 * converted to a byte displacement (4 bytes per dword) before being added
 * to offset. */
2205 _setup_bitl:    cmpw base,state
2206         SET_FLAGS(FLAGS_BTEST)
2207         beq- 1f
2208         srawi r4,op2,5; slwi r4,r4,2    # dword index -> byte displacement
2209         add offset,offset,r4
2210 1:      lwbrx r3,MEM
2211         rotlw op2,one,op2       # build mask
2212         li result,0
2213         and op1,r3,op2
2214         cmplw cr4,result,op1
2215         blr
2216 
2217 /* Immediate forms bit tests are not frequent since logical are often faster */
/* BT (16 bit): the bit index comes from an immediate byte (btw_imm) or from
 * REG (btw_reg_mem); _setup_bitw does all the work and nothing is written
 * back. */
2218 btw_imm:    NEXTBYTE(op2)
2219         b 1f
2220 btw_reg_mem:    lhbrx op2,REG
2221 1:      bl _setup_bitw
2222         NEXT
2223 
/* BT (32 bit): the bit index comes from an immediate byte (btl_imm) or from
 * REG (btl_reg_mem); nothing is written back.  The register form loads the
 * whole 32-bit register because _setup_bitl shifts the index arithmetically
 * without sign-extending it first. */
2224 btl_imm:    NEXTBYTE(op2)
2225         b 1f
2226 btl_reg_mem:    lwbrx op2,REG
2227 1:      bl _setup_bitl
2228         NEXT
2229 
/* BTC (16 bit): after _setup_bitw, r3 is the word and op2 the mask; the
 * selected bit is inverted with xor and the word is stored back. */
2230 btcw_imm:   NEXTBYTE(op2)
2231         b 1f
2232 btcw_reg_mem:   lhbrx op2,REG
2233 1:      bl _setup_bitw
2234         xor r3,r3,op2
2235         sthbrx r3,MEM
2236         NEXT
2237 
/* BTC (32 bit): after _setup_bitl, r3 is the dword and op2 the mask; the
 * selected bit is inverted with xor and the modified dword in r3 is stored
 * back (result is zero at this point and must not be stored).  The register
 * form loads the whole 32-bit register because _setup_bitl shifts the
 * index arithmetically without sign-extending it first. */
2238 btcl_imm:   NEXTBYTE(op2)
2239         b 1f
2240 btcl_reg_mem:   lwbrx op2,REG
2241 1:      bl _setup_bitl
2242         xor r3,r3,op2
2243         stwbrx r3,MEM
2244         NEXT
2245 
/* BTR (16 bit): after _setup_bitw, the selected bit is cleared with andc
 * and the word is stored back. */
2246 btrw_imm:   NEXTBYTE(op2)
2247         b 1f
2248 btrw_reg_mem:   lhbrx op2,REG
2249 1:      bl _setup_bitw
2250         andc r3,r3,op2
2251         sthbrx r3,MEM
2252         NEXT
2253 
/* BTR (32 bit): after _setup_bitl, the selected bit is cleared with andc
 * and the dword is stored back.  The register form loads the whole 32-bit
 * register because _setup_bitl shifts the index arithmetically without
 * sign-extending it first. */
2254 btrl_imm:   NEXTBYTE(op2)
2255         b 1f
2256 btrl_reg_mem:   lwbrx op2,REG
2257 1:      bl _setup_bitl
2258         andc r3,r3,op2
2259         stwbrx r3,MEM
2260         NEXT
2261 
/* BTS (16 bit): after _setup_bitw, the selected bit is set with or and the
 * word is stored back. */
2262 btsw_imm:   NEXTBYTE(op2)
2263         b 1f
2264 btsw_reg_mem:   lhbrx op2,REG
2265 1:      bl _setup_bitw
2266         or r3,r3,op2
2267         sthbrx r3,MEM
2268         NEXT
2269 
/* BTS (32 bit): after _setup_bitl, the selected bit is set with or and the
 * dword is stored back.  The register form loads the whole 32-bit register
 * because _setup_bitl shifts the index arithmetically without
 * sign-extending it first. */
2270 btsl_imm:   NEXTBYTE(op2)
2271         b 1f
2272 btsl_reg_mem:   lwbrx op2,REG
2273 1:      bl _setup_bitl
2274         or r3,r3,op2
2275         stwbrx r3,MEM
2276         NEXT
2277 
2278 /* Bit string search instructions, only ZF is defined after these, and the
2279 result value is not defined when the bit field is zero. */
/* The source operand is loaded into result and compared with zero in cr6.
 * bsf isolates the lowest set bit with (-x & x) before counting leading
 * zeroes; bsr counts leading zeroes of the operand itself.  In both cases
 * 31 - cntlzw is the bit number stored to REG. */
2280 bsfw:       lhbrx result,MEM
2281         SET_FLAGS(FLAGS_BSRCH(W))
2282         neg r3,result
2283         cmpwi cr6,result,0      # sets ZF
2284         and r3,r3,result        # keep only LSB
2285         cntlzw r3,r3
2286         subfic r3,r3,31
2287         sthbrx r3,REG
2288         NEXT
2289 
2290 bsfl:       lwbrx result,MEM
2291         SET_FLAGS(FLAGS_BSRCH(L))
2292         neg r3,result
2293         cmpwi cr6,result,0      # sets ZF
2294         and r3,r3,result        # keep only LSB
2295         cntlzw r3,r3
2296         subfic r3,r3,31
2297         stwbrx r3,REG
2298         NEXT
2299 
2300 bsrw:       lhbrx result,MEM
2301         SET_FLAGS(FLAGS_BSRCH(W))
2302         cntlzw r3,result
2303         cmpwi cr6,result,0
2304         subfic r3,r3,31
2305         sthbrx r3,REG
2306         NEXT
2307 
2308 bsrl:       lwbrx result,MEM
2309         SET_FLAGS(FLAGS_BSRCH(L))
2310         cntlzw r3,result
2311         cmpwi cr6,result,0
2312         subfic r3,r3,31
2313         stwbrx r3,REG
2314         NEXT
2315 
2316 /* Unconditional jumps, first the indirect then the relative ones */
/* In all of these eip ends up pointing at the target opcode byte, which
 * lbzux loads into opcode while updating eip.  The indirect and the 16-bit
 * relative forms work on the offset relative to csb and add csb back in
 * the final lbzux; the 32-bit relative forms add the displacement to eip
 * directly. */
2317 jmpw:       lhbrx eip,MEM
2318         lbzux opcode,eip,csb
2319         GOTNEXT
2320 
2321 jmpl:       lwbrx eip,MEM
2322         lbzux opcode,eip,csb
2323         GOTNEXT
2324 
2325 sjmp_w:     lbz r3,1(eip)
2326         sub eip,eip,csb
2327         addi eip,eip,2          # EIP after instruction
2328         extsb r3,r3
2329         add eip,eip,r3
2330         clrlwi eip,eip,16       # modulo 64k
2331         lbzux opcode,eip,csb
2332         GOTNEXT
2333 
2334 jmp_w:      lhbrx r3,eip,one        # eip now off by 3
2335         sub eip,eip,csb
2336         addi r3,r3,3            # compensate
2337         add eip,eip,r3
2338         clrlwi eip,eip,16
2339         lbzux opcode,eip,csb
2340         GOTNEXT
2341 
2342 sjmp_l:     lbz r3,1(eip)
2343         addi eip,eip,2
2344         extsb r3,r3
2345         lbzux opcode,eip,r3
2346         GOTNEXT
2347 
2348 jmp_l:      lwbrx r3,eip,one        # Simple
2349         addi eip,eip,5
2350         lbzux opcode,eip,r3
2351         GOTNEXT
2352 
2353 /*  The conditional jumps: although it should not happen,
2354 byte relative jumps (sjmp) may wrap around in 16 bit mode */
2355 
/* NOTTAKEN_x skips a not-taken jump: it fetches the opcode that follows an
 * instruction of 2 (byte displacement), 3 (word) or 5 (long) bytes. */
2356 #define NOTTAKEN_S lbzu opcode,2(eip); GOTNEXT
2357 #define NOTTAKEN_W lbzu opcode,3(eip); GOTNEXT
2358 #define NOTTAKEN_L lbzu opcode,5(eip); GOTNEXT
2359 
/* CONDJMP(cond, eval, flag) generates eight handlers for one condition:
 * short/near displacement x 16/32-bit eip x condition true/false.  Each one
 * runs EVAL_##eval (flag evaluation macro defined elsewhere), then branches
 * to the matching unconditional jump handler when the condition register
 * bit `flag` is set (bt) or clear (bf, the jn* variants); otherwise the
 * instruction is skipped with NOTTAKEN_*. */
2360 #define CONDJMP(cond, eval, flag) \
2361 sj##cond##_w:   EVAL_##eval; bt flag,sjmp_w; NOTTAKEN_S; \
2362 j##cond##_w:    EVAL_##eval; bt flag,jmp_w; NOTTAKEN_W; \
2363 sj##cond##_l:   EVAL_##eval; bt flag,sjmp_l; NOTTAKEN_S; \
2364 j##cond##_l:    EVAL_##eval; bt flag,jmp_l; NOTTAKEN_L; \
2365 sjn##cond##_w:  EVAL_##eval; bf flag,sjmp_w; NOTTAKEN_S; \
2366 jn##cond##_w:   EVAL_##eval; bf flag,jmp_w; NOTTAKEN_W; \
2367 sjn##cond##_l:  EVAL_##eval; bf flag,sjmp_l; NOTTAKEN_S; \
2368 jn##cond##_l:   EVAL_##eval; bf flag,jmp_l; NOTTAKEN_L
2369 
2370         CONDJMP(o, OF, OF)
2371         CONDJMP(c, CF, CF)
2372         CONDJMP(z, ZF, ZF)
2373         CONDJMP(a, ABOVE, ABOVE)
2374         CONDJMP(s, SF, SF)
2375         CONDJMP(p, PF, PF)
2376         CONDJMP(g, SIGNED, SGT)
2377         CONDJMP(l, SIGNED, SLT)
2378 
/* JCXZ/JECXZ: jump (short form) when the counter is zero.  The prefix of
 * the label selects the counter (CX or ECX), the suffix the eip size.  The
 * counter is only compared with zero, so the non byte-reversed lhz/lwz
 * loads are sufficient. */
2379 jcxz_w:     lhz r3,CX(state); cmpwi r3,0; beq- sjmp_w; NOTTAKEN_S
2380 jcxz_l:     lhz r3,CX(state); cmpwi r3,0; beq- sjmp_l; NOTTAKEN_S
2381 jecxz_w:    lwz r3,ECX(state); cmpwi r3,0; beq- sjmp_w; NOTTAKEN_S
2382 jecxz_l:    lwz r3,ECX(state); cmpwi r3,0; beq- sjmp_l; NOTTAKEN_S
2383 
2384 /* Note that loop is somewhat strange, the data size attribute gives
2385 the size of eip, and the address size whether the counter is cx or ecx.
2386 This is the same for jcxz/jecxz. */
2387 
/* LOOP: label is loop<counter size>_<eip size>.  The counter is loaded into
 * r0, decremented with a record-form subtract (`one` is used as the
 * constant 1) and stored back; the short jump is taken while the
 * decremented value is non zero. */
2388 loopw_w:    li opreg,CX
2389         lhbrx r0,REG
2390         sub. r0,r0,one
2391         sthbrx r0,REG
2392         bne+ sjmp_w
2393         NOTTAKEN_S
2394 
2395 loopl_w:    li opreg,ECX
2396         lwbrx r0,REG
2397         sub. r0,r0,one
2398         stwbrx r0,REG
2399         bne+ sjmp_w
2400         NOTTAKEN_S
2401 
2402 loopw_l:    li opreg,CX
2403         lhbrx r0,REG
2404         sub. r0,r0,one
2405         sthbrx r0,REG
2406         bne+ sjmp_l
2407         NOTTAKEN_S
2408 
2409 loopl_l:    li opreg,ECX
2410         lwbrx r0,REG
2411         sub. r0,r0,one
2412         stwbrx r0,REG
2413         bne+ sjmp_l
2414         NOTTAKEN_S
2415 
/* LOOPZ, CX counter, 16-bit eip: the counter is always decremented and
 * stored; the jump is taken only when ZF (evaluated by EVAL_ZF before the
 * decrement) is set and the decremented counter is non zero. */
2416 loopzw_w:   li opreg,CX
2417         lhbrx r0,REG
2418         EVAL_ZF
2419         sub. r0,r0,one
2420         sthbrx r0,REG
2421         bf ZF,1f
2422         bne+ sjmp_w
2423 1:      NOTTAKEN_S
2424 
/* LOOPZ, ECX counter, 16-bit eip: the counter is loaded into r0,
 * decremented and stored back from that same register; the jump is taken
 * only when ZF (evaluated by EVAL_ZF before the decrement) is set and the
 * decremented counter is non zero. */
2425 loopzl_w:   li opreg,ECX
2426         lwbrx r0,REG
2427         EVAL_ZF
2428         sub. r0,r0,one
2429         stwbrx r0,REG
2430         bf ZF,1f
2431         bne+ sjmp_w
2432 1:      NOTTAKEN_S
2433 
/* Remaining LOOPZ and all LOOPNZ handlers.  Label scheme:
 * loop[n]z<counter size>_<eip size>.  The counter is always decremented and
 * stored.  LOOPZ falls through to NOTTAKEN_S when ZF is clear (bf ZF),
 * LOOPNZ when ZF is set (bt ZF); otherwise the short jump is taken while
 * the decremented counter is non zero. */
2434 loopzw_l:   li opreg,CX
2435         lhbrx r0,REG
2436         EVAL_ZF
2437         sub. r0,r0,one
2438         sthbrx r0,REG
2439         bf ZF,1f
2440         bne+ sjmp_l
2441 1:      NOTTAKEN_S
2442 
2443 loopzl_l:   li opreg,ECX
2444         lwbrx r0,REG
2445         EVAL_ZF
2446         sub. r0,r0,one
2447         stwbrx r0,REG
2448         bf ZF,1f
2449         bne+ sjmp_l
2450 1:      NOTTAKEN_S
2451 
2452 loopnzw_w:  li opreg,CX
2453         lhbrx r0,REG
2454         EVAL_ZF
2455         sub. r0,r0,one
2456         sthbrx r0,REG
2457         bt ZF,1f
2458         bne+ sjmp_w
2459 1:      NOTTAKEN_S
2460 
2461 loopnzl_w:  li opreg,ECX
2462         lwbrx r0,REG
2463         EVAL_ZF
2464         sub. r0,r0,one
2465         stwbrx r0,REG
2466         bt ZF,1f
2467         bne+ sjmp_w
2468 1:      NOTTAKEN_S
2469 
2470 loopnzw_l:  li opreg,CX
2471         lhbrx r0,REG
2472         EVAL_ZF
2473         sub. r0,r0,one
2474         sthbrx r0,REG
2475         bt ZF,1f
2476         bne+ sjmp_l
2477 1:      NOTTAKEN_S
2478 
2479 loopnzl_l:  li opreg,ECX
2480         lwbrx r0,REG
2481         EVAL_ZF
2482         sub. r0,r0,one
2483         stwbrx r0,REG
2484         bt ZF,1f
2485         bne+ sjmp_l
2486 1:      NOTTAKEN_S
2487 
2488 /* Memory indirect calls are rare enough to limit code duplication */
/* Near calls with a 16-bit stack pointer.
 * callw_sp_mem: r3 = target read from MEM, r4 = return offset computed as
 *               (eip - csb) + 1; it then joins the common tail at 1:.
 *               NOTE(review): the +1 assumes eip already points at the last
 *               byte of the instruction when this handler runs - confirm
 *               against the operand decoding code.
 * callw_sp:     r3 = 16-bit displacement, r4 = return offset (3 bytes after
 *               the opcode), target = return offset + displacement.
 * Common tail:  target truncated to 16 bits, SP decremented by 2 and stored
 *               back, return offset pushed at ssb + (SP & 0xffff), first
 *               opcode of the target fetched.
 * The 32-bit stack variants are mapped to unimpl. */
2489 callw_sp_mem:   lhbrx r3,MEM
2490         sub r4,eip,csb
2491         addi r4,r4,1            # r4 is now return address
2492         b 1f
2493         .equ calll_sp_mem, unimpl
2494 
2495 callw_sp:   lhbrx r3,eip,one
2496         sub r4,eip,csb
2497         addi r4,r4,3            # r4 is return address
2498         add r3,r4,r3
2499 1:      clrlwi eip,r3,16
2500         li r5,SP
2501         lhbrx r6,state,r5       # get sp
2502         addi r6,r6,-2
2503         lbzux opcode,eip,csb
2504         sthbrx r6,state,r5      # update sp
2505         clrlwi r6,r6,16
2506         sthbrx r4,ssb,r6        # push return address
2507         GOTNEXT
2508         .equ calll_sp, unimpl
2509 
/* Near returns with a 16-bit stack pointer.  The return offset is popped
 * from ssb + SP into eip and the opcode at csb + eip is fetched.  SP is
 * advanced by 2, plus the 16-bit immediate for retw_sp_imm; sthbrx stores
 * only the low 16 bits, so SP wraps modulo 64k.  The 32-bit stack variants
 * are mapped to unimpl. */
2510 retw_sp_imm:    li opreg,SP
2511         lhbrx r4,REG
2512         lhbrx r6,eip,one
2513         addi r5,r4,2
2514         lhbrx eip,ssb,r4
2515         lbzux opcode,eip,csb
2516         add r5,r5,r6
2517         sthbrx r5,REG
2518         GOTNEXT
2519 
2520         .equ retl_sp_imm, unimpl
2521 
2522 retw_sp:    li opreg,SP
2523         lhbrx r4,REG
2524         addi r5,r4,2
2525         lhbrx eip,ssb,r4
2526         lbzux opcode,eip,csb
2527         sthbrx r5,REG
2528         GOTNEXT
2529 
2530         .equ retl_sp, unimpl
2531 
2532 /* Enter is a mess, and the description in Intel documents is actually wrong
2533  * in most revisions (all PPro/PII I have but the old Pentium is Ok) !
2534  */
2535 
/* ENTER imm16,imm8 with a 16-bit stack pointer.
 * Register use: r0 = stack space to allocate, r3 = running SP, r4 = nesting
 * level (modulo 32) and then scratch, r5 = original BP used to walk the
 * old frame pointers, r6 = new frame pointer, r7 = offset of BP in state.
 * Sequence: push BP; when the level is non zero copy level-1 frame pointers
 * from below the old BP (plain lhzx/sthx: the values are copied unchanged)
 * and push the new frame pointer; set BP to the new frame pointer; subtract
 * the allocation from SP.  The 32-bit stack variant is mapped to unimpl. */
2536 enterw_sp:  lhbrx r0,eip,one        # Stack space to allocate
2537         li opreg,SP
2538         lhbrx r3,REG            # SP
2539         li r7,BP
2540         lbzu r4,3(eip)          # nesting level
2541         addi r3,r3,-2
2542         lhbrx r5,state,r7       # Original BP
2543         clrlwi r3,r3,16
2544         sthbrx r5,ssb,r3        # Push BP
2545         andi. r4,r4,31          # modulo 32 and test
2546         mr r6,r3            # Save frame pointer to temp
2547         beq 3f
2548         mtctr r4            # iterate level-1 times
2549         b 2f
2550 1:      addi r5,r5,-2           # copy list of frame pointers
2551         clrlwi r5,r5,16
2552         lhzx r4,ssb,r5
2553         addi r3,r3,-2
2554         clrlwi r3,r3,16
2555         sthx r4,ssb,r3
2556 2:      bdnz 1b
2557         addi r3,r3,-2           # save current frame pointer
2558         clrlwi r3,r3,16
2559         sthbrx r6,ssb,r3
2560 3:      sthbrx r6,state,r7      # New BP
2561         sub r3,r3,r0
2562         sthbrx r3,REG           # Save new stack pointer
2563         NEXT
2564 
2565         .equ enterl_sp, unimpl
2566 
/* LEAVE with a 16-bit stack pointer: SP = BP + 2 and BP = word popped from
 * ssb + BP.  The popped word is moved with lhzx/sth, i.e. copied unchanged
 * between the stack and the state block.  The 32-bit stack variant is
 * mapped to unimpl. */
2567 leavew_sp:  li opreg,BP
2568         lhbrx r3,REG            # Stack = BP
2569         addi r4,r3,2            #
2570         lhzx r3,ssb,r3
2571         li opreg,SP
2572         sthbrx r4,REG           # New Stack
2573         sth r3,BP(state)        # Popped BP
2574         NEXT
2575 
2576         .equ leavel_sp, unimpl
2577 
2578 /* String instructions: first a generic setup routine, which exits early
2579 if there is a repeat prefix with a count of 0 */
2580 #define STRINGSRC base,offset
2581 #define STRINGDST esb,opreg
2582 
/* _setup_stringw: common prologue of the string instructions using 16-bit
 * addressing.
 * Out:  offset = SI, opreg = DI, ctr = CX when a repeat prefix is present
 *       and 1 otherwise, r3 = that same count, r7 = stride (+1, or -1 when
 *       the DF bit is set),
 *       cr0 = repeat prefix information: lt = repnz, gt = repz, eq = none.
 * With a repeat prefix and CX == 0 it does not return but goes to nop.
 * cr0 has to survive until after the return, since callers branch on it
 * (blt for repnz, beq in _finish_strw).  The zero test on CX is therefore
 * done through ctr with bdz, which does not touch the condition register:
 * ctr is loaded with CX + 1 and bdz decrements it back to CX, branching
 * when it reaches zero.  r4 is left untouched (the port I/O handlers keep
 * the port number there across this call). */
2583 _setup_stringw: li offset,SI            #
2584         rlwinm. r3,opcode,19,0,1    # lt=repnz, gt= repz, eq none
2585         li opreg,DI
2586         lhbrx offset,state,offset   # load si
2587         li r3,1             # no repeat
2588         lhbrx opreg,state,opreg     # load di
2589         beq 1f              # no repeat
2590         li r3,CX
2591         lhbrx r3,state,r3       # load CX
2592         addi r7,r3,1; mtctr r7      # ctr=CX+1, cr0 is left untouched
2593         bdz nop             # ctr=CX, early exit here if it is 0 !
2594 1:      mtctr r3            # ctr=CX or 1
2595         li r7,1             # stride
2596         bflr+ DF
2597         li r7,-1            # change stride sign
2598         blr
2599 
2600 /* Ending routine to update all changed registers (goes directly to NEXT) */
/* Writes offset back to SI and opreg back to DI.  When cr0.eq is set (no
 * repeat prefix according to the setup routine) it leaves through nop;
 * otherwise the remaining count in ctr is written back to CX. */
2601 _finish_strw:   li r4,SI
2602         sthbrx offset,state,r4      # update si
2603         li r4,DI
2604         sthbrx opreg,state,r4       # update di
2605         beq nop
2606         mfctr r3
2607         li r4,CX
2608         sthbrx r3,state,r4      # update cx
2609         NEXT
2610 
/* [rep] LODS with 16-bit addressing.  The loop runs ctr times, reading from
 * STRINGSRC and advancing offset by the stride (scaled by the element
 * size) modulo 64k; only the last element read ends up in AL/AX/EAX.  Data
 * is moved with plain loads and stores, so it is copied unchanged between
 * memory and the state block. */
2611 lodsb_a16:  bl _setup_stringw
2612 1:      lbzx r0,STRINGSRC       # [rep] lodsb
2613         add offset,offset,r7
2614         clrlwi offset,offset,16
2615         bdnz 1b
2616         stb r0,AL(state)
2617         b _finish_strw
2618 
2619 lodsw_a16:  bl _setup_stringw
2620         slwi r7,r7,1
2621 1:      lhzx r0,STRINGSRC       # [rep] lodsw
2622         add offset,offset,r7
2623         clrlwi offset,offset,16
2624         bdnz 1b
2625         sth r0,AX(state)
2626         b _finish_strw
2627 
2628 lodsl_a16:  bl _setup_stringw
2629         slwi r7,r7,2
2630 1:      lwzx r0,STRINGSRC       # [rep] lodsl
2631         add offset,offset,r7
2632         clrlwi offset,offset,16
2633         bdnz 1b
2634         stw r0,EAX(state)
2635         b _finish_strw
2636 
/* [rep] STOS with 16-bit addressing: AL/AX/EAX is read once into r0 and
 * stored ctr times at STRINGDST, opreg (DI) advancing by the scaled stride
 * modulo 64k. */
2637 stosb_a16:  bl _setup_stringw
2638         lbz r0,AL(state)
2639 1:      stbx r0,STRINGDST       # [rep] stosb
2640         add opreg,opreg,r7
2641         clrlwi opreg,opreg,16
2642         bdnz 1b
2643         b _finish_strw
2644 
2645 stosw_a16:  bl _setup_stringw
2646         lhz r0,AX(state)
2647         slwi r7,r7,1
2648 1:      sthx r0,STRINGDST       # [rep] stosw
2649         add opreg,opreg,r7
2650         clrlwi opreg,opreg,16
2651         bdnz 1b
2652         b _finish_strw
2653 
2654 stosl_a16:  bl _setup_stringw
2655         lwz r0,EAX(state)
2656         slwi r7,r7,2
2657 1:      stwx r0,STRINGDST       # [rep] stosl
2658         add opreg,opreg,r7
2659         clrlwi opreg,opreg,16
2660         bdnz 1b
2661         b _finish_strw
2662 
/* [rep] MOVS with 16-bit addressing: each iteration copies one element from
 * STRINGSRC to STRINGDST and advances both offset (SI) and opreg (DI) by
 * the scaled stride, each wrapped modulo 64k. */
2663 movsb_a16:  bl _setup_stringw
2664 1:      lbzx r0,STRINGSRC       # [rep] movsb
2665         add offset,offset,r7
2666         stbx r0,STRINGDST
2667         clrlwi offset,offset,16
2668         add opreg,opreg,r7
2669         clrlwi opreg,opreg,16
2670         bdnz 1b
2671         b _finish_strw
2672 
2673 movsw_a16:  bl _setup_stringw
2674         slwi r7,r7,1
2675 1:      lhzx r0,STRINGSRC       # [rep] movsw
2676         add offset,offset,r7
2677         sthx r0,STRINGDST
2678         clrlwi offset,offset,16
2679         add opreg,opreg,r7
2680         clrlwi opreg,opreg,16
2681         bdnz 1b
2682         b _finish_strw
2683 
2684 movsl_a16:  bl _setup_stringw
2685         slwi r7,r7,2
2686 1:      lwzx r0,STRINGSRC       # [rep] movsl
2687         add offset,offset,r7
2688         stwx r0,STRINGDST
2689         clrlwi offset,offset,16
2690         add opreg,opreg,r7
2691         clrlwi opreg,opreg,16
2692         bdnz 1b
2693         b _finish_strw
2694 
2695 /* At least on a Pentium, repeated string I/O instructions check for
2696 access port permission even if count is 0 ! So the order of the check is not
2697 important. */
/* [rep] INS with 16-bit addressing.  r4 = port number from DX; _check_port
 * is called with the instruction code in r3 before _setup_stringw.  base is
 * reloaded with iobase, so every iteration reads iobase + port, issues
 * eieio, and stores the element at STRINGDST while opreg (DI) advances by
 * the scaled stride modulo 64k. */
2698 insb_a16:   li r4,DX
2699         li r3,code_insb_a16
2700         lhbrx r4,state,r4
2701         bl _check_port
2702         bl _setup_stringw
2703         lwz base,iobase(state)
2704 1:      lbzx r0,base,r4         # [rep] insb
2705         eieio
2706         stbx r0,STRINGDST
2707         add opreg,opreg,r7
2708         clrlwi opreg,opreg,16
2709         bdnz 1b
2710         b _finish_strw
2711 
2712 insw_a16:   li r4,DX
2713         li r3,code_insw_a16
2714         lhbrx r4,state,r4
2715         bl _check_port
2716         bl _setup_stringw
2717         lwz base,iobase(state)
2718         slwi r7,r7,1
2719 1:      lhzx r0,base,r4         # [rep] insw
2720         eieio
2721         sthx r0,STRINGDST
2722         add opreg,opreg,r7
2723         clrlwi opreg,opreg,16
2724         bdnz 1b
2725         b _finish_strw
2726 
2727 insl_a16:   li r4,DX
2728         li r3,code_insl_a16
2729         lhbrx r4,state,r4
2730         bl _check_port
2731         bl _setup_stringw
2732         lwz base,iobase(state)
2733         slwi r7,r7,2
2734 1:      lwzx r0,base,r4         # [rep] insl
2735         eieio
2736         stwx r0,STRINGDST
2737         add opreg,opreg,r7
2738         clrlwi opreg,opreg,16
2739         bdnz 1b
2740         b _finish_strw
2741 
2742 outsb_a16:  li r4,DX
2743         li r3,code_outsb_a16
2744         lhbrx r4,state,r4
2745         bl _check_port
2746         bl _setup_stringw
2747         lwz r6,iobase(state)
2748 1:      lbzx r0,STRINGSRC       # [rep] outsb
2749         add offset,offset,r7
2750         stbx r0,r6,r4
2751         clrlwi offset,offset,16
2752         eieio
2753         bdnz 1b
2754         b _finish_strw
2755 
/* [rep] outsw, 16 bit addressing. Halfword variant of the byte routine: the
 * step in r7 is doubled and one halfword per iteration goes from STRINGSRC
 * to iobase+port (no byte reversal).
 * NOTE(review): r5 is loaded with DX after _setup_stringw but is never read
 * in this routine, and the byte/long variants have no such instruction.
 * Confirm that _finish_strw does not depend on r5 before removing it. */
2756 outsw_a16:  li r4,DX
2757         li r3,code_outsw_a16
2758         lhbrx r4,state,r4
2759         bl _check_port
2760         bl _setup_stringw
2761         li r5,DX
2762         lwz r6,iobase(state)
2763         slwi r7,r7,1
2764 1:      lhzx r0,STRINGSRC       # [rep] outsw
2765         add offset,offset,r7
2766         sthx r0,r6,r4
2767         clrlwi offset,offset,16
2768         eieio
2769         bdnz 1b
2770         b _finish_strw
2771 
/* [rep] outsl, 16 bit addressing. Word variant: the step in r7 is multiplied
 * by 4 and one word per iteration goes from STRINGSRC to iobase+port, with
 * eieio after each store and a 16 bit wrap of offset. */
2772 outsl_a16:  li r4,DX
2773         li r3,code_outsl_a16
2774         lhbrx r4,state,r4
2775         bl _check_port
2776         bl _setup_stringw
2777         lwz r6,iobase(state)
2778         slwi r7,r7,2
2779 1:      lwzx r0,STRINGSRC       # [rep] outsl
2780         add offset,offset,r7
2781         stwx r0,r6,r4
2782         clrlwi offset,offset,16
2783         eieio
2784         bdnz 1b
2785         b _finish_strw
2786 
/* [repz/repnz] cmpsb, 16 bit addressing. The blt after SET_FLAGS tests cr0,
 * which is not written in this routine, so it is presumably left by
 * _setup_stringw to tell the repnz prefix from the default case.
 * Loop 1 continues while CTR is non zero and the cr4 bit CF+2 is true,
 * loop 3 while it is false; the unsigned compare of the two bytes goes to
 * cr4. At label 2 the last pair is compared again as signed bytes into cr6
 * and result is set to op1-op2. */
2787 cmpsb_a16:  bl _setup_stringw
2788         SET_FLAGS(FLAGS_CMP(B))
2789         blt 3f              # repnz prefix
2790 1:      lbzx op1,STRINGSRC      # [repz] cmpsb
2791         add offset,offset,r7
2792         lbzx op2,STRINGDST
2793         add opreg,opreg,r7
2794         cmplw cr4,op1,op2
2795         clrlwi offset,offset,16
2796         clrlwi opreg,opreg,16
2797         bdnzt CF+2,1b
2798 2:      extsb r3,op1
2799         extsb r4,op2
2800         cmpw cr6,r3,r4
2801         sub result,op1,op2
2802         b _finish_strw
2803 
2804 3:      lbzx op1,STRINGSRC      # repnz cmpsb
2805         add offset,offset,r7
2806         lbzx op2,STRINGDST
2807         add opreg,opreg,r7
2808         cmplw cr4,op1,op2
2809         clrlwi offset,offset,16
2810         clrlwi opreg,opreg,16
2811         bdnzf CF+2,3b
2812         b 2b
2813 
/* [repz/repnz] cmpsw, 16 bit addressing. Same structure as the byte variant
 * with the step in r7 doubled. Operands are fetched with byte reversed
 * halfword loads, compared unsigned into cr4 inside the loops and, at
 * label 2, as sign extended halfwords into cr6. */
2814 cmpsw_a16:  bl _setup_stringw
2815         SET_FLAGS(FLAGS_CMP(W))
2816         slwi r7,r7,1
2817         blt 3f              # repnz prefix
2818 1:      lhbrx op1,STRINGSRC     # [repz] cmpsw
2819         add offset,offset,r7
2820         lhbrx op2,STRINGDST
2821         add opreg,opreg,r7
2822         cmplw cr4,op1,op2
2823         clrlwi offset,offset,16
2824         clrlwi opreg,opreg,16
2825         bdnzt CF+2,1b
2826 2:      extsh r3,op1
2827         extsh r4,op2
2828         cmpw cr6,r3,r4
2829         sub result,op1,op2
2830         b _finish_strw
2831 
2832 3:      lhbrx op1,STRINGSRC     # repnz cmpsw
2833         add offset,offset,r7
2834         lhbrx op2,STRINGDST
2835         add opreg,opreg,r7
2836         cmplw cr4,op1,op2
2837         clrlwi offset,offset,16
2838         clrlwi opreg,opreg,16
2839         bdnzf CF+2,3b
2840         b 2b
2841 
/* [repz/repnz] cmpsl, 16 bit addressing. The step in r7 is multiplied by 4
 * and operands are fetched with byte reversed word loads. No sign extension
 * is needed at label 2: the signed compare into cr6 uses the full words. */
2842 cmpsl_a16:  bl _setup_stringw
2843         SET_FLAGS(FLAGS_CMP(L))
2844         slwi r7,r7,2
2845         blt 3f              # repnz prefix
2846 1:      lwbrx op1,STRINGSRC     # [repz] cmpsl
2847         add offset,offset,r7
2848         lwbrx op2,STRINGDST
2849         add opreg,opreg,r7
2850         cmplw cr4,op1,op2
2851         clrlwi offset,offset,16
2852         clrlwi opreg,opreg,16
2853         bdnzt CF+2,1b
2854 2:      cmpw cr6,op1,op2
2855         sub result,op1,op2
2856         b _finish_strw
2857 
2858 3:      lwbrx op1,STRINGSRC     # repnz cmpsl
2859         add offset,offset,r7
2860         lwbrx op2,STRINGDST
2861         add opreg,opreg,r7
2862         cmplw cr4,op1,op2
2863         clrlwi offset,offset,16
2864         clrlwi opreg,opreg,16
2865         bdnzf CF+2,3b
2866         b 2b
2867 
/* [repnz/repz] scasb, 16 bit addressing. op1 is loaded once from AL, then
 * each iteration fetches a byte from STRINGDST into op2 and compares the
 * pair unsigned into cr4. Loop 1 runs while the cr4 bit CF+2 is false, loop
 * 3 (taken on bgt, i.e. on a cr0 value not set in this routine) while it is
 * true. Label 2 redoes the last compare on sign extended bytes into cr6 and
 * sets result to op1-op2. */
2868 scasb_a16:  bl _setup_stringw
2869         lbzx op1,AL,state       # AL
2870         SET_FLAGS(FLAGS_CMP(B))
2871         bgt 3f              # repz prefix
2872 1:      lbzx op2,STRINGDST      # [repnz] scasb
2873         add opreg,opreg,r7
2874         cmplw cr4,op1,op2
2875         clrlwi opreg,opreg,16
2876         bdnzf CF+2,1b
2877 2:      extsb r3,op1
2878         extsb r4,op2
2879         cmpw cr6,r3,r4
2880         sub result,op1,op2
2881         b _finish_strw
2882 
2883 3:      lbzx op2,STRINGDST      # repz scasb
2884         add opreg,opreg,r7
2885         cmplw cr4,op1,op2
2886         clrlwi opreg,opreg,16
2887         bdnzt CF+2,3b
2888         b 2b
2889 
/* [repnz/repz] scasw, 16 bit addressing. Halfword variant: op1 comes from AX
 * with a byte reversed load, the step in r7 is doubled, and the final signed
 * compare at label 2 uses sign extended halfwords. */
2890 scasw_a16:  bl _setup_stringw
2891         lhbrx op1,AX,state
2892         SET_FLAGS(FLAGS_CMP(W))
2893         slwi r7,r7,1
2894         bgt 3f              # repz prefix
2895 1:      lhbrx op2,STRINGDST     # [repnz] scasw
2896         add opreg,opreg,r7
2897         cmplw cr4,op1,op2
2898         clrlwi opreg,opreg,16
2899         bdnzf CF+2,1b
2900 2:      extsh r3,op1
2901         extsh r4,op2
2902         cmpw cr6,r3,r4
2903         sub result,op1,op2
2904         b _finish_strw
2905 
2906 3:      lhbrx op2,STRINGDST     # repz scasw
2907         add opreg,opreg,r7
2908         cmplw cr4,op1,op2
2909         clrlwi opreg,opreg,16
2910         bdnzt CF+2,3b
2911         b 2b
2912 
/* [repnz/repz] scasl, 16 bit addressing. Word variant: op1 comes from EAX
 * with a byte reversed load, the step in r7 is multiplied by 4, and the
 * final signed compare at label 2 works on the full words. */
2913 scasl_a16:  bl _setup_stringw
2914         lwbrx op1,EAX,state
2915         SET_FLAGS(FLAGS_CMP(L))
2916         slwi r7,r7,2
2917         bgt 3f              # repz prefix
2918 1:      lwbrx op2,STRINGDST     # [repnz] scasl
2919         add opreg,opreg,r7
2920         cmplw cr4,op1,op2
2921         clrlwi opreg,opreg,16
2922         bdnzf CF+2,1b
2923 2:      cmpw cr6,op1,op2
2924         sub result,op1,op2
2925         b _finish_strw
2926 
2927 3:      lwbrx op2,STRINGDST     # repz scasl
2928         add opreg,opreg,r7
2929         cmplw cr4,op1,op2
2930         clrlwi opreg,opreg,16
2931         bdnzt CF+2,3b
2932         b 2b
2933 
/* None of the string instructions is implemented for 32 bit addressing:
 * every *_a32 entry point is an alias of unimpl. */
2934         .equ lodsb_a32, unimpl
2935         .equ lodsw_a32, unimpl
2936         .equ lodsl_a32, unimpl
2937         .equ stosb_a32, unimpl
2938         .equ stosw_a32, unimpl
2939         .equ stosl_a32, unimpl
2940         .equ movsb_a32, unimpl
2941         .equ movsw_a32, unimpl
2942         .equ movsl_a32, unimpl
2943         .equ insb_a32, unimpl
2944         .equ insw_a32, unimpl
2945         .equ insl_a32, unimpl
2946         .equ outsb_a32, unimpl
2947         .equ outsw_a32, unimpl
2948         .equ outsl_a32, unimpl
2949         .equ cmpsb_a32, unimpl
2950         .equ cmpsw_a32, unimpl
2951         .equ cmpsl_a32, unimpl
2952         .equ scasb_a32, unimpl
2953         .equ scasw_a32, unimpl
2954         .equ scasl_a32, unimpl
2955 
/* xlatb, 16 bit addressing: AL = byte at base + ((BX + AL) mod 64K).
 * The table index BX+AL is an effective address formed with 16 bit
 * addressing, so it is wrapped to 16 bits before the base is applied, the
 * same way the a16 string instructions wrap their offsets. The offset
 * register is used as scratch, as it already was for holding BX. */
2956 xlatb_a16:  li offset,BX
2957         lbz r3,AL(state)
2958         lhbrx offset,offset,state
2959         add offset,offset,r3        # BX + zero extended AL
              clrlwi offset,offset,16     # 16 bit effective address wrap
2960         lbzx r3,base,offset
2961         stb r3,AL(state)
2962         NEXT
2963 
2964         .equ xlatb_a32, unimpl
2965 
2966 /*
2967  * Shift and rotates: note the oddity that rotates do not affect SF/ZF/AF/PF
2968  * but shifts do. Also testing has indicated that rotates with a count of zero
2969  * do not affect any flag. The documentation specifies this for shifts but
2970  * is more obscure for rotates. The overflow flag setting is only specified
2971  * when count is 1, otherwise OF is undefined which simplifies emulation.
2972  */
2973 
2974 /*
2975  * The rotates through carry are among the most difficult instructions,
2976  * they are implemented as a shift of 2*n+some bits depending on case.
2977  * First the left rotates through carry.
2978  */
2979 
2980 /* Byte rcl is performed on 18 bits (17 actually used) in a single register */
/* rclb: three entry points that differ only in the source of the count
 * (NEXTBYTE, CL, or the constant 1). The count is masked to 5 bits and then
 * reduced modulo 9 (count*29>>8 gives count/9, which is multiplied back by 9
 * and subtracted). The incoming carry is recomputed through XER[CA] and the
 * RES2CF/CF_ROTCNT/CF_POL_INSERT macros (defined elsewhere) and merged into
 * bit 8 above the data byte before the rotate. A masked count of zero
 * branches to nop before anything is stored. */
2981 rclb_imm:   NEXTBYTE(r3)
2982         b 1f
2983 rclb_cl:    lbz r3,CL(state)
2984         b 1f
2985 rclb_1:     li r3,1
2986 1:      lbzx r0,MEM
2987         andi. r3,r3,31          # count%32
2988         addc r4,flags,flags     # CF_IN->xer[ca]
2989         RES2CF(r6)
2990         subfe r4,result,op1
2991         mulli r5,r3,29          # 29=ceil(256/9)
2992         CF_ROTCNT(r7)
2993         addze r6,r6
2994         CF_POL_INSERT(r0,23)
2995         srwi r5,r5,8            # count/9
2996         rlwnm r6,r6,r7,0x100
2997         xor r0,r0,r6            # (23)0:CF:data8
2998         rlwimi r5,r5,3,26,28        # 9*(count/9)
2999         rlwimi r0,r0,23,0,7     # CF:(data8):(14)0:CF:data8
3000         sub r3,r3,r5            # count%9
3001         beq- nop            # no flags changed if count 0
3002         ROTATE_FLAGS
3003         rlwnm r0,r0,r3,0x000001ff   # (23)0:NewCF:Result8
3004         rlwimi flags,r0,19,CF_VALUE
3005         stbx r0,MEM
3006         rlwimi flags,r0,18,OF_XOR
3007         NEXT
3008 
3009 /* Word rcl is performed on 33 bits (CF:data16:CF:(15 MSB of data16) */
/* rclw: count from NEXTBYTE, CL or the constant 1, masked to 5 bits and
 * reduced modulo 17 (count+15 has bit 5 set exactly when count >= 17). The
 * incoming carry is placed in bit 16 above the data; the result is assembled
 * from a left shift of CF:data16 plus a rotated copy that supplies the low
 * bits. A masked count of zero branches to nop before flags or memory are
 * written. */
3010 rclw_imm:   NEXTBYTE(r3)
3011         b 1f
3012 rclw_cl:    lbz r3,CL(state)
3013         b 1f
3014 rclw_1:     li r3,1
3015 1:      lhbrx r0,MEM
3016         andi. r3,r3,31          # count=count%32
3017         addc r4,flags,flags
3018         RES2CF(r6)
3019         subfe r4,result,op1
3020         addi r5,r3,15           # modulo 17: >=32 if >=17
3021         CF_ROTCNT(r7)
3022         addze r6,r6
3023         addi r7,r7,8
3024         CF_POL_INSERT(r0,15)
3025         srwi r5,r5,5            # count/17
3026         rlwnm r6,r6,r7,0x10000
3027         rlwimi r5,r5,4,27,27        # 17*(count/17)
3028         xor r0,r0,r6            # (15)0:CF:data16
3029         sub r3,r3,r5            # count%17
3030         rlwinm r4,r0,15,0xffff0000  # CF:(15 MSB of data16):(16)0
3031         slw r0,r0,r3            # New carry and MSBs
3032         rlwnm r4,r4,r3,16,31        # New LSBs
3033         beq- nop            # no flags changed if count 0
3034         ROTATE_FLAGS
3035         add r0,r0,r4            # result
3036         rlwimi flags,r0,11,CF_VALUE
3037         sthbrx r0,MEM
3038         rlwimi flags,r0,10,OF_XOR
3039         NEXT
3040 
3041 /* Longword rcl only needs 64 bits because the maximum rotate count is 31 ! */
/* rcll: 32 bit rotate left through carry. Count from NEXTBYTE, CL or the
 * constant 1, masked to 5 bits; a zero count branches to nop before flags or
 * memory are written. r4 is built as CF:(31 MSB of data32) so that the
 * result is (data << count) + (r4 >> (32-count)) and the new carry is the
 * bit rotated into the MSB of r4.
 * The carry extraction starts with addc r4,flags,flags, exactly as in the
 * byte and word variants: doubling flags moves its most significant bit into
 * XER[CA]. The first operand used to be r4, which is not initialised on any
 * of the three entry paths, so the carry in depended on stale register
 * contents. */
3042 rcll_imm:   NEXTBYTE(r3)
3043         b 1f
3044 rcll_cl:    lbz r3,CL(state)
3045         b 1f
3046 rcll_1:     li r3,1
3047 1:      lwbrx r0,MEM
3048         andi. r3,r3,31          # count=count%32
3049         addc r4,flags,flags     # CF_IN->XER[CA]
3050         RES2CF(r6)
3051         subfe r4,result,op1
3052         CF_ROTCNT(r7)
3053         addze r6,r6
3054         srwi r4,r0,1            # 0:(31 MSB of data32)
3055         addi r7,r7,23
3056         CF_POL_INSERT(r4,0)
3057         rlwnm r6,r6,r7,0,0
3058         beq- nop            # no flags changed if count 0
3059         subfic r5,r3,32
3060         xor r4,r4,r6
3061         ROTATE_FLAGS
3062         slw r0,r0,r3            # New MSBs
3063         srw r5,r4,r5            # New LSBs
3064         rlwnm r4,r4,r3,0,0      # New Carry
3065         add r0,r0,r5            # result
3066         rlwimi flags,r4,28,CF_VALUE
3067         rlwimi flags,r0,27,OF_XOR
3068         stwbrx r0,MEM
3069         NEXT
3070 
3071 /* right rotates through carry are even worse because PPC only has a left
3072 rotate instruction. Somewhat tough when combined with modulo 9, 17, or
3073 33 operation and the rules of OF and CF flag settings. */
3074 /* Byte rcr is performed on 17 bits */
/* rcrb: count from NEXTBYTE, CL or the constant 1, masked to 5 bits and
 * reduced modulo 9 with the same multiply by 29 trick as rclb. The operand
 * is laid out as data8:CF:data8 so that a plain right shift by the reduced
 * count leaves NewCF:Result8 in the low 9 bits. CF_VALUE and OF_XOR are
 * inserted with a single rlwimi. A masked count of zero branches to nop. */
3075 rcrb_imm:   NEXTBYTE(r3)
3076         b 1f
3077 rcrb_cl:    lbz r3,CL(state)
3078         b 1f
3079 rcrb_1:     li r3,1
3080 1:      lbzx r0,MEM
3081         andi. r3,r3,31          # count%32
3082         addc r4,flags,flags     # cf_in->xer[ca]
3083         RES2CF(r6)
3084         mulli r5,r3,29          # 29=ceil(256/9)
3085         subfe r4,result,op1
3086         CF_ROTCNT(r7)
3087         addze r6,r6
3088         CF_POL_INSERT(r0,23)
3089         srwi r5,r5,8            # count/9
3090         rlwimi r0,r0,9,0x0001fe00   # (15)0:data8:0:data8
3091         rlwnm r6,r6,r7,0x100
3092         rlwimi r5,r5,3,26,28        # 9*(count/9)
3093         xor r0,r0,r6            # (15)0:data8:CF:data8
3094         sub r3,r3,r5            # count%9
3095         beq- nop            # no flags changed if count 0
3096         ROTATE_FLAGS
3097         srw r0,r0,r3            # (23)junk:NewCF:Result8
3098         rlwimi flags,r0,19,CF_VALUE|OF_XOR
3099         stbx r0,MEM
3100         NEXT
3101 
3102 /* Word rcr is a 33 bit right shift with a quirk, because the 33rd bit
3103 is only needed when the rotate count is 16 and rotating left or right
3104 by 16 a 32 bit quantity is the same ! */
/* rcrw: count from NEXTBYTE, CL or the constant 1, masked to 5 bits and
 * reduced modulo 17. The operand is laid out as (15 MSB of data16):CF:data16
 * and shifted right; r7 holds the MSB of data16, which only contributes
 * (through the 0x10000 mask) when the reduced count is 16. A masked count of
 * zero branches to nop before flags or memory are written. */
3105 rcrw_imm:   NEXTBYTE(r3)
3106         b 1f
3107 rcrw_cl:    lbz r3,CL(state)
3108         b 1f
3109 rcrw_1:     li r3,1
3110 1:      lhbrx r0,MEM
3111         andi. r3,r3,31          # count%32
3112         addc r4,flags,flags     # cf_in->xer[ca]
3113         RES2CF(r6)
3114         subfe r4,result,op1
3115         addi r5,r3,15           # >=32 if >=17
3116         CF_ROTCNT(r7)
3117         addze r6,r6
3118         addi r7,r7,8
3119         CF_POL_INSERT(r0,15)
3120         srwi r5,r5,5            # count/17
3121         rlwnm r6,r6,r7,0x10000
3122         rlwinm r7,r0,16,0x01        # MSB of data16
3123         rlwimi r0,r0,17,0xfffe0000  # (15 MSB of data16):0:data16
3124         rlwimi r5,r5,4,27,27        # 17*(count/17)
3125         xor r0,r0,r6            # (15 MSB of data16):CF:data16
3126         sub r3,r3,r5            # count%17
3127         beq- nop            # no flags changed if count 0
3128         srw r0,r0,r3            # shift right
3129         rlwnm r7,r7,r3,0x10000      # just in case count=16
3130         ROTATE_FLAGS
3131         add r0,r0,r7            # junk15:NewCF:result16
3132         rlwimi flags,r0,11,CF_VALUE|OF_XOR
3133         sthbrx r0,MEM
3134         NEXT
3135 
3136 /* Longword rcr need only 64 bits since the rotate count is limited to 31 */
/* rcrl: 32 bit rotate right through carry. Count from NEXTBYTE, CL or the
 * constant 1, masked to 5 bits; a zero count branches to nop before flags or
 * memory are written. r4 is built as (31 LSB of data32):CF so that the
 * result is (data >> count) + (r4 << (32-count)) and the new carry ends up
 * in the LSB of r4 >> count.
 * ROTATE_FLAGS is issued before CF_VALUE and OF_XOR are inserted into flags,
 * as every other rotate routine does; it was missing here, so the inserted
 * bits were written into a flags word that had not been switched to the
 * rotate representation. */
3137 rcrl_imm:   NEXTBYTE(r3)
3138         b 1f
3139 rcrl_cl:    lbz r3,CL(state)
3140         b 1f
3141 rcrl_1:     li r3,1
3142 1:      lwbrx r0,MEM
3143         andi. r3,r3,31          # count%32
3144         addc r4,flags,flags
3145         RES2CF(r6)
3146         subfe r4,result,op1
3147         CF_ROTCNT(r7)
3148         slwi r4,r0,1            # (31MSB of data32):0
3149         addze r6,r6
3150         addi r7,r7,24
3151         CF_POL_INSERT(r4,31)
3152         rlwnm r6,r6,r7,0x01
3153         beq- nop            # no flags changed if count 0
3154         subfic r7,r3,32
3155         xor r4,r4,r6
              ROTATE_FLAGS
3156         srw r0,r0,r3            # Result LSB
3157         slw r5,r4,r7            # Result MSB
3158         srw r4,r4,r3            # NewCF in LSB
3159         add r0,r0,r5            # result
3160         rlwimi flags,r4,27,CF_VALUE
3161         stwbrx r0,MEM
3162         rlwimi flags,r0,27,OF_XOR
3163         NEXT
3164 
3165 /* After the rotates through carry, normal rotates are so simple ! */
/* rolb: count from NEXTBYTE, CL or the constant 1. The zero test uses the
 * count masked to 5 bits (r4) while the rotate itself uses count%8 (r3), so
 * a count of 8, 16 or 24 leaves the data unchanged but still updates the
 * flags. The byte is copied into the top 8 bits of r0 so that rotlw shifts
 * the right bits back into the low byte. */
3166 rolb_imm:   NEXTBYTE(r3)
3167         b 1f
3168 rolb_cl:    lbz r3,CL(state)
3169         b 1f
3170 rolb_1:     li r3,1
3171 1:      lbzx r0,MEM
3172         andi. r4,r3,31          # count%32 == 0 ?
3173         clrlwi r3,r3,29         # count%8
3174         rlwimi r0,r0,24,0xff000000  # replicate for shift in
3175         beq- nop            # no flags changed if count 0
3176         ROTATE_FLAGS
3177         rotlw r0,r0,r3
3178         rlwimi flags,r0,27,CF_VALUE # New CF
3179         stbx r0,MEM
3180         rlwimi flags,r0,26,OF_XOR   # New OF (CF xor MSB)
3181         NEXT
3182 
/* rolw: count from NEXTBYTE, CL or the constant 1, masked to 5 bits. The
 * halfword is duplicated into both halves of r0, so a 32 bit rotate by the
 * masked count yields the 16 bit rotate result in the low half. */
3183 rolw_imm:   NEXTBYTE(r3)
3184         b 1f
3185 rolw_cl:    lbz r3,CL(state)
3186         b 1f
3187 rolw_1:     li r3,1
3188 1:      lhbrx r0,MEM
3189         andi. r3,r3,31
3190         rlwimi r0,r0,16,0,15        # duplicate
3191         beq- nop            # no flags changed if count 0
3192         ROTATE_FLAGS
3193         rotlw r0,r0,r3          # result word duplicated
3194         rlwimi flags,r0,27,CF_VALUE # New CF
3195         sthbrx r0,MEM
3196         rlwimi flags,r0,26,OF_XOR   # New OF (CF xor MSB)
3197         NEXT
3198 
/* roll: count from NEXTBYTE, CL or the constant 1, masked to 5 bits; maps
 * directly onto rotlw. The new CF is taken from the LSB of the result. */
3199 roll_imm:   NEXTBYTE(r3)
3200         b 1f
3201 roll_cl:    lbz r3,CL(state)
3202         b 1f
3203 roll_1:     li r3,1
3204 1:      lwbrx r0,MEM
3205         andi. r3,r3,31
3206         beq- nop            # no flags changed if count 0
3207         ROTATE_FLAGS
3208         rotlw r0,r0,r3          # result
3209         rlwimi flags,r0,27,CF_VALUE # New CF
3210         stwbrx r0,MEM
3211         rlwimi flags,r0,26,OF_XOR   # New OF (CF xor MSB)
3212         NEXT
3213 
/* rorb: count from NEXTBYTE, CL or the constant 1. The zero test uses the
 * count masked to 5 bits (r4), the shift uses count%8 (r3). The byte is
 * copied into bits 8..15 so that a right shift brings the rotated out bits
 * back into the top of the low byte. */
3214 rorb_imm:   NEXTBYTE(r3)
3215         b 1f
3216 rorb_cl:    lbz r3,CL(state)
3217         b 1f
3218 rorb_1:     li r3,1
3219 1:      lbzx r0,MEM
3220         andi. r4,r3,31          # count%32 == 0 ?
3221         clrlwi r3,r3,29         # count%8
3222         rlwimi r0,r0,8,0x0000ff00   # replicate for shift in
3223         beq- nop            # no flags changed if count 0
3224         ROTATE_FLAGS
3225         srw r0,r0,r3
3226         rlwimi flags,r0,20,CF_VALUE
3227         stbx r0,MEM
3228         rlwimi flags,r0,19,OF_XOR
3229         NEXT
3230 
/* rorw: count from NEXTBYTE, CL or the constant 1. The zero test uses the
 * count masked to 5 bits (r4), the shift uses count%16 (r3). The halfword is
 * duplicated into the upper half of r0 and the pair is shifted right; only
 * the low 16 bits are stored. */
3231 rorw_imm:   NEXTBYTE(r3)
3232         b 1f
3233 rorw_cl:    lbz r3,CL(state)
3234         b 1f
3235 rorw_1:     li r3,1
3236 1:      lhbrx r0,MEM
3237         andi. r4,r3,31
3238         clrlwi r3,r3,28         # count %16
3239         rlwimi r0,r0,16,0xffff0000  # duplicate
3240         beq- nop            # no flags changed if count 0
3241         ROTATE_FLAGS
3242         srw r0,r0,r3            # junk16:result16
3243         rlwimi flags,r0,12,CF_VALUE
3244         sthbrx r0,MEM
3245         rlwimi flags,r0,11,OF_XOR
3246         NEXT
3247 
/* rorl: count from NEXTBYTE, CL or the constant 1. The zero test uses the
 * count masked to 5 bits (r4). The rotate right is done as a rotate left by
 * the negated count; rotlw only uses the low 5 bits of r3. */
3248 rorl_imm:   NEXTBYTE(r3)
3249         b 1f
3250 rorl_cl:    lbz r3,CL(state)
3251         b 1f
3252 rorl_1:     li r3,1
3253 1:      lwbrx r0,MEM
3254         andi. r4,r3,31
3255         neg r3,r3
3256         beq- nop            # no flags changed if count 0
3257         ROTATE_FLAGS
3258         rotlw r0,r0,r3          # result
3259         rlwimi flags,r0,28,CF_VALUE
3260         stwbrx r0,MEM
3261         rlwimi flags,r0,27,OF_XOR
3262         NEXT
3263 
3264 /* Right arithmetic shifts: they clear OF whenever count!=0 */
/* SAR_FLAGS is the flag descriptor shared by the three arithmetic right
 * shifts below.
 * sarb: count from NEXTBYTE, CL or the constant 1, masked to 5 bits; zero
 * count branches to nop. The byte is sign extended, result is the
 * arithmetic shift, and CF is taken from the operand shifted right by
 * count-1 (the last bit shifted out). */
3265 #define SAR_FLAGS       CF_ZERO|OF_ZERO|RESL
3266 sarb_imm:   NEXTBYTE(r3)
3267         b 1f
3268 sarb_cl:    lbz r3,CL(state)
3269         b 1f
3270 sarb_1:     li r3,1
3271 1:      lbzx r4,MEM
3272         andi. r3,r3,31
3273         addi r5,r3,-1
3274         extsb r4,r4
3275         beq- nop            # no flags changed if count 0
3276         SET_FLAGS(SAR_FLAGS)
3277         sraw result,r4,r3
3278         srw r5,r4,r5
3279         stbx result,MEM
3280         rlwimi flags,r5,27,CF_VALUE
3281         NEXT
3282 
/* sarw: halfword arithmetic right shift. Same structure as the byte
 * variant, with a byte reversed halfword load/store and extsh for the sign
 * extension. */
3283 sarw_imm:   NEXTBYTE(r3)
3284         b 1f
3285 sarw_cl:    lbz r3,CL(state)
3286         b 1f
3287 sarw_1:     li r3,1
3288 1:      lhbrx r4,MEM
3289         andi. r3,r3,31
3290         addi r5,r3,-1
3291         extsh r4,r4
3292         beq- nop            # no flags changed if count 0
3293         SET_FLAGS(SAR_FLAGS)
3294         sraw result,r4,r3
3295         srw r5,r4,r5
3296         sthbrx result,MEM
3297         rlwimi flags,r5,27,CF_VALUE
3298         NEXT
3299 
/* sarl: word arithmetic right shift. No sign extension is needed; CF is
 * again the LSB of the operand shifted right by count-1. */
3300 sarl_imm:   NEXTBYTE(r3)
3301         b 1f
3302 sarl_cl:    lbz r3,CL(state)
3303         b 1f
3304 sarl_1:     li r3,1
3305 1:      lwbrx r4,MEM
3306         andi. r3,r3,31
3307         addi r5,r3,-1
3308         beq- nop            # no flags changed if count 0
3309         SET_FLAGS(SAR_FLAGS)
3310         sraw result,r4,r3
3311         srw r5,r4,r5
3312         stwbrx result,MEM
3313         rlwimi flags,r5,27,CF_VALUE
3314         NEXT
3315 
3316 /* Left shifts are quite easy: they use the flag mechanism of add */
/* shlb: count from NEXTBYTE, CL or the constant 1, masked to 5 bits; zero
 * count branches to nop before the operand is even loaded. The shifted
 * value is kept unmasked in result and only its low byte is stored; flags
 * use the FLAGS_ADD(B) descriptor with op2 set to a copy of op1. */
3317 shlb_imm:   NEXTBYTE(r3)
3318         b 1f
3319 shlb_cl:    lbz r3,CL(state)
3320         b 1f
3321 shlb_1:     li r3,1
3322 1:      andi. r3,r3,31
3323         beq- nop            # no flags changed if count 0
3324         lbzx op1,MEM
3325         SET_FLAGS(FLAGS_ADD(B))
3326         slw result,op1,r3
3327         addi op2,op1,0          # for OF computation only !
3328         stbx result,MEM
3329         NEXT
3330 
/* shlw: halfword left shift, same structure as the byte variant with a
 * byte reversed halfword load/store and the FLAGS_ADD(W) descriptor. */
3331 shlw_imm:   NEXTBYTE(r3)
3332         b 1f
3333 shlw_cl:    lbz r3,CL(state)
3334         b 1f
3335 shlw_1:     li r3,1
3336 1:      andi. r3,r3,31
3337         beq- nop            # no flags changed if count 0
3338         lhbrx op1,MEM
3339         SET_FLAGS(FLAGS_ADD(W))
3340         slw result,op1,r3
3341         addi op2,op1,0          # for OF computation only !
3342         sthbrx result,MEM
3343         NEXT
3344 
3345 /* That one may be wrong */
/* shll: word left shift. Unlike the byte and halfword variants there are no
 * spare upper bits in the register, so op1 is additionally replaced by the
 * operand shifted left by count-1 (its MSB is the last bit shifted out)
 * while op2 keeps the unshifted operand. */
3346 shll_imm:   NEXTBYTE(r3)
3347         b 1f
3348 shll_cl:    lbz r3,CL(state)
3349         b 1f
3350 shll_1:     li r3,1
3351 1:      andi. r3,r3,31
3352         beq- nop            # no flags changed if count 0
3353         lwbrx op1,MEM
3354         addi r4,r3,-1
3355         SET_FLAGS(FLAGS_ADD(L))
3356         slw result,op1,r3
3357         addi op2,op1,0          # for OF computation only !
3358         slw op1,op1,r4          # for CF computation
3359         stwbrx result,MEM
3360         NEXT
3361 
3362 /* Right shifts are quite complex, because of funny flag rules ! */
/* shrb: logical right shift of a byte. Count from NEXTBYTE, CL or the
 * constant 1, masked to 5 bits; zero count branches to nop. CF is the LSB
 * of the operand shifted right by count-1; op2 is set to -1 for the
 * FLAGS_SHR(B) descriptor. */
3363 shrb_imm:   NEXTBYTE(r3)
3364         b 1f
3365 shrb_cl:    lbz r3,CL(state)
3366         b 1f
3367 shrb_1:     li r3,1
3368 1:      andi. r3,r3,31
3369         beq- nop            # no flags changed if count 0
3370         lbzx op1,MEM
3371         addi r4,r3,-1
3372         SET_FLAGS(FLAGS_SHR(B))
3373         srw result,op1,r3
3374         srw r4,op1,r4
3375         li op2,-1           # for OF computation only !
3376         stbx result,MEM
3377         rlwimi flags,r4,27,CF_VALUE # Set CF
3378         NEXT
3379 
/* shrw: logical right shift of a halfword; same structure as the byte
 * variant with a byte reversed halfword load/store and FLAGS_SHR(W). */
3380 shrw_imm:   NEXTBYTE(r3)
3381         b 1f
3382 shrw_cl:    lbz r3,CL(state)
3383         b 1f
3384 shrw_1:     li r3,1
3385 1:      andi. r3,r3,31
3386         beq- nop            # no flags changed if count 0
3387         lhbrx op1,MEM
3388         addi r4,r3,-1
3389         SET_FLAGS(FLAGS_SHR(W))
3390         srw result,op1,r3
3391         srw r4,op1,r4
3392         li op2,-1           # for OF computation only !
3393         sthbrx result,MEM
3394         rlwimi flags,r4,27,CF_VALUE # Set CF
3395         NEXT
3396 
/* shrl: logical right shift of a word; same structure as the byte variant
 * with a byte reversed word load/store and FLAGS_SHR(L). */
3397 shrl_imm:   NEXTBYTE(r3)
3398         b 1f
3399 shrl_cl:    lbz r3,CL(state)
3400         b 1f
3401 shrl_1:     li r3,1
3402 1:      andi. r3,r3,31
3403         beq- nop            # no flags changed if count 0
3404         lwbrx op1,MEM
3405         addi r4,r3,-1
3406         SET_FLAGS(FLAGS_SHR(L))
3407         srw result,op1,r3
3408         srw r4,op1,r4
3409         li op2,-1           # for OF computation only !
3410         stwbrx result,MEM
3411         rlwimi flags,r4,27,CF_VALUE # Set CF
3412         NEXT
3413 
3414 /* Double length shifts, shldw uses FLAGS_ADD for simplicity */
/* shldw: double shift left, 16 bit. Count from NEXTBYTE or CL, masked to 5
 * bits; zero count branches to nop. The register operand is placed in the
 * upper half above the memory operand and the pair is rotated left, so the
 * upper bits of REG enter the low half from the right. Only the low 16 bits
 * of result are stored. */
3415 shldw_imm:  NEXTBYTE(r3)
3416         b 1f
3417 shldw_cl:   lbz r3,CL(state)
3418 1:      andi. r3,r3,31
3419         beq- nop
3420         lhbrx op1,MEM
3421         SET_FLAGS(FLAGS_ADD(W))
3422         lhbrx op2,REG
3423         rlwimi op1,op2,16,0,15      # op2:op1
3424         addi op2,op1,0
3425         rotlw result,op1,r3
3426         sthbrx result,MEM
3427         NEXT
3428 
/* shldl: double shift left, 32 bit. result = (MEM << count) | (REG >>
 * (32-count)). The rotated copy of the destination in r3 has the last bit
 * shifted out in its LSB, which is inserted as CF_VALUE. Zero count (after
 * masking to 5 bits) branches to nop. */
3429 shldl_imm:  NEXTBYTE(r3)
3430         b 1f
3431 shldl_cl:   lbz r3,CL(state)
3432 1:      andi. r3,r3,31
3433         beq- nop
3434         lwbrx op1,MEM
3435         SET_FLAGS(FLAGS_DBLSH(L))
3436         lwbrx op2,REG
3437         subfic r4,r3,32
3438         slw result,op1,r3
3439         srw r4,op2,r4
3440         rotlw r3,op1,r3
3441         or result,result,r4
3442         addi op2,op1,0
3443         rlwimi flags,r3,27,CF_VALUE
3444         stwbrx result,MEM
3445         NEXT
3446 
/* shrdw: double shift right, 16 bit. The register operand is placed in the
 * upper half above the memory operand and the pair is shifted right; the
 * low 16 bits of result are stored. CF is the LSB of the pair shifted right
 * by count-1. Zero count (after masking to 5 bits) branches to nop. */
3447 shrdw_imm:  NEXTBYTE(r3)
3448         b 1f
3449 shrdw_cl:   lbz r3,CL(state)
3450 1:      andi. r3,r3,31
3451         beq- nop
3452         lhbrx op1,MEM
3453         SET_FLAGS(FLAGS_DBLSH(W))
3454         lhbrx op2,REG
3455         addi r4,r3,-1
3456         rlwimi op1,op2,16,0,15      # op2:op1
3457         addi op2,op1,0
3458         srw result,op1,r3
3459         srw r4,op1,r4
3460         sthbrx result,MEM
3461         rlwimi flags,r4,27,CF_VALUE
3462         NEXT
3463 
/* shrdl: double shift right, 32 bit. result = (MEM >> count) | (REG <<
 * (32-count)). r3 is reused after the main shift to hold the destination
 * shifted right by count-1, whose LSB becomes CF_VALUE. Zero count (after
 * masking to 5 bits) branches to nop. */
3464 shrdl_imm:  NEXTBYTE(r3)
3465         b 1f
3466 shrdl_cl:   lbz r3,CL(state)
3467 1:      andi. r3,r3,31
3468         beq- nop
3469         lwbrx op1,MEM
3470         SET_FLAGS(FLAGS_DBLSH(L))
3471         lwbrx op2,REG
3472         subfic r4,r3,32
3473         srw result,op1,r3
3474         addi r3,r3,-1
3475         slw r4,op2,r4
3476         srw r3,op1,r3
3477         or result,result,r4
3478         addi op2,op1,0
3479         rlwimi flags,r3,27,CF_VALUE
3480         stwbrx result,MEM
3481         NEXT
3482 
3483 /* One operand multiplies: with result double the operand size, unsigned */
/* mulb: AX = AL * r/m8, unsigned. 255-result is negative exactly when the
 * product does not fit in 8 bits; the bits of that difference selected by
 * the CF_VALUE|OF_VALUE mask are copied into flags. */
3484 mulb:       lbzx op2,MEM
3485         lbz op1,AL(state)
3486         mullw result,op1,op2
3487         SET_FLAGS(FLAGS_MUL)
3488         subfic r3,result,255
3489         sthbrx result,AX,state
3490         rlwimi flags,r3,0,CF_VALUE|OF_VALUE
3491         NEXT
3492 
/* mulw: DX:AX = AX * r/m16, unsigned. The negated high half (r5) is
 * negative exactly when DX is non zero; the bits of r5 selected by the
 * CF_VALUE|OF_VALUE mask are copied into flags. */
3493 mulw:       lhbrx op2,MEM
3494         lhbrx op1,AX,state
3495         mullw result,op1,op2
3496         SET_FLAGS(FLAGS_MUL)
3497         li r4,DX
3498         srwi r3,result,16
3499         sthbrx result,AX,state
3500         neg r5,r3
3501         sthbrx r3,r4,state      # DX
3502         rlwimi flags,r5,0,CF_VALUE|OF_VALUE
3503         NEXT
3504 
/* mull: EDX:EAX = EAX * r/m32, unsigned. mulhwu. records in cr0 whether the
 * high word is zero; if it is not, CF and OF are set with oris, otherwise
 * the routine leaves through nop. */
3505 mull:       lwbrx op2,MEM
3506         lwbrx op1,EAX,state
3507         mullw result,op1,op2
3508         mulhwu. r3,op1,op2
3509         SET_FLAGS(FLAGS_MUL)
3510         stwbrx result,EAX,state
3511         li r4,EDX
3512         stwbrx r3,r4,state
3513         beq+ nop
3514         oris flags,flags,(CF_SET|OF_SET)>>16
3515         NEXT
3516 
3517 /* One operand multiplies: with result double the operand size, signed */
/* imulb: AX = AL * r/m8, signed. Both operands are sign extended from 8
 * bits. CF and OF are set when the product differs from its own low byte
 * sign extended, i.e. when it does not fit in AL. */
3518 imulb:      lbzx op2,MEM
3519         extsb op2,op2
3520         lbz op1,AL(state)
3521         extsb op1,op1
3522         mullw result,op1,op2
3523         SET_FLAGS(FLAGS_MUL)
3524         extsb r3,result
3525         sthbrx result,AX,state
3526         cmpw r3,result
3527         beq+ nop
3528         oris flags,flags,(CF_SET|OF_SET)>>16
3529         NEXT
3530 
/* imulw: DX:AX = AX * r/m16, signed. Both operands are sign extended from
 * 16 bits. CF and OF are set when the product differs from its own low
 * halfword sign extended, i.e. when it does not fit in AX. */
3531 imulw:      lhbrx op2,MEM
3532         extsh op2,op2
3533         lhbrx op1,AX,state
3534         extsh op1,op1
3535         mullw result,op1,op2
3536         SET_FLAGS(FLAGS_MUL)
3537         li r3,DX
3538         extsh r4,result
3539         srwi r5,result,16
3540         sthbrx result,AX,state
3541         cmpw r4,result
3542         sthbrx r5,r3,state
3543         beq+ nop
3544         oris flags,flags,(CF_SET|OF_SET)>>16
3545         NEXT
3546 
/* imull: EDX:EAX = EAX * r/m32, signed. The high word (mulhw) goes to EDX
 * and the low word (mullw) to EAX. CF and OF are set when the high word is
 * not the sign extension of the low word, i.e. when the product does not
 * fit in EAX alone.
 * The store of the low word to EAX was missing, which left EAX holding the
 * multiplicand instead of the product; the unsigned variant and the 8/16
 * bit signed variants all write the accumulator. */
3547 imull:      lwbrx op2,MEM
3548         SET_FLAGS(FLAGS_MUL)
3549         lwbrx op1,EAX,state
3550         li r3,EDX
3551         mulhw r4,op1,op2
3552         mullw result,op1,op2
3553         stwbrx r4,r3,state
              stwbrx result,EAX,state     # low half of the product
3554         srawi r3,result,31
3555         cmpw r3,r4
3556         beq+ nop
3557         oris flags,flags,(CF_SET|OF_SET)>>16
3558         NEXT
3559 
3560 /* Other multiplies */
/* Two and three operand 16 bit signed multiplies. The three entry points
 * differ only in where op2 comes from: REG, a 16 bit immediate (NEXTWORD)
 * or a sign extended 8 bit immediate (NEXTBYTE). The other factor is always
 * read from MEM and the low 16 bits of the product are written to REG. CF
 * and OF are set when the product does not survive a 16 bit sign
 * extension. */
3561 imulw_mem_reg:  lhbrx op2,REG
3562         extsh op2,op2
3563         b 1f
3564 
3565 imulw_imm:  NEXTWORD(op2)
3566         extsh op2,op2
3567         b 1f
3568 
3569 imulw_imm8: NEXTBYTE(op2)
3570         extsb op2,op2
3571 1:      lhbrx op1,MEM
3572         extsh op1,op1
3573         mullw result,op1,op2
3574         SET_FLAGS(FLAGS_MUL)
3575         extsh r3,result
3576         sthbrx result,REG
3577         cmpw r3,result
3578         beq+ nop
3579         oris flags,flags,(CF_SET|OF_SET)>>16
3580         NEXT            # SF/ZF/AF/PF undefined !
3581 
/* Two and three operand 32 bit signed multiplies. op2 comes from REG, a 32
 * bit immediate (NEXTDWORD) or a sign extended 8 bit immediate (NEXTBYTE);
 * the other factor is read from MEM and the low word of the product is
 * written to REG. CF and OF are set when the high word (mulhw) is not the
 * sign extension of the low word. */
3582 imull_mem_reg:  lwbrx op2,REG
3583         b 1f
3584 
3585 imull_imm:  NEXTDWORD(op2)
3586         b 1f
3587 
3588 imull_imm8: NEXTBYTE(op2)
3589         extsb op2,op2
3590 1:      lwbrx op1,MEM
3591         mullw result,op1,op2
3592         SET_FLAGS(FLAGS_MUL)
3593         mulhw r3,op1,op2
3594         srawi r4,result,31
3595         stwbrx result,REG
3596         cmpw r3,r4
3597         beq+ nop
3598         oris flags,flags,(CF_SET|OF_SET)>>16
3599         NEXT            # SF/ZF/AF/PF undefined !
3600 
3601 /* aad is indeed a multiply */
/* aad: AL = AH*imm8 + AL, AH = 0. The sum is shifted left by 8 and stored
 * with a plain (not byte reversed) halfword store at AX, which puts the low
 * byte of the sum in the first byte and zero in the second; this relies on
 * AL being the first byte of AX in the state layout (assumed from the
 * existing AH=0 comment, layout not visible here). */
3602 aad:        NEXTBYTE(r3)
3603         lbz op1,AH(state)
3604         lbz op2,AL(state)
3605         mullw result,op1,r3     # AH*imm
3606         SET_FLAGS(FLAGS_LOG(B))     # SF/ZF/PF from result
3607         add result,result,op2       # AH*imm+AL
3608         slwi r3,result,8
3609         sth r3,AX(state)            # AH=0
3610         NEXT                # OF/AF/CF undefined
3611 
3612 /* Unsigned divides: we may destroy all flags */
/* divb: AL = AX / r/m8, AH = AX % r/m8, unsigned. If AH >= divisor the
 * quotient would not fit in 8 bits, so the routine goes to _divide_error;
 * this test also catches a zero divisor. The remainder is computed as
 * dividend - quotient*divisor. */
3613 divb:       lhbrx r4,AX,state
3614         lbzx r3,MEM
3615         srwi r5,r4,8
3616         cmplw r5,r3
3617         bnl- _divide_error
3618         divwu r5,r4,r3
3619         mullw r3,r5,r3
3620         sub r3,r4,r3
3621         stb r5,AL(state)
3622         stb r3,AH(state)
3623         NEXT
3624 
/* divw: AX = DX:AX / r/m16, DX = DX:AX % r/m16, unsigned. opreg is pointed
 * at DX so REG addresses it. DX >= divisor means quotient overflow (or a
 * zero divisor) and goes to _divide_error. */
3625 divw:       li opreg,DX
3626         lhbrx r4,AX,state
3627         lhbrx r5,REG
3628         lhbrx r3,MEM
3629         insrwi r4,r5,16,0
3630         cmplw r5,r3
3631         bnl- _divide_error
3632         divwu r5,r4,r3
3633         mullw r3,r5,r3
3634         sub r3,r4,r3
3635         sthbrx r5,AX,state
3636         sthbrx r3,REG
3637         NEXT
3638 
/* divl: EAX = EDX:EAX / r/m32, EDX = EDX:EAX % r/m32, unsigned. opreg is
 * pointed at EDX so REG addresses it. EDX >= divisor means quotient
 * overflow (or a zero divisor) and goes to _divide_error. When EDX is zero
 * a single divwu is enough; otherwise _div_64_32 is called with r4:r5 =
 * EDX:EAX and r3 = divisor, and its quotient (r5) and remainder (r4) are
 * stored. */
3639 divl:       li opreg,EDX            # Not yet fully implemented
3640         lwbrx r3,MEM
3641         lwbrx r4,REG
3642         lwbrx r5,EAX,state
3643         cmplw r4,r3
3644         bnl- _divide_error
3645         cmplwi r4,0
3646         bne- 1f
3647         divwu r4,r5,r3
3648         mullw r3,r4,r3
3649         stwbrx r4,EAX,state
3650         sub r3,r5,r3
3651         stwbrx r3,REG
3652         NEXT
3653 /* full implementation of 64:32 unsigned divide, slow but rarely used */
3654 1:      bl _div_64_32
3655         stwbrx r5,EAX,state
3656         stwbrx r4,REG
3657         NEXT
3658 /*
3659  * Divide r4:r5 by r3, quotient in r5, remainder in r4.
3660  * The algorithm is stupid because it won't be used very often.
3661  */
/* _div_64_32: bitwise restoring divide, called with bl.
 * In:    r4:r5 = 64 bit dividend (r4 high word), r3 = divisor; the callers
 *        guarantee r4 < r3 so the quotient fits in 32 bits.
 * Out:   r5 = quotient, r4 = remainder.
 * Clobb: r7, CTR, cr0, XER[CA].
 * Each of the 32 iterations shifts r4:r5 left by one bit. The divisor is
 * subtracted and a quotient bit set either when the bit shifted out of r4
 * was 1 (tested before the shift by the signed compare with 0) or when the
 * shifted r4 is not below the divisor.
 * The routine must return with blr: without it execution ran on into the
 * signed byte divide that follows and the callers never got control back. */
3662 _div_64_32: li r7,32
3663         mtctr r7
3664 1:      cmpwi r4,0          # always subtract in case
3665         addc r5,r5,r5           # MSB is set
3666         adde r4,r4,r4
3667         blt 2f
3668         cmplw r4,r3
3669         blt 3f
3670 2:      sub r4,r4,r3
3671         addi r5,r5,1
3672 3:      bdnz 1b
              blr                 # return to the caller
3673 
3674 /* Signed divides: we may destroy all flags */
/* idivb: AL = AX / r/m8, AH = AX % r/m8, signed.
 * Both operands are sign extended before the signed divide: the divisor
 * from 8 bits and the dividend AX from 16 bits. They used to be left zero
 * extended, so any negative divisor or dividend was divided as a large
 * positive number (for example AX = -2 divided by 2 raised a divide error
 * instead of giving -1).
 * A zero divisor, or a quotient that does not survive an 8 bit sign
 * extension, goes to _divide_error. */
3675 idivb:      lbzx r3,MEM
3676         lhbrx r4,AX,state
              extsb r3,r3             # signed 8 bit divisor
              extsh r4,r4             # signed 16 bit dividend
3677         cmpwi r3,0
3678         beq- _divide_error
3679         divw r5,r4,r3
3680         extsb r7,r5
3681         mullw r3,r5,r3
3682         cmpw r5,r7
3683         sub r3,r4,r3
3684         bne- _divide_error
3685         stb r5,AL(state)
3686         stb r3,AH(state)
3687         NEXT
3688 
/* idivw: AX = DX:AX / r/m16, DX = DX:AX % r/m16, signed. opreg is pointed
 * at DX so REG addresses it; DX:AX is assembled into r4 as a full 32 bit
 * signed value.
 * The 16 bit divisor is sign extended before the signed divide. It used to
 * be left zero extended, so every negative divisor was treated as a
 * positive value in 32768..65535.
 * A zero divisor, or a quotient that does not survive a 16 bit sign
 * extension, goes to _divide_error.
 * NOTE(review): for DX:AX = 0x80000000 divided by -1 the divw result is
 * undefined on PowerPC, so the range check relies on what the CPU happens
 * to return - confirm whether an explicit test is wanted. */
3689 idivw:      li opreg,DX
3690         lhbrx r4,AX,state
3691         lhbrx r5,REG
3692         lhbrx r3,MEM
3693         insrwi r4,r5,16,0
              extsh r3,r3             # signed 16 bit divisor
3694         cmpwi r3,0
3695         beq- _divide_error
3696         divw r5,r4,r3
3697         extsh r7,r5
3698         mullw r3,r5,r3
3699         cmpw r5,r7
3700         sub r3,r4,r3
3701         bne- _divide_error
3702         sthbrx r5,AX,state
3703         sthbrx r3,REG
3704         NEXT
3705 
/* idivl: EAX = EDX:EAX / r/m32, EDX = EDX:EAX % r/m32, signed. opreg is
 * pointed at EDX so REG addresses it.
 * The zero divisor test compares into cr1 (cr0 is needed later by add.), so
 * the branch to _divide_error must test cr1 as well. It used to test cr0,
 * which is not set anywhere in this routine before that point, so a zero
 * divisor could reach divw and a valid one could be rejected.
 * Fast path: when EDX + (EAX >> 31) is zero, EDX is the sign extension of
 * EAX and a single divw is enough; the only overflow is 0x80000000 / -1.
 * Slow path (label 1): the absolute values of dividend and divisor are
 * divided with _div_64_32 (r4:r5 by r3, quotient in r5, remainder in r4),
 * then the sign in r0 is applied to the quotient and the dividend sign in
 * r6 to the remainder. A quotient whose sign disagrees with r0 has
 * overflowed. */
3706 idivl:      li opreg,EDX            # Not yet fully implemented
3707         lwbrx r3,MEM
3708         lwbrx r5,EAX,state
3709         cmpwi cr1,r3,0
3710         lwbrx r4,REG
3711         srwi  r7,r5,31
3712         beq- cr1,_divide_error      # zero divisor, tested in cr1
3713         add. r7,r7,r4
3714         bne- 1f             # EDX not sign extension of EAX
3715         divw r4,r5,r3
3716         xoris r7,r5,0x8000      # only overflow case is
3717         orc. r7,r7,r3           # 0x80000000 divided by -1
3718         mullw r3,r4,r3
3719         beq- _divide_error
3720         stwbrx r4,EAX,state
3721         sub r3,r5,r3
3722         stwbrx r3,REG
3723         NEXT
3724 
3725 /* full 64 by 32 signed divide, checks for overflow might be right now */
3726 1:      srawi r6,r4,31          # absolute value of r4:r5
3727         srawi r0,r3,31          # absolute value of r3
3728         xor r5,r5,r6
3729         xor r3,r3,r0
3730         subfc r5,r6,r5
3731         xor r4,r4,r6
3732         sub r3,r3,r0
3733         subfe r4,r6,r4
3734         xor r0,r0,r6            # sign of result
3735         cmplw r4,r3         # coarse overflow detection
3736         bnl- _divide_error      # (probably not necessary)
3737         bl _div_64_32
3738         xor r5,r5,r0            # apply sign to result
3739         sub r5,r5,r0
3740         xor. r7,r0,r5           # wrong sign: overflow
3741         xor r4,r4,r6            # apply sign to remainder
3742         blt- _divide_error
3743         stwbrx r5,EAX,state
3744         sub r4,r4,r6
3745         stwbrx r4,REG
3746         NEXT
3747 
3748 /* aam is indeed a divide: AH = AL / imm8 and AL = AL - imm8*(AL / imm8) */
3749 aam:        NEXTBYTE(r3)
3750         lbz r4,AL(state)
3751         cmpwi r3,0
3752         beq- _divide_error      # zero divide
3753         divwu op2,r4,r3         # AL/imm8
3754         SET_FLAGS(FLAGS_LOG(B))     # SF/ZF/PF from AL
3755         mullw r3,op2,r3         # (AL/imm8)*imm8
3756         stb op2,AH(state)       /* quotient goes to AH */
3757         sub result,r4,r3        # AL-imm8*(AL/imm8)
3758         stb result,AL(state)    /* remainder goes to AL */
3759         NEXT                # OF/AF/CF undefined
3760 
/* Common exit of the divide handlers: pass code_divide_err in r3 to complex */
3761 _divide_error:  li r3,code_divide_err
3762         b complex
3763 
3764 /* Instructions dealing with segment registers */
/* pushw_sp_sr: push a segment selector as a 16 bit value, using the
 * 16 bit SP as stack pointer. */
3765 pushw_sp_sr:    li r3,SP
3766         rlwinm opreg,opcode,31,27,29    /* opreg = (opcode>>1) & 0x1c */
3767         addi r5,state,SELECTORS+2
3768         lhbrx r4,state,r3       /* current SP */
3769         lhzx r0,r5,opreg        /* selector halfword at SELECTORS+2+opreg */
3770         addi r4,r4,-2
3771         sthbrx r4,state,r3      /* write back decremented SP */
3772         clrlwi r4,r4,16         /* keep only 16 bits for the address */
3773         sthbrx r0,r4,ssb        /* store selector at ssb + SP */
3774         NEXT
3775 
/* pushl_sp_sr: same as pushw_sp_sr but SP is decremented by 4 and the
 * zero extended selector is stored as a 32 bit value. */
3776 pushl_sp_sr:    li r3,SP
3777         rlwinm opreg,opcode,31,27,29    /* opreg = (opcode>>1) & 0x1c */
3778         addi r5,state,SELECTORS+2
3779         lhbrx r4,state,r3       /* current SP */
3780         lhzx r0,r5,opreg        /* lhzx zero extends the selector */
3781         addi r4,r4,-4
3782         sthbrx r4,state,r3      /* write back decremented SP */
3783         clrlwi r4,r4,16         /* keep only 16 bits for the address */
3784         stwbrx r0,r4,ssb        /* store 32 bit value at ssb + SP */
3785         NEXT
3786 
/* movl_sr_mem / movw_sr_mem: store a segment selector to the mod/rm
 * operand. opreg above 20 (sreg above 5) branches to ud. The 32 bit
 * form writes a full zero extended word only when base == state, which
 * the code treats as a register destination; otherwise it falls into the
 * 16 bit store shared with movw_sr_mem.
 */
3787 movl_sr_mem:    cmpwi opreg,20
3788         addi opreg,opreg,SELECTORS+2
3789         cmpw cr1,base,state     # Only registers are sensitive
3790         bgt- ud             # to word/longword difference
3791         lhzx r0,REG
3792         bne cr1,1f
3793         stwbrx r0,MEM           # Actually a register
3794         NEXT
3795 
3796 movw_sr_mem:    cmpwi opreg,20          # SREG 0 to 5 only
3797         addi opreg,opreg,SELECTORS+2
3798         bgt- ud
3799         lhzx r0,REG
3800 1:      sthbrx r0,MEM           /* 16 bit store, shared by both entry points */
3801         NEXT
3802 
3803 /* Now the instructions that modify the segment registers, note that
3804 move/pop to ss disable interrupts and traps for one instruction ! */
/* popl_sp_sr / popw_sp_sr: pop a selector from the stack (16 bit SP) into
 * a segment register. r6 holds the amount added to SP (4 or 2). The new
 * base and selector are obtained through _segment_load and stored in the
 * SELBASES and SELECTORS arrays; the esb shadow is reloaded and, for ss,
 * the ssb shadow as well.
 */
3805 popl_sp_sr: li r6,4
3806         b 1f
3807 popw_sp_sr: li r6,2
3808 1:      li r7,SP
3809         rlwinm opreg,opcode,31,27,29    /* opreg = (opcode>>1) & 0x1c */
3810         lhbrx offset,state,r7
3811         addi opreg,opreg,SELBASES
3812         lhbrx r4,ssb,offset     # new selector
3813         add offset,offset,r6
3814         bl _segment_load
3815         sthbrx offset,state,r7      # update sp
3816         cmpwi opreg,SELBASES+8  # is ss ? (opreg already includes SELBASES)
3817         stwux r3,REG
3818         stw r4,SELECTORS-SELBASES(opreg)
3819         lwz esb,esbase(state)
3820         bne+ nop
3821         lwz ssb,ssbase(state)       # pop ss
3822         crmove RF,TF            # prevent traps
3823         NEXT
3824 
/* movw_mem_sr: load a segment register from the mod/rm operand. opreg
 * above 20 and opreg == 4 (CS) branch to ud. The base returned in r3 and
 * the value left in r4 by _segment_load are stored in the SELBASES and
 * SELECTORS arrays. The 32 bit form is an alias of the 16 bit one.
 */
3825 movw_mem_sr:    cmpwi opreg,20
3826         addi r7,state,SELBASES
3827         bgt- ud
3828         cmpwi opreg,4           # CS illegal
3829         beq- ud
3830         lhbrx r4,MEM
3831         bl _segment_load
3832         stwux r3,r7,opreg       /* r7 now points to the base slot */
3833         cmpwi opreg,8           /* opreg 8 is ss */
3834         stw r4,SELECTORS-SELBASES(r7)
3835         lwz esb,esbase(state)   /* refresh the es base shadow */
3836         bne+ nop
3837         lwz ssb,ssbase(state)   /* refresh the ss base shadow */
3838         crmove RF,TF            # prevent traps
3839         NEXT
3840 
3841         .equ movl_mem_sr, movw_mem_sr
3842 
3843 /* The encoding of les/lss/lds/lfs/lgs is strange, opcode is c4/b2/c5/b4/b5
3844 for es/ss/ds/fs/gs which are sreg 0/2/3/4/5. And obviously there is
3845 no lcs instruction, it's called a far jump. */
3846 
/* ldlptrl / ldlptrw: les/lss/lds/lfs/lgs. The offset part of the far
 * pointer is loaded into r7 with an update form load, so that offset
 * holds the effective address afterwards; r4 is the size of the offset
 * part, i.e. the displacement of the selector. The shared helper at
 * label 1 loads the segment register and returns with blr, then the
 * offset part is copied to the destination register.
 */
3847 ldlptrl:    lwzux r7,MEM
3848         li r4,4
3849         bl 1f
3850         stwx r7,REG
3851         NEXT
3852 ldlptrw:    lhzux r7,MEM
3853         li r4,2
3854         bl 1f
3855         sthx r7,REG
3856         NEXT
3857 
3858 1:      cmpw base,state
3859         lis r3,0xc011           # es/ss/ds/fs/gs
3860         rlwinm r5,opcode,2,0x0c     # 00/08/04/00/04
3861         mflr r0                 /* return address kept in r0 across the call */
3862         addi r3,r3,0x4800       # r3=0xc0114800
3863         rlwimi r5,opcode,0,0x10     # 00/18/04/10/14
3864         lhbrx r4,r4,offset      /* selector follows the offset part */
3865         rlwnm opcode,r3,r5,0x1c     # 00/08/0c/10/14 = sreg*4 !
3866         beq- ud             # Only mem operands allowed !
3867         bl _segment_load
3868         addi r5,opcode,SELBASES
3869         stwux r3,r5,state
3870         mtlr r0
3871         stw r4,SELECTORS-SELBASES(r5)
3872         lwz esb,esbase(state)       # keep shadow state in sync
3873         lwz ssb,ssbase(state)
3874         blr
3875 
3876 /* Instructions that may modify the current code segment: the next optimization
3877  * might be to avoid calling C code when the code segment does not change. But
3878  * it's probably not worth the effort.
3879  */
3880 /* Far calls, jumps and returns */
/* Far calls. In all four variants r4 receives the first part of the far
 * pointer (16 or 32 bit, the offset in the x86 encoding), r5 the 16 bit
 * word that follows it, and r3 the request code handed to complex.
 * lcall_w/lcall_l take the pointer through NEXTWORD/NEXTDWORD, lcallw and
 * lcalll read it from the mod/rm operand.
 */
3881 lcall_w:    NEXTWORD(r4)
3882         NEXTWORD(r5)
3883         li r3,code_lcallw
3884         b complex
3885 
3886 lcall_l:    NEXTDWORD(r4)
3887         NEXTWORD(r5)
3888         li r3,code_lcalll
3889         b complex
3890 
3891 lcallw:     lhbrx r4,MEM
3892         addi offset,offset,2    /* second word of the pointer */
3893         lhbrx r5,MEM
3894         li r3,code_lcallw
3895         b complex
3896 
3897 lcalll:     lwbrx r4,MEM
3898         addi offset,offset,4    /* word following the 32 bit part */
3899         lhbrx r5,MEM
3900         li r3,code_lcalll
3901         b complex
3902 
/* Far jumps, same register convention as the far calls: r4 = first part
 * of the far pointer, r5 = following 16 bit word, r3 = request code for
 * complex. ljmp_w/ljmp_l take the pointer through NEXTWORD/NEXTDWORD,
 * ljmpw and ljmpl read it from the mod/rm operand.
 */
3903 ljmp_w:     NEXTWORD(r4)
3904         NEXTWORD(r5)
3905         li r3,code_ljmpw
3906         b complex
3907 
3908 ljmp_l:     NEXTDWORD(r4)
3909         NEXTWORD(r5)
3910         li r3,code_ljmpl
3911         b complex
3912 
3913 ljmpw:      lhbrx r4,MEM
3914         addi offset,offset,2    /* second word of the pointer */
3915         lhbrx r5,MEM
3916         li r3,code_ljmpw
3917         b complex
3918 
3919 ljmpl:      lwbrx r4,MEM
3920         addi offset,offset,4    /* word following the 32 bit part */
3921         lhbrx r5,MEM
3922         li r3,code_ljmpl
3923         b complex
3924 
/* Far returns: r4 is the 16 bit immediate of the instruction (0 for the
 * forms without immediate) and r3 the request code handed to complex. */
3925 lretw_imm:  NEXTWORD(r4)
3926         b 1f
3927 lretw:      li r4,0
3928 1:      li r3,code_lretw
3929         b complex
3930 
3931 lretl_imm:  NEXTWORD(r4)
3932         b 1f
3933 lretl:      li r4,0
3934 1:      li r3,code_lretl
3935         b complex
3936 
3937 /* Interrupts */
/* Software interrupts and interrupt returns: each handler only loads a
 * request code in r3 (plus the immediate byte in r4 for int) and
 * branches to complex. into goes to nop when OF is clear. */
3938 int:        li r3,code_softint      # handled by C code
3939         NEXTBYTE(r4)
3940         b complex
3941 
3942 int3:       li r3,code_int3         # handled by C code
3943         b complex
3944 
3945 into:       EVAL_OF
3946         bf+ OF,nop              /* OF clear: nothing to do */
3947         li r3,code_into
3948         b complex           # handled by C code
3949 
3950 iretw:      li r3,code_iretw        # handled by C code
3951         b complex
3952 
3953 iretl:      li r3,code_iretl
3954         b complex
3955 
3956 /* Miscellaneous flag control instructions */
/* Flag control. clc/cmc/stc work on the upper half of the flags word
 * with an oris followed by an xoris: a bit present in both masks ends
 * up cleared, a bit only in the oris mask ends up set and a bit only in
 * the xoris mask is toggled.
 */
/* clc: clears CF_IN_CR, CF_STATE_MASK and ABOVE_IN_CR */
3957 clc:        oris flags,flags,(CF_IN_CR|CF_STATE_MASK|ABOVE_IN_CR)>>16
3958         xoris flags,flags,(CF_IN_CR|CF_STATE_MASK|ABOVE_IN_CR)>>16
3959         NEXT
3960 
/* cmc: clears CF_IN_CR and ABOVE_IN_CR, toggles CF_COMPLEMENT */
3961 cmc:        oris flags,flags,(CF_IN_CR|ABOVE_IN_CR)>>16
3962         xoris flags,flags,(CF_IN_CR|CF_COMPLEMENT|ABOVE_IN_CR)>>16
3963         NEXT
3964 
/* stc: clears CF_IN_CR, CF_LOCATION and ABOVE_IN_CR, sets CF_COMPLEMENT */
3965 stc:        oris flags,flags,\
3966             (CF_IN_CR|CF_LOCATION|CF_COMPLEMENT|ABOVE_IN_CR)>>16
3967         xoris flags,flags,(CF_IN_CR|CF_LOCATION|ABOVE_IN_CR)>>16
3968         NEXT
3969 
/* DF and IF are kept as condition register bits */
3970 cld:        crclr DF
3971         NEXT
3972 
3973 std:        crset DF
3974         NEXT
3975 
3976 cli:        crclr IF
3977         NEXT
3978 
3979 sti:        crset IF
3980         NEXT
3981 
/* lahf: store the byte returned in r3 by _eval_flags into AH */
3982 lahf:       bl _eval_flags
3983         stb r3,AH(state)
3984         NEXT
3985 
/* sahf: rebuild the lazy flag state (op1, op2, result, flags and the
 * condition register field written by mtcrf) from the byte in AH.
 * _eval_of is called first when the OF_EXPLICIT bit is not yet set. */
3986 sahf:       andis. r3,flags,OF_EXPLICIT>>16
3987         lbz r0,AH(state)
3988         beql+ _eval_of          # save OF just in case
3989         rlwinm op1,r0,31,0x08       # AF
3990         rlwinm flags,flags,0,OF_STATE_MASK
3991         extsb result,r0         # SF/PF
3992         ZF862ZF(r0)
3993         oris flags,flags,(ZF_PROTECT|ZF_IN_CR|SF_IN_CR)>>16
3994         addi op2,op1,0          # AF
3995         ori result,result,0x00fb    # set all except PF
3996         mtcrf 0x02,r0           # SF/ZF
3997         rlwimi flags,r0,27,CF_VALUE # CF
3998         xori result,result,0x00ff   # 00 if PF set, 04 if clear
3999         NEXT
4000 
/* pushfw_sp / pushfl_sp: push the flags image returned in r3 by
 * _eval_flags as a 16 or 32 bit value, using the 16 bit SP. */
4001 pushfw_sp:  bl _eval_flags
4002         li r4,SP
4003         lhbrx r5,r4,state
4004         addi r5,r5,-2
4005         sthbrx r5,r4,state      /* write back decremented SP */
4006         clrlwi r5,r5,16         /* keep only 16 bits for the address */
4007         sthbrx r3,ssb,r5
4008         NEXT
4009 
4010 pushfl_sp:  bl _eval_flags
4011         li r4,SP
4012         lhbrx r5,r4,state
4013         addi r5,r5,-4
4014         sthbrx r5,r4,state      /* write back decremented SP */
4015         clrlwi r5,r5,16         /* keep only 16 bits for the address */
4016         stwbrx r3,ssb,r5
4017         NEXT
4018 
/* popfl_sp / popfw_sp: pop the flags image from the stack (16 bit SP),
 * save it in the eflags field of the state (all 32 bits, or only the
 * halfword at eflags+2 for the 16 bit form) and rebuild the lazy flag
 * state from it in the common tail at label 1.
 */
4019 popfl_sp:   li r4,SP
4020         lhbrx r5,r4,state
4021         lwbrx r3,ssb,r5
4022         addi r5,r5,4
4023         stw r3,eflags(state)
4024         sthbrx r5,r4,state      /* write back incremented SP */
4025         b 1f
4026 
4027 popfw_sp:   li r4,SP
4028         lhbrx r5,r4,state
4029         lhbrx r3,ssb,r5
4030         addi r5,r5,2
4031         sth r3,eflags+2(state)
4032         sthbrx r5,r4,state      /* write back incremented SP */
4033 1:      rlwinm op1,r3,31,0x08           # AF
4034         xori result,r3,4            # PF
4035         ZF862ZF(r3)             # cr6
4036         lis flags,(OF_EXPLICIT|ZF_PROTECT|ZF_IN_CR|SF_IN_CR)>>16
4037         addi op2,op1,0              # AF
4038         rlwinm result,result,0,0x04     # PF
4039         rlwimi flags,r3,27,CF_VALUE     # CF
4040         mtcrf 0x6,r3                # IF,DF,TF,SF,ZF
4041         rlwimi result,r3,24,0,0         # SF
4042         rlwimi flags,r3,15,OF_VALUE     # OF
4043         NEXT
4044 
4045 /* SETcc is slightly faster for setz/setnz. All SETcc handlers end in one
 * of two shared tails: label 0 stores a zero byte and label 1 stores the
 * content of register one. Both tails branch to ud when opreg is not
 * zero. The handlers generated by SETCC reach the tails with backward
 * branches, so they have to stay right after setz/setnz. */
4046 setz:       EVAL_ZF
4047         bt ZF,1f
4048 0:      cmpwi opreg,0
4049         bne- ud
4050         stbx opreg,MEM          /* opreg is known to be zero here */
4051         NEXT
4052 
4053 setnz:      EVAL_ZF
4054         bt ZF,0b
4055 1:      cmpwi opreg,0
4056         bne- ud
4057         stbx one,MEM
4058         NEXT
4059 
/* SETCC(cond, eval, flag) defines set<cond> and setn<cond>: evaluate the
 * flag with EVAL_<eval>, then branch to tail 1 or tail 0 above. */
4060 #define SETCC(cond, eval, flag) \
4061 set##cond:  EVAL_##eval; bt flag,1b; b 0b; \
4062 setn##cond: EVAL_##eval; bt flag,0b; b 1b
4063 
4064         SETCC(c, CF, CF)
4065         SETCC(a, ABOVE, ABOVE)
4066         SETCC(s, SF, SF)
4067         SETCC(g, SIGNED, SGT)
4068         SETCC(l, SIGNED, SLT)
4069         SETCC(o, OF, OF)
4070         SETCC(p, PF, PF)
4071 
4072 /* No wait for a 486SX */
4073         .equ wait, nop
4074 
4075 /* ARPL is not recognized in real mode */
4076         .equ arpl, ud
4077 
4078 /* clts and in general control and debug registers are not implemented */
4079         .equ clts, unimpl
4080 
/* aaa: ASCII adjust after addition. When AF is set or the low nibble of
 * AL is above 9, 0x106 is added to AX (AL+6, AH+1) and CF/AF end up set.
 * The high nibble of AL is always cleared. The adjusted value is written
 * back to AX.
 */
4081 aaa:        lhbrx r0,AX,state
4082         bl _eval_af
4083         rlwinm r3,r3,0,0x10
4084         SET_FLAGS(FLAGS_ADD(W))
4085         rlwimi r3,r0,0,0x0f     # AF:AL&0x0f
4086         li r4,0x106
4087         addi r3,r3,-10
4088         srwi r3,r3,16           # carry ? 0 : 0xffff
4089         andc op1,r4,r3          # carry ? 0x106 : 0
4090         add result,r0,op1
4091         rlwinm result,result,0,28,23    # clear high half of AL
4092         li op2,10           # sets AF indirectly
4093         sthbrx result,AX,state      # OF/SF/ZF/PF undefined !
4094         rlwimi result,op1,8,0x10000 # insert CF
4095         NEXT
4096 
/* aas: ASCII adjust after subtraction. When AF is set or the low nibble
 * of AL is above 9, 0x106 is subtracted from AX (AL-6, AH-1) and CF/AF
 * end up set. The high nibble of AL is always cleared. The adjusted
 * value is written back to AX.
 */
4097 aas:        lhbrx r0,AX,state
4098         bl _eval_af
4099         rlwinm r3,r3,0,0x10
4100         SET_FLAGS(FLAGS_ADD(W))
4101         rlwimi r3,r0,0,0x0f     # AF:AL&0x0f
4102         li r4,0x106
4103         addi r3,r3,-10
4104         srwi r3,r3,16           # carry ? 0 : 0xffff
4105         andc op1,r4,r3          # carry ? 0x106 : 0
4106         sub result,r0,op1
4107         rlwinm result,result,0,28,23    # clear high half of AL
4108         li op2,10           # sets AF indirectly
4109         sthbrx result,AX,state      # OF/SF/ZF/PF undefined !
4110         rlwimi result,op1,8,0x10000 # insert CF
4111         NEXT
4112 
/* daa: decimal adjust AL after addition. 6 is added when AF is set or
 * the low nibble of AL is above 9, then 0x60 is added when the
 * intermediate value is above 0x9f or CF was set. Both decisions are
 * taken from the sign bits of a subtraction: rotating them into place
 * gives the value to add when the difference is negative.
 */
4113 daa:        lbz r0,AL(state)
4114         bl _eval_af
4115         rlwinm r7,r3,0,0x10
4116         bl _eval_cf         # r3=CF<<8
4117         rlwimi r7,r0,0,0x0f     /* r7 = AF:AL&0x0f */
4118         SET_FLAGS(FLAGS_ADD(B))
4119         subfic r4,r7,9          /* negative if AF or low nibble >9 */
4120         rlwinm r4,r4,3,0x06     # 6 if AF or >9, 0 otherwise
4121         srwi op1,r7,1           # 0..4, no AF, 5..f AF set
4122         add r0,r0,r4            # conditional add
4123         li op2,11           # sets AF depending on op1
4124         or r0,r0,r3             /* insert CF as bit 8 */
4125         subfic r3,r0,159        /* negative if above 0x9f or CF */
4126         rlwinm r3,r3,7,0x60     # mask value to add
4127         add result,r0,r3        # final result for SF/ZF/PF
4128         stb result,AL(state)
4129         rlwimi result,r3,2,0x100    # set CF if added
4130         NEXT
4131 
/* das: decimal adjust AL after subtraction. 6 is subtracted when AF is
 * set or the low nibble of AL is above 9, and 0x60 is subtracted when
 * the original AL is above 0x99 or CF was set. As in daa both decisions
 * are taken from the sign bits of a subtraction. CF is set only when the
 * 0x60 adjustment takes place.
 */
4132 das:        lbz r0,AL(state)
4133         bl _eval_af
4134         rlwinm r7,r3,0,0x10
4135         bl _eval_cf             /* r3=CF<<8 */
4136         rlwimi r7,r0,0,0x0f     /* r7 = AF:AL&0x0f */
4137         SET_FLAGS(FLAGS_ADD(B))
4138         subfic r4,r7,9          /* negative if AF or low nibble >9 */
4139         rlwinm r4,r4,3,0x06     /* 6 if AF or >9, 0 otherwise */
4140         srwi op1,r7,1           # 0..4, no AF, 5..f AF set
4141         or r3,r0,r3             /* original AL with CF as bit 8 */
4142         li op2,11           # sets AF depending on op1
4143         sub r4,r0,r4            /* conditional subtract of 6 */
4144         subfic r3,r3,0x99       /* negative if AL above 0x99 or CF */
4145         rlwinm r3,r3,7,0x60     /* 0x60 if negative, 0 otherwise */
4146         sub result,r4,r3        # final result for SF/ZF/PF
4147         stb result,AL(state)
4148         rlwimi result,r3,2,0x100    # set CF
4149         NEXT
4150 
4151 /* 486 specific instructions */
4152 
4153 /* For cmpxchg, only the zero flag is important */
4154 
/* cmpxchgb/w/l: compare the accumulator (op1) with the mod/rm operand
 * (op2). When equal the source register is copied to the operand, else
 * the operand is loaded into the accumulator. The comparison result is
 * kept in cr6 and the flags are set up as for a subtraction.
 */
4155 cmpxchgb:   lbz op1,AL(state)
4156         SET_FLAGS(FLAGS_SUB(B)|ZF_IN_CR)
4157         lbzx op2,MEM
4158         cmpw cr6,op1,op2
4159         sub result,op1,op2
4160         bne cr6,1f
4161         lbzx r3,REG         # success: swap
4162         stbx r3,MEM
4163         NEXT
4164 1:      stb op2,AL(state)       /* failure: AL = operand */
4165         NEXT
4166 
4167 cmpxchgw:   lhbrx op1,AX,state
4168         SET_FLAGS(FLAGS_SUB(W)|ZF_IN_CR)
4169         lhbrx op2,MEM
4170         cmpw cr6,op1,op2
4171         sub result,op1,op2
4172         bne cr6,1f
4173         lhzx r3,REG         # success: swap
4174         sthx r3,MEM             /* plain copy, no byte swap needed */
4175         NEXT
4176 1:      sthbrx op2,AX,state     /* failure: AX = operand */
4177         NEXT
4178 
4179 cmpxchgl:   lwbrx op1,EAX,state
4180         SET_FLAGS(FLAGS_SUB(L)|ZF_IN_CR|SIGNED_IN_CR)
4181         lwbrx op2,MEM
4182         cmpw cr6,op1,op2
4183         sub result,op1,op2
4184         bne cr6,1f
4185         lwzx r3,REG         # success: swap
4186         stwx r3,MEM             /* plain copy, no byte swap needed */
4187         NEXT
4188 1:      stwbrx op2,EAX,state    /* failure: EAX = operand */
4189         NEXT
4190 
/* xaddb/w/l: exchange and add. The sum of the register (op1) and of the
 * mod/rm operand (op2) is stored in the operand, and the previous value
 * of the operand is stored in the register. Flags are set up as for an
 * addition.
 */
4191 xaddb:      lbzx op2,MEM
4192         SET_FLAGS(FLAGS_ADD(B))
4193         lbzx op1,REG
4194         add result,op1,op2
4195         stbx result,MEM
4196         stbx op2,REG            /* register gets the old operand value */
4197         NEXT
4198 
4199 xaddw:      lhbrx op2,MEM
4200         SET_FLAGS(FLAGS_ADD(W))
4201         lhbrx op1,REG
4202         add result,op1,op2
4203         sthbrx result,MEM
4204         sthbrx op2,REG          /* register gets the old operand value */
4205         NEXT
4206 
4207 xaddl:      lwbrx op2,MEM
4208         SET_FLAGS(FLAGS_ADD(L))
4209         lwbrx op1,REG
4210         add result,op1,op2
4211         stwbrx result,MEM
4212         stwbrx op2,REG          /* register gets the old operand value */
4213         NEXT
4214 
4215 /* All FPU instructions skipped. This is a 486 SX ! */
/* esc: every FPU escape opcode hands code_dna to complex */
4216 esc:        li r3,code_dna          # DNA interrupt
4217         b complex
4218 
4219         .equ hlt, unimpl        # Cannot stop
4220 
4221         .equ invd, unimpl
4222 
4223 /* Undefined in real address mode */
4224         .equ lar, ud
4225 
4226         .equ lgdt, unimpl
4227         .equ lidt, unimpl
4228         .equ lldt, ud
4229         .equ lmsw, unimpl
4230 
4231 /* protected mode only */
4232         .equ lsl, ud
4233         .equ ltr, ud
4234 
4235         .equ movl_cr_reg, unimpl
4236         .equ movl_reg_cr, unimpl
4237         .equ movl_dr_reg, unimpl
4238         .equ movl_reg_dr, unimpl
4239 
4240         .equ sgdt, unimpl
4241 
4242         .equ sidt, unimpl
4243         .equ sldt, ud
4244         .equ smsw, unimpl
4245 
4246         .equ str, ud
4247 
/* ud and unimpl both hand code_ud to complex; r4 tells them apart
 * (0 for ud, 1 for unimpl). */
4248 ud:     li r3,code_ud
4249         li r4,0
4250         b complex
4251 
4252 unimpl:     li r3,code_ud
4253         li r4,1
4254         b complex
4255 
4256         .equ verr, ud
4257         .equ verw, ud
4258         .equ wbinvd, unimpl
4259 
4260 em86_end:
4261         .size em86_enter,em86_end-em86_enter
4262 #ifdef  __BOOT__
4263         .data
4264 #define ENTRY(x,t) .long x+t-_jtables
4265 #else
4266         .section .rodata
4267 #define ENTRY(x,t) .long x+t
4268 #endif
4269 
4270 #define BOP(x)  ENTRY(x,2)  /* Byte operation with mod/rm byte */
4271 #define WLOP(x) ENTRY(x,3)  /* 16 or 32 bit operation with mod/rm byte */
4272 #define EXTOP(x) ENTRY(x,0) /* Opcode with extension in mod/rm byte */
4273 #define OP(x)   ENTRY(x,1)  /* Direct one byte opcode/prefix */
4274 
4275 /* A few macros for the main table */
4276 #define gen6(op, wl, axeax) \
4277         BOP(op##b##_reg_mem); WLOP(op##wl##_reg_mem); \
4278         BOP(op##b##_mem_reg); WLOP(op##wl##_mem_reg); \
4279         OP(op##b##_imm_al); OP(op##wl##_imm_##axeax)
4280 
4281 #define rep7(l,t) \
4282         ENTRY(l,t); ENTRY(l,t); ENTRY(l,t); ENTRY(l,t); \
4283         ENTRY(l,t); ENTRY(l,t); ENTRY(l,t)
4284 
4285 #define rep8(l) l ; l; l; l; l; l; l; l;
4286 
4287 #define allcond(pfx, sfx, t) \
4288         ENTRY(pfx##o##sfx, t); ENTRY(pfx##no##sfx, t); \
4289         ENTRY(pfx##c##sfx, t); ENTRY(pfx##nc##sfx, t); \
4290         ENTRY(pfx##z##sfx, t); ENTRY(pfx##nz##sfx, t); \
4291         ENTRY(pfx##na##sfx, t); ENTRY(pfx##a##sfx, t); \
4292         ENTRY(pfx##s##sfx, t); ENTRY(pfx##ns##sfx, t); \
4293         ENTRY(pfx##p##sfx, t); ENTRY(pfx##np##sfx, t); \
4294         ENTRY(pfx##l##sfx, t); ENTRY(pfx##nl##sfx, t); \
4295         ENTRY(pfx##ng##sfx, t); ENTRY(pfx##g##sfx, t)
4296 
4297 /* single/double register sign extensions and other oddities */
4298 #define h2sextw cbw     /* Half to Single sign extension */
4299 #define s2dextw cwd     /* Single to Double sign extension */
4300 #define h2sextl cwde
4301 #define s2dextl cdq
4302 #define j_a16_cxz_w jcxz_w
4303 #define j_a32_cxz_w jecxz_w
4304 #define j_a16_cxz_l jcxz_l
4305 #define j_a32_cxz_l jecxz_l
4306 #define loopa16_w loopw_w
4307 #define loopa16_l loopw_l
4308 #define loopa32_w loopl_w
4309 #define loopa32_l loopl_l
4310 #define loopnza16_w loopnzw_w
4311 #define loopnza16_l loopnzw_l
4312 #define loopnza32_w loopnzl_w
4313 #define loopnza32_l loopnzl_l
4314 #define loopza16_w loopzw_w
4315 #define loopza16_l loopzw_l
4316 #define loopza32_w loopzl_w
4317 #define loopza32_l loopzl_l
4318 /* No FP support */
4319 
4320 /* Addressing mode table */
4321         .align 5
4322 #             (%bx,%si),    (%bx,%di),    (%bp,%si),    (%bp,%di)
4323 adtable:    .long 0x00004360,   0x00004370,   0x80004560,   0x80004570
4324 #             (%si),        (%di),        o16,          (%bx)
4325         .long 0x00004600,   0x00004700,   0x00002000,   0x00004300
4326 #             o8(%bx,%si),  o8(%bx,%di),  o8(%bp,%si),  o8(%bp,%di)
4327         .long 0x00004360,   0x00004370,   0x80004560,   0x80004570
4328 #             o8(%si),      o8(%di),      o8(%bp),      o8(%bx)
4329         .long 0x00004600,   0x00004700,   0x80004500,   0x00004300
4330 #             o16(%bx,%si), o16(%bx,%di), o16(%bp,%si), o16(%bp,%di)
4331         .long 0x00004360,   0x00004370,   0x80004560,   0x80004570
4332 #             o16(%si),     o16(%di),     o16(%bp),     o16(%bx)
4333         .long 0x00004600,   0x00004700,   0x80004500,   0x00004300
4334 #       register addressing modes do not use the table
4335         .long 0, 0, 0, 0, 0, 0, 0, 0
4336 #now 32 bit modes
4337 #             (%eax),       (%ecx),       (%edx),       (%ebx)
4338         .long 0x00004090,   0x00004190,   0x00004290,   0x00004390
4339 #             sib,      o32,      (%esi),       (%edi)
4340         .long 0x00003090,   0x00002090,   0x00004690,   0x00004790
4341 #             o8(%eax),     o8(%ecx),     o8(%edx),     o8(%ebx)
4342         .long 0x00004090,   0x00004190,   0x00004290,   0x00004390
4343 #             sib,      o8(%ebp),     o8(%esi),     o8(%edi)
4344         .long 0x00003090,   0x80004590,   0x00004690,   0x00004790
4345 #             o32(%eax),    o32(%ecx),    o32(%edx),    o32(%ebx)
4346         .long 0x00004090,   0x00004190,   0x00004290,   0x00004390
4347 #             sib,      o32(%ebp),    o32(%esi),    o32(%edi)
4348         .long 0x00003090,   0x80004590,   0x00004690,   0x00004790
4349 #       register addressing modes do not use the table
4350         .long 0, 0, 0, 0, 0, 0, 0, 0
4351 
4352 #define jtable(wl, awl, spesp, axeax, name ) \
4353         .align  5; \
4354 jtab_##name:    gen6(add, wl, axeax); \
4355         OP(push##wl##_##spesp##_sr); \
4356         OP(pop##wl##_##spesp##_sr); \
4357         gen6(or, wl, axeax); \
4358         OP(push##wl##_##spesp##_sr); \
4359         OP(_twobytes); \
4360         gen6(adc, wl, axeax); \
4361         OP(push##wl##_##spesp##_sr); \
4362         OP(pop##wl##_##spesp##_sr); \
4363         gen6(sbb, wl, axeax); \
4364         OP(push##wl##_##spesp##_sr); \
4365         OP(pop##wl##_##spesp##_sr); \
4366         gen6(and, wl, axeax); OP(_es); OP(daa); \
4367         gen6(sub, wl, axeax); OP(_cs); OP(das); \
4368         gen6(xor, wl, axeax); OP(_ss); OP(aaa); \
4369         gen6(cmp, wl, axeax); OP(_ds); OP(aas); \
4370         rep8(OP(inc##wl##_reg)); \
4371         rep8(OP(dec##wl##_reg)); \
4372         rep8(OP(push##wl##_##spesp##_reg)); \
4373         rep8(OP(pop##wl##_##spesp##_reg)); \
4374         OP(pusha##wl##_##spesp); OP(popa##wl##_##spesp); \
4375         WLOP(bound##wl); WLOP(arpl); \
4376         OP(_fs); OP(_gs); OP(_opsize); OP(_adsize); \
4377         OP(push##wl##_##spesp##_imm); WLOP(imul##wl##_imm); \
4378         OP(push##wl##_##spesp##_imm8); WLOP(imul##wl##_imm8); \
4379         OP(insb_##awl); OP(ins##wl##_##awl); \
4380         OP(outsb_##awl); OP(outs##wl##_##awl); \
4381         allcond(sj,_##wl,1); \
4382         EXTOP(grp1b_imm); EXTOP(grp1##wl##_imm); \
4383         EXTOP(grp1b_imm); EXTOP(grp1##wl##_imm8); \
4384         BOP(testb_reg_mem); WLOP(test##wl##_reg_mem); \
4385         BOP(xchgb_reg_mem); WLOP(xchg##wl##_reg_mem); \
4386         BOP(movb_reg_mem); WLOP(mov##wl##_reg_mem); \
4387         BOP(movb_mem_reg); WLOP(mov##wl##_mem_reg); \
4388         WLOP(mov##wl##_sr_mem); WLOP(lea##wl); \
4389         WLOP(mov##wl##_mem_sr); WLOP(pop##wl##_##spesp##_##awl); \
4390         OP(nop); rep7(xchg##wl##_##axeax##_reg,1); \
4391         OP(h2sext##wl); OP(s2dext##wl); \
4392         OP(lcall_##wl); OP(wait); \
4393         OP(pushf##wl##_##spesp); OP(popf##wl##_##spesp); \
4394         OP(sahf); OP(lahf); \
4395         OP(movb_##awl##_al); OP(mov##wl##_##awl##_##axeax); \
4396         OP(movb_al_##awl); OP(mov##wl##_##axeax##_##awl); \
4397         OP(movsb_##awl); OP(movs##wl##_##awl); \
4398         OP(cmpsb_##awl); OP(cmps##wl##_##awl); \
4399         OP(testb_imm_al); OP(test##wl##_imm_##axeax); \
4400         OP(stosb_##awl); OP(stos##wl##_##awl); \
4401         OP(lodsb_##awl); OP(lods##wl##_##awl); \
4402         OP(scasb_##awl); OP(scas##wl##_##awl); \
4403         rep8(OP(movb_imm_reg)); \
4404         rep8(OP(mov##wl##_imm_reg)); \
4405         EXTOP(shiftb_imm); EXTOP(shift##wl##_imm); \
4406         OP(ret##wl##_##spesp##_imm); OP(ret##wl##_##spesp); \
4407         WLOP(ldlptr##wl); WLOP(ldlptr##wl); \
4408         BOP(movb_imm_mem); WLOP(mov##wl##_imm_mem); \
4409         OP(enter##wl##_##spesp); OP(leave##wl##_##spesp); \
4410         OP(lret##wl##_imm); OP(lret##wl); \
4411         OP(int3); OP(int); OP(into); OP(iret##wl); \
4412         EXTOP(shiftb_1); EXTOP(shift##wl##_1); \
4413         EXTOP(shiftb_cl); EXTOP(shift##wl##_cl); \
4414         OP(aam); OP(aad); OP(ud); OP(xlatb_##awl); \
4415         rep8(OP(esc)); \
4416         OP(loopnz##awl##_##wl); OP(loopz##awl##_##wl); \
4417         OP(loop##awl##_##wl); OP(j_##awl##_cxz_##wl); \
4418         OP(inb_port_al); OP(in##wl##_port_##axeax); \
4419         OP(outb_al_port); OP(out##wl##_##axeax##_port); \
4420         OP(call##wl##_##spesp); OP(jmp_##wl); \
4421         OP(ljmp_##wl); OP(sjmp_##wl); \
4422         OP(inb_dx_al); OP(in##wl##_dx_##axeax); \
4423         OP(outb_al_dx); OP(out##wl##_##axeax##_dx); \
4424         OP(_lock); OP(ud); OP(_repnz); OP(_repz); \
4425         OP(hlt); OP(cmc); \
4426         EXTOP(grp3b); EXTOP(grp3##wl); \
4427         OP(clc); OP(stc); OP(cli); OP(sti); \
4428         OP(cld); OP(std); \
4429         EXTOP(grp4b); EXTOP(grp5##wl##_##spesp); \
4430         /* Here we start the table for twobyte instructions */ \
4431         OP(ud); OP(ud); WLOP(lar); WLOP(lsl); \
4432         OP(ud); OP(ud); OP(clts); OP(ud); \
4433         OP(invd); OP(wbinvd); OP(ud); OP(ud); \
4434         OP(ud); OP(ud); OP(ud); OP(ud); \
4435         rep8(OP(ud)); \
4436         rep8(OP(ud)); \
4437         OP(movl_cr_reg); OP(movl_reg_cr); \
4438         OP(movl_dr_reg); OP(movl_reg_dr); \
4439         OP(ud); OP(ud); OP(ud); OP(ud); \
4440         rep8(OP(ud)); \
4441         /* .long    wrmsr, rdtsc, rdmsr, rdpmc; */\
4442         rep8(OP(ud)); \
4443         rep8(OP(ud)); \
4444         /* allcond(cmov, wl); */  \
4445         rep8(OP(ud)); rep8(OP(ud)); \
4446         rep8(OP(ud)); rep8(OP(ud)); \
4447         /* MMX Start */ \
4448         rep8(OP(ud)); rep8(OP(ud)); \
4449         rep8(OP(ud)); rep8(OP(ud)); \
4450         /* MMX End */ \
4451         allcond(j,_##wl, 1); \
4452         allcond(set,,2); \
4453         OP(push##wl##_##spesp##_sr); OP(pop##wl##_##spesp##_sr); \
4454         OP(ud) /* cpuid */; WLOP(bt##wl##_reg_mem); \
4455         WLOP(shld##wl##_imm); WLOP(shld##wl##_cl); \
4456         OP(ud); OP(ud); \
4457         OP(push##wl##_##spesp##_sr); OP(pop##wl##_##spesp##_sr); \
4458         OP(ud) /* rsm */; WLOP(bts##wl##_reg_mem); \
4459         WLOP(shrd##wl##_imm); WLOP(shrd##wl##_cl); \
4460         OP(ud); WLOP(imul##wl##_mem_reg); \
4461         BOP(cmpxchgb); WLOP(cmpxchg##wl); \
4462         WLOP(ldlptr##wl); WLOP(btr##wl##_reg_mem); \
4463         WLOP(ldlptr##wl); WLOP(ldlptr##wl); \
4464         WLOP(movzb##wl); WLOP(movzw##wl); \
4465         OP(ud); OP(ud); \
4466         EXTOP(grp8##wl); WLOP(btc##wl##_reg_mem); \
4467         WLOP(bsf##wl); WLOP(bsr##wl); \
4468         WLOP(movsb##wl); WLOP(movsw##wl); \
4469         BOP(xaddb); WLOP(xadd##wl); \
4470         OP(ud); OP(ud); \
4471         OP(ud); OP(ud); OP(ud); OP(ud); \
4472         rep8(OP(bswap)); \
4473         /* MMX Start */ \
4474         rep8(OP(ud)); rep8(OP(ud)); \
4475         rep8(OP(ud)); rep8(OP(ud)); \
4476         rep8(OP(ud)); rep8(OP(ud)); \
4477         /* MMX End */
4478         .align 5        /* 8kb of tables, 32 byte aligned */
4479 _jtables:   jtable(w, a16, sp, ax, www) /* data16, addr16 */
4480         jtable(l, a16, sp, eax, lww)    /* data32, addr16 */
4481         jtable(w, a32, sp, ax, wlw) /* data16, addr32 */
4482         jtable(l, a32, sp, eax, llw)    /* data32, addr32 */
4483 /* The other possible combinations are only required by protected mode
4484 code using a big stack segment */
4485 /* Here are the auxiliary tables for opcode extensions, note that
4486 all entries get 2 or 3 added. */
4487 #define grp1table(bwl,t,s8) \
4488 grp1##bwl##_imm##s8:; \
4489         ENTRY(add##bwl##_imm##s8,t); ENTRY(or##bwl##_imm##s8,t); \
4490         ENTRY(adc##bwl##_imm##s8,t); ENTRY(sbb##bwl##_imm##s8,t); \
4491         ENTRY(and##bwl##_imm##s8,t); ENTRY(sub##bwl##_imm##s8,t); \
4492         ENTRY(xor##bwl##_imm##s8,t); ENTRY(cmp##bwl##_imm##s8,t)
4493 
4494         grp1table(b,2,)
4495         grp1table(w,3,)
4496         grp1table(w,3,8)
4497         grp1table(l,3,)
4498         grp1table(l,3,8)
4499 
4500 #define shifttable(bwl,t,c) \
4501 shift##bwl##_##c:; \
4502         ENTRY(rol##bwl##_##c,t); ENTRY(ror##bwl##_##c,t); \
4503         ENTRY(rcl##bwl##_##c,t); ENTRY(rcr##bwl##_##c,t); \
4504         ENTRY(shl##bwl##_##c,t); ENTRY(shr##bwl##_##c,t); \
4505         OP(ud); ENTRY(sar##bwl##_##c,t)
4506 
4507         shifttable(b,2,1)
4508         shifttable(w,3,1)
4509         shifttable(l,3,1)
4510 
4511         shifttable(b,2,cl)
4512         shifttable(w,3,cl)
4513         shifttable(l,3,cl)
4514 
4515         shifttable(b,2,imm)
4516         shifttable(w,3,imm)
4517         shifttable(l,3,imm)
4518 
4519 #define grp3table(bwl,t) \
4520 grp3##bwl:  ENTRY(test##bwl##_imm,t); OP(ud); \
4521         ENTRY(not##bwl,t); ENTRY(neg##bwl,t); \
4522         ENTRY(mul##bwl,t); ENTRY(imul##bwl,t); \
4523         ENTRY(div##bwl,t); ENTRY(idiv##bwl,t)
4524 
4525         grp3table(b,2)
4526         grp3table(w,3)
4527         grp3table(l,3)
4528 
4529 grp4b:      BOP(incb); BOP(decb); \
4530         OP(ud); OP(ud); \
4531         OP(ud); OP(ud); \
4532         OP(ud); OP(ud)
4533 
/* Table for the opcode extensions selected by the mod/rm byte of opcode
 * 0xff: inc, dec, near call, far call, near jump, far jump, push, and
 * one undefined slot. */
4534 #define grp5table(wl,spesp) \
4535 grp5##wl##_##spesp: \
4536         WLOP(inc##wl); WLOP(dec##wl); \
4537         WLOP(call##wl##_##spesp##_mem); WLOP(lcall##wl); \
4538         WLOP(jmp##wl); WLOP(ljmp##wl); \
4539         WLOP(push##wl##_##spesp); OP(ud)
4540 
4541         grp5table(w,sp)
4542         grp5table(l,sp)
4543 
4544 #define grp8table(wl) \
4545 grp8##wl:   OP(ud); OP(ud); OP(ud); OP(ud); \
4546         WLOP(bt##wl##_imm); WLOP(bts##wl##_imm); \
4547         WLOP(btr##wl##_imm); WLOP(btc##wl##_imm)
4548 
4549         grp8table(w)
4550         grp8table(l)
4551 #ifdef __BOOT__
4552 _endjtables:    .long   0   /* Points to _jtables after relocation */
4553 #endif