Back to home page

LXR

 
 

    


File indexing completed on 2025-05-11 08:24:24

0001 /**
0002  *  @file
0003  *
0004  *  @brief Test FPU/SSE Context Save and Restore
0005  */
0006 
0007 /*
0008  * Authorship
0009  * ----------
0010  * This software was created by
0011  *     Till Straumann <strauman@slac.stanford.edu>, 2009,
0012  *     Stanford Linear Accelerator Center, Stanford University.
0013  *
0014  * Acknowledgement of sponsorship
0015  * ------------------------------
0016  * This software was produced by
0017  *     the Stanford Linear Accelerator Center, Stanford University,
0018  *     under Contract DE-AC03-76SFO0515 with the Department of Energy.
0019  *
0020  * Government disclaimer of liability
0021  * ----------------------------------
0022  * Neither the United States nor the United States Department of Energy,
0023  * nor any of their employees, makes any warranty, express or implied, or
0024  * assumes any legal liability or responsibility for the accuracy,
0025  * completeness, or usefulness of any data, apparatus, product, or process
0026  * disclosed, or represents that its use would not infringe privately owned
0027  * rights.
0028  *
0029  * Stanford disclaimer of liability
0030  * --------------------------------
0031  * Stanford University makes no representations or warranties, express or
0032  * implied, nor assumes any liability for the use of this software.
0033  *
0034  * Stanford disclaimer of copyright
0035  * --------------------------------
0036  * Stanford University, owner of the copyright, hereby disclaims its
0037  * copyright and all other rights in this software.  Hence, anyone may
0038  * freely use it for any purpose without restriction.
0039  *
0040  * Maintenance of notices
0041  * ----------------------
0042  * In the interest of clarity regarding the origin and status of this
0043  * SLAC software, this and all the preceding Stanford University notices
0044  * are to remain affixed to any copy or derivative of this software made
0045  * or distributed by the recipient and are to be affixed to any copy of
0046  * software made or distributed by the recipient that contains a copy or
0047  * derivative of this software.
0048  *
0049  * ------------------ SLAC Software Notices, Set 4 OTT.002a, 2004 FEB 03
0050  */
0051 
0052 
0053 /* Code for testing FPU/SSE context save/restore across exceptions
0054  * (including interrupts).
0055  *
0056  * There are two tasks and an IRQ/EXC handler involved. One task (LP)
0057  * is of lower priority than the other (HP) task.
0058  *
0059  * 1) LP task sets up a context area in memory (known contents; every
0060  *    register is loaded with different values)
0061  *
0062  * 2) LP task
0063  *       2a saves original FP/SSE context
0064  *       2b loads context from 1) into FPU/SSE
0065  *       2c raises an exception or interrupt
0066  *
0067  *   *  (2d save FPU/SSE context after irq/exception returns to
0068  *          separate area for verification
0069  *       2e reload original FP/SSE context.)
0070  *
0071  *   * All these five steps are coded in assembly to prevent
0072  *     gcc from manipulating the FP/SSE state. The last two steps,
0073  *     however, are effectively executed during 6 when control is
0074  *     returned to the LP task.
0075  *
0076  * 3) IRQ/EXC handler OS wrapper saves context, initializes FPU and
0077  *    MXCSR.
0078  *
0079  * 4) user (our) irq/exc handler clears exception condition, clobbers
0080  *    FPU and XMM regs and finally releases a semaphore on which HP
0081  *    task is waiting.
0082  *
0083  * 5) context switch to HP task. HP task clobbers FPU and XMM regs.
0084  *    Then it tries to re-acquire the synchronization semaphore and
0085  *    blocks.
0086  *
0087  * 6) task switch back to (interrupted) LP task. Original context is
0088  *    restored and verified against the context that was setup in 1).
0089  *
0090  *
0091  * Three methods for interrupting the LP task are tested
0092  *
0093  *  a) FP exception (by setting an exception status in the context from 1))
0094  *  b) SSE exception (by computing the square root of a vector of negative
0095  *     numbers)
0096  *  c) IRQ (software IRQ via 'INT xx' instruction)
0097  *
0098  */
0099 
0100 #ifdef HAVE_CONFIG_H
0101 #include "config.h"
0102 #endif
0103 
0104 #ifdef __rtems__
0105 #include <rtems.h>
0106 #include <rtems/score/cpu.h>
0107 #include <rtems/irq.h>
0108 #include <rtems/error.h>
0109 #endif
0110 
0111 #include <inttypes.h>
0112 #include <stdio.h>
0113 #include <stdlib.h>
0114 #include <string.h>
0115 #include <math.h>
0116 
0117 /* This is currently hardcoded (int xx opcode requires immediate operand) */
0118 #define SSE_TEST_IRQ  10
0119 
/* GCC vector types, each 16 bytes wide (the size of one XMM register image
 * in Context_Control_sse.xmmregs[]).
 */
typedef uint8_t   __v8  __attribute__((vector_size(16)));
typedef uint32_t __v32  __attribute__((vector_size(16)));
typedef float     __vf  __attribute__((vector_size(16)));
0123 
0124 #ifndef __rtems__
/* Clone of what is defined in rtems/score/cpu.h (for testing under linux).
 *
 * This is the memory image the code in this file hands to the 'fxsave' and
 * 'fxrstor' instructions. The members add up to 512 bytes and the type is
 * 16-byte aligned.
 */
typedef struct Context_Control_sse {
  uint16_t  fcw;          /* FPU control word (bits: FPCW_xxx)            */
  uint16_t  fsw;          /* FPU status word  (bits: FPSR_xxx)            */
  uint8_t   ftw;          /* tag bits, one per FP register (1 = in use)   */
  uint8_t   res_1;
  uint16_t  fop;
  uint32_t  fpu_ip;
  uint16_t  cs;
  uint16_t  res_2;
  uint32_t  fpu_dp;
  uint16_t  ds;
  uint16_t  res_3;
  uint32_t  mxcsr;        /* SSE control/status (bits: MXCSR_xxx)         */
  uint32_t  mxcsr_mask;
  struct {
    uint8_t fpreg[10];    /* 80-bit register image (see fp_st1/fp_ld1)    */
    uint8_t res_4[ 6];    /* padding to a 16-byte slot                    */
  } fp_mmregs[8];
  uint8_t   xmmregs[8][16]; /* XMM0..XMM7, 16 bytes each                  */
  uint8_t   res_5[224];
} Context_Control_sse
__attribute__((aligned(16)))
;
0149 #endif
0150 
/* Bit definitions for the 'mxcsr' member of the context image:
 * control bits, exception mask bits (xM) and exception flag bits (xE).
 */
#define MXCSR_FZ          (1<<15)   /* Flush to zero */
#define MXCSR_RC(x) (((x)&3)<<13)   /* Rounding ctrl */
#define MXCSR_PM          (1<<12)   /* Precision msk */
#define MXCSR_UM          (1<<11)   /* Underflow msk */
#define MXCSR_OM          (1<<10)   /* Overflow  msk */
#define MXCSR_ZM          (1<< 9)   /* Divbyzero msk */
#define MXCSR_DM          (1<< 8)   /* Denormal  msk */
#define MXCSR_IM          (1<< 7)   /* Invalidop msk */
#define MXCSR_DAZ         (1<< 6)   /* Denorml are 0 */
#define MXCSR_PE          (1<< 5)   /* Precision flg */
#define MXCSR_UE          (1<< 4)   /* Underflow flg */
#define MXCSR_OE          (1<< 3)   /* Overflow  flg */
#define MXCSR_ZE          (1<< 2)   /* Divbyzero flg */
#define MXCSR_DE          (1<< 1)   /* Denormal  flg */
#define MXCSR_IE          (1<< 0)   /* Invalidop flg */

/* All MXCSR exception masks / all MXCSR exception flags */
#define MXCSR_ALLM (MXCSR_PM | MXCSR_UM | MXCSR_OM | MXCSR_ZM | MXCSR_DM | MXCSR_IM)
#define MXCSR_ALLE (MXCSR_PE | MXCSR_UE | MXCSR_OE | MXCSR_ZE | MXCSR_DE | MXCSR_IE)

/* Bit definitions for the FPU status word ('fsw' member) */
#define FPSR_B            (1<<15)   /* FPU busy      */
#define FPSR_C3           (1<<14)   /* Cond code C3  */
#define FPSR_TOP(x) (((x)&7)<<11)   /* TOP           */
#define FPSR_C2           (1<<10)   /* Cond code C2  */
#define FPSR_C1           (1<< 9)   /* Cond code C1  */
#define FPSR_C0           (1<< 8)   /* Cond code C0  */
#define FPSR_ES           (1<< 7)   /* Error summary */
#define FPSR_SF           (1<< 6)   /* Stack fault   */
#define FPSR_PE           (1<< 5)   /* Precision flg */
#define FPSR_UE           (1<< 4)   /* Underflow flg */
#define FPSR_OE           (1<< 3)   /* Overflow  flg */
#define FPSR_ZE           (1<< 2)   /* Divbyzero flg */
#define FPSR_DE           (1<< 1)   /* Denormal  flg */
#define FPSR_IE           (1<< 0)   /* Invalidop flg */

/* Bit definitions for the FPU control word ('fcw' member) */
#define FPCW_X            (1<<12)   /* Infinity ctrl */
#define FPCW_RC(x)  (((x)&3)<<10)   /* Rounding ctrl */
#define FPCW_PC(x)  (((x)&3)<< 8)   /* Precision ctl */
#define FPCW_PM           (1<< 5)   /* Precision msk */
#define FPCW_UM           (1<< 4)   /* Underflow msk */
#define FPCW_OM           (1<< 3)   /* Overflow  msk */
#define FPCW_ZM           (1<< 2)   /* Divbyzero msk */
#define FPCW_DM           (1<< 1)   /* Denormal  msk */
#define FPCW_IM           (1<< 0)   /* Invalidop msk */

/* All FPU exception masks (control word) / all FPU exception flags
 * including error summary and stack fault (status word)
 */
#define FPCW_ALLM (FPCW_PM | FPCW_UM | FPCW_OM | FPCW_ZM | FPCW_DM | FPCW_IM)
#define FPSR_ALLE (FPSR_ES | FPSR_SF | FPSR_PE | FPSR_UE | FPSR_OE | FPSR_ZE | FPSR_DE | FPSR_IE)
0197 
/* Store 'double' into 80-bit register image.
 *
 * p_dst: destination, a 10-byte buffer receiving the extended-precision
 *        representation of 'v'.
 * v:     value to convert.
 *
 * 'v' is passed in the top x87 register ("t" constraint); 'fstpt' stores
 * and pops it, which is why "st" is listed as clobbered.
 * '__asm__' is used (as the file-scope asm in this file already does) so
 * the function also builds in strict ISO modes where 'asm' is no keyword.
 */
void
fp_st1(uint8_t (*p_dst)[10], double v)
{
    __asm__ __volatile__("fstpt %0":"=m"(*p_dst):"t"(v):"st");
}
0204 
0205 /* Store 'double' into 80-bit register image #i in context */
0206 void
0207 fp_st(Context_Control_sse *p_ctxt, int i, double v)
0208 {
0209     fp_st1(&p_ctxt->fp_mmregs[i].fpreg,v);
0210 }
0211 
/* Load 'double' from 80-bit register image.
 *
 * p_src:   10-byte buffer holding an extended-precision value.
 * RETURNS: the value converted to 'double'.
 *
 * 'fldt' pushes the image onto the x87 stack; the "=t" output hands the
 * stack top to the compiler. The whole array is the memory operand, so
 * the compiler knows all 10 bytes are read.
 * '__asm__' is used (as the file-scope asm in this file already does) so
 * the function also builds in strict ISO modes where 'asm' is no keyword.
 */
double
fp_ld1(uint8_t (*p_src)[10])
{
double v;

    __asm__ __volatile__("fldt %1":"=t"(v):"m"(*p_src));
    return v;
}
0221 
0222 /* Load 'double' from 80-bit register image #i in context */
0223 double
0224 fp_ld(Context_Control_sse *p_ctxt, int i)
0225 {
0226     return fp_ld1(&p_ctxt->fp_mmregs[i].fpreg);
0227 }
0228 
/* Clobber list for inline asm that may alter any x87 stack register,
 * the FPU status register, or memory.
 */
#define FPUCLOBBER \
    "st","st(1)","st(2)","st(3)",   \
    "st(4)","st(5)","st(6)","st(7)",\
    "fpsr","memory"

/* There seems to be no way to say that mxcsr was clobbered */

/* Clobber list for inline asm that overwrites XMM0..XMM7 */
#define SSECLOBBER \
    "xmm0","xmm1","xmm2","xmm3",    \
    "xmm4","xmm5","xmm6","xmm7"
0239 
/* Overwrite XMM0..XMM7 with four copies of the 32-bit pattern 'x'.
 *
 * Used by the exception/signal handlers in this file to destroy the SSE
 * register contents, so that a missing context save/restore around the
 * handler becomes visible to the interrupted code.
 */
static void
sse_clobber(uint32_t x)
{
/* 16-byte vector holding the pattern; loaded once, then copied around */
__v32 v = { x, x, x, x };
    asm volatile (
        "   movdqa %0,     %%xmm0      \n"
        "   movdqa %%xmm0, %%xmm1      \n"
        "   movdqa %%xmm0, %%xmm2      \n"
        "   movdqa %%xmm0, %%xmm3      \n"
        "   movdqa %%xmm0, %%xmm4      \n"
        "   movdqa %%xmm0, %%xmm5      \n"
        "   movdqa %%xmm0, %%xmm6      \n"
        "   movdqa %%xmm0, %%xmm7      \n"
        :
        :"m"(v)
        :SSECLOBBER
    );
}
0258 
/* Reinitialize the FPU ('finit') and overwrite XMM0..XMM7 with the
 * 64-bit pattern formed by 'v1' (low half) and 'v2' (high half),
 * replicated into both quadwords of every register.
 *
 * Written as file-scope assembly so the compiler cannot touch the FP/SSE
 * state on the way in or out. The arguments are read straight from the
 * stack: on entry 0(%esp) holds the return address, so 'v1' is at
 * 4(%esp) and 'v2' at 8(%esp).
 */
void
all_clobber(uint32_t v1, uint32_t v2);

__asm__ (
"all_clobber:               \n"
"   finit                   \n"
/* load v1:v2 (NOT 0(%esp), which is the return address) */
"   movq  4(%esp), %xmm0    \n"
"   punpcklqdq %xmm0, %xmm0 \n"
"   movdqa %xmm0, %xmm1     \n"
"   movdqa %xmm0, %xmm2     \n"
"   movdqa %xmm0, %xmm3     \n"
"   movdqa %xmm0, %xmm4     \n"
"   movdqa %xmm0, %xmm5     \n"
"   movdqa %xmm0, %xmm6     \n"
"   movdqa %xmm0, %xmm7     \n"
"   ret                     \n"
);
0276 
/* Clear FPU and save FPU/SSE registers to context area.
 *
 * p_ctxt: destination for 'fxsave'.
 *         NOTE(review): presumably must be 16-byte aligned, as the
 *         Context_Control_sse type is declared - confirm for callers.
 *
 * Written as file-scope assembly; the argument is fetched from the stack
 * (4(%esp), just above the return address) into %eax.
 */

void
init_ctxt(Context_Control_sse *p_ctxt);

__asm__ (
"init_ctxt:            \n"
"   finit              \n"
"   mov    4(%esp), %eax\n"
"   fxsave (%eax)      \n"
"   fwait              \n"
"   ret                \n"
);
0290 
/* Save FPU/SSE registers to context area.
 *
 * p_ctxt: destination; it is zeroed first and then filled by 'fxsave'.
 *
 * Unlike init_ctxt() this does not reinitialize the FPU beforehand
 * (the 'finit' is commented out).
 */

static void
stor_ctxt(Context_Control_sse *p_ctxt)
{
    /* start from a known image */
    memset(p_ctxt, 0, sizeof(*p_ctxt));
    asm volatile(
/*      "   finit                \n" */
        "   fxsave %0            \n"
        "   fwait                \n"
        : "=m"(*p_ctxt)
        :
        : FPUCLOBBER
    );
}
0306 
/* printf conversions for zero-padded hex fields of 8, 16 and 32 bits */
#define H08 "0x%02"PRIx8
#define H16 "0x%04"PRIx16
#define H32 "0x%08"PRIx32

#define F16 "mismatch ("H16" != "H16")\n"

/* The three comparison macros below are helpers for cmp_ctxt(). They
 * expect these names in the invoking scope:
 *   a, b   pointers to the two Context_Control_sse images being compared
 *   rval   int, set to 1 when a difference is found
 *   quiet  int, nonzero suppresses the diagnostics on stderr
 * Each expands to a single statement (do { } while (0)), so it is safe in
 * unbraced if/else bodies. The register index argument is evaluated more
 * than once; pass a plain variable or constant.
 */

/* Compare scalar member 'fld'; 'fmt' is the printf conversion for it */
#define FLDCMP(fld, fmt)                            \
    do {                                            \
        if ( a->fld != b->fld ) {                   \
            rval = 1;                               \
            if ( !quiet )                           \
               fprintf(stderr,#fld" mismatch ("fmt" != "fmt")\n",a->fld, b->fld); \
        }                                           \
    } while (0)

/* Compare FP register slot #i: the tag bits must agree, and if both
 * slots are tagged 'in use' the 10-byte register images must be equal.
 */
#define FLTCMP(i)                                   \
    do {                                            \
        if (   ( (a->ftw ^ b->ftw) & (1<<(i)))      \
            || ( (a->ftw & b->ftw  & (1<<(i))) &&   \
                 memcmp(a->fp_mmregs[(i)].fpreg,    \
                    b->fp_mmregs[(i)].fpreg,        \
                    sizeof(a->fp_mmregs[(i)].fpreg))\
               )                                    \
           ) {                                      \
            rval = 1;                               \
            if ( !quiet ) {                         \
              if ( ((a->ftw ^ b->ftw) & (1<<(i))) ) { \
                fprintf(stderr,"fpreg[%u] TAG mismatch (%u != %u)\n",(unsigned)(i),(a->ftw & (1<<(i))) ? 1u : 0u,(b->ftw & (1<<(i))) ? 1u : 0u); \
              } else {                              \
                double fa = fp_ld(a, (i));          \
                double fb = fp_ld(b, (i));          \
                fprintf(stderr,"fpreg[%u] mismatch (%g != %g)\n",(unsigned)(i),fa,fb); \
              }                                     \
            }                                       \
        }                                           \
    } while (0)

/* Compare XMM register #i byte by byte; on mismatch dump both images */
#define XMMCMP(i)                                   \
    do {                                            \
        if ( memcmp(&a->xmmregs[(i)],               \
                    &b->xmmregs[(i)],               \
                    sizeof(a->xmmregs[(i)]))        \
           ) {                                      \
            rval = 1;                               \
            if ( !quiet ) {                         \
              int _jj;                              \
              fprintf(stderr,"xmmreg[%u] mismatch:\n", (unsigned)(i)); \
              fprintf(stderr,"    ");               \
              for (_jj=0; _jj<16; _jj++)            \
                fprintf(stderr,"%02x ",a->xmmregs[(i)][_jj]); \
              fprintf(stderr,"\n !=\n");            \
              fprintf(stderr,"    ");               \
              for (_jj=0; _jj<16; _jj++)            \
                fprintf(stderr,"%02x ",b->xmmregs[(i)][_jj]); \
              fprintf(stderr,"\n");                 \
            }                                       \
        }                                           \
    } while (0)
0362 
0363 
0364 /* Compare two FPU/SSE context areas and flag differences;
0365  * RETURNS: zero if the contexts match and nonzero otherwise
0366  */
0367 static int
0368 cmp_ctxt(Context_Control_sse *a, Context_Control_sse *b, int quiet)
0369 {
0370 int rval = 0;
0371 int i;
0372     FLDCMP(fcw,H16);
0373     FLDCMP(fsw,H16);
0374     FLDCMP(ftw,H08);
0375     FLDCMP(fop,H16);
0376     FLDCMP(fpu_ip,H32);
0377     FLDCMP(cs,H16);
0378     FLDCMP(fpu_dp,H32);
0379     FLDCMP(ds,H16);
0380     FLDCMP(mxcsr,H32);
0381     FLDCMP(mxcsr_mask,H32);
0382     for ( i=0; i<8; i++ ) {
0383         FLTCMP(i);
0384     }
0385     for ( i=0; i<8; i++ ) {
0386         XMMCMP(i);
0387     }
0388     return rval;
0389 }
0390 
/* Possible arguments to exc_raise(); the value is handed to 'do_raise'
 * in %eax, which dispatches on its sign (zero / positive / negative).
 */

#define FP_EXC   0
#define IRQ_EXC  1
#define SSE_EXC -1

/* Check stack alignment by raising the interrupt from a
 * non-16-byte aligned section of code. The exception/IRQ
 * handler must align the stack and SSE context area
 * properly or it will crash.
 */
/* Two-level expansion so that 'x' is macro-expanded before being
 * stringified into the 'int' instruction's immediate operand.
 * NOTE(review): the '32+' offset presumably maps the IRQ number to its
 * interrupt vector - confirm against the BSP.
 */
#define __INTRAISE(x) " int  $32+"#x" \n"
#define INTRAISE(x)   __INTRAISE(x)

/* do_raise: assembly-only helper without a C prototype; it is called
 * from the inline asm in exc_raise() with the 'kind' in %eax:
 *   %eax == 0 : nothing beyond the initial 'fwait' is executed
 *   %eax  > 0 : software interrupt 'int $32+SSE_TEST_IRQ'
 *   %eax  < 0 : 'sqrtps' on the current contents of %xmm0
 */
__asm__ (
"do_raise:               \n"
"   fwait                \n"
"   test    %eax, %eax   \n"
"   je      2f           \n"
"   jl      1f           \n"
INTRAISE(SSE_TEST_IRQ)
"   jmp     2f           \n"
"1: sqrtps  %xmm0, %xmm0 \n"
"2:                      \n"
"   ret                  \n"
);
0417 
/* Failure bits that exc_raise() may OR into its return value */
#define SSE_TEST_HP_FAILED       1
#define SSE_TEST_FSPR_FAILED     2
#define SSE_TEST_CTXTCMP_FAILED  4

/* Messages for the failure bits above; entry #n belongs to bit (1<<n),
 * which is how prstat() indexes this table.
 */
static const char *fail_msgs[] = {
    "Seems that HP task was not executing",
    "FPSR 'Invalid-operation' flag should be clear",
    "Restored context does NOT match the saved one",
};
0427 
0428 static void prstat(int st, const char *where)
0429 {
0430 int i,msk;
0431     for ( i=0, msk=1; i<sizeof(fail_msgs)/sizeof(fail_msgs[0]); i++, msk<<=1 ) {
0432         if ( (st & msk) ) {
0433             fprintf(stderr,"sse_test ERROR: %s (testing: %s)\n", fail_msgs[i], where);
0434         }
0435     }
0436 }
0437 
/* Nonzero enables additional diagnostic output (tested in exc_raise()
 * and in the high-priority task).
 */
int                 sse_test_debug   = 0;
0439 
0440 static int
0441 exc_raise(int kind)
0442 {
0443 Context_Control_sse nctxt;
0444 Context_Control_sse octxt;
0445 Context_Control_sse orig_ctxt;
0446 int                 i,j,rval;
0447 double              s2;
0448 uint16_t            fsw;
0449 __vf                f4  = { -1., -2., -3., -4. };
0450 __vf                tmp;
0451 __v32               sgn = { (1<<31), (1<<31), (1<<31), (1<<31) };
0452 
0453     stor_ctxt(&octxt);
0454 
0455     octxt.fsw   &= ~FPSR_ALLE;
0456     octxt.mxcsr &= ~MXCSR_ALLE;
0457 
0458     for ( i=0; i<8; i++ ) {
0459         fp_st(&octxt, i, (double)i+0.1);
0460         for (j=0; j<16; j++) {
0461             octxt.xmmregs[i][j]=(i<<4)+j;
0462         }
0463     }
0464 
0465 
0466     if ( SSE_EXC == kind ) {
0467         memcpy(octxt.xmmregs[0], &f4, sizeof(f4));
0468         octxt.mxcsr &= ~MXCSR_IM;
0469     }
0470 
0471     /* set tags to 'valid'            */
0472     octxt.ftw = 0xff;
0473 
0474     /* enable 'invalid arg' exception */
0475     octxt.fcw &= ~ ( FPCW_IM );
0476     
0477     if ( FP_EXC == kind ) {
0478         octxt.fsw |=   ( FPSR_IE | FPSR_ES );
0479     }
0480 
0481     if ( sse_test_debug )
0482         printk("RAISE (fsw was 0x%04x)\n", orig_ctxt.fsw);
0483     asm volatile(
0484         "   fxsave  %2           \n"
0485 #ifdef __rtems__
0486         "   movl    %4, sse_test_check\n"
0487 #endif
0488         "   fxrstor %3           \n"
0489         "   call    do_raise     \n"
0490 #ifdef __rtems__
0491         "   movl    sse_test_check, %1\n"
0492 #else
0493         "   movl    $0, %1       \n"
0494 #endif
0495 #ifdef TEST_MISMATCH
0496         "   pxor %%xmm0, %%xmm0  \n"
0497 #endif
0498         "   fxsave  %0           \n"
0499         "   fxrstor %2           \n"
0500     : "=m"(nctxt),"=&r"(rval),"=m"(orig_ctxt)
0501     : "m"(octxt), "i"(SSE_TEST_HP_FAILED),"a"(kind)
0502     : "xmm0"
0503     );
0504 
0505     if ( ( FPSR_IE & nctxt.fsw ) ) {
0506         rval |= SSE_TEST_FSPR_FAILED;
0507     }
0508     if ( FP_EXC == kind )
0509         nctxt.fsw |= (FPSR_IE | FPSR_ES);
0510     else if ( SSE_EXC == kind ) {
0511         tmp = __builtin_ia32_sqrtps( (__vf)(~sgn & (__v32)f4) );
0512         /* sqrt raises PE; just clear it */
0513         nctxt.mxcsr &= ~MXCSR_PE;
0514         memcpy( octxt.xmmregs[0], &tmp, sizeof(tmp) );
0515     }
0516 
0517     if ( cmp_ctxt(&nctxt, &octxt, 0) ) {
0518         rval |= SSE_TEST_CTXTCMP_FAILED;
0519     }
0520 
0521     s2 = sqrt(2.0);
0522 
0523     asm volatile("fstsw %0":"=m"(fsw));
0524 
0525     if ( sse_test_debug )
0526         printf("sqrt(2): %f (FSTW: 0x%02"PRIx16")\n", sqrt(2.0), fsw);
0527 
0528     return rval;
0529 }
0530 
0531 #ifdef __rtems__
/* Exception/interrupt handler installed by the LP task (defined below) */
static void
sse_test_ehdl(CPU_Exception_frame *p_f);

/* Semaphore released by the handler and obtained by the HP task */
rtems_id            sse_test_sync    = 0;
/* Exception handler that was installed before the test replaced it */
cpuExcHandlerType   sse_test_ohdl    = 0;

/* NOTE(review): not referenced anywhere else in the visible source */
CPU_Exception_frame *sse_test_frame  = 0;
/* Set to SSE_TEST_HP_FAILED by exc_raise() before raising and cleared by
 * the HP task; exc_raise() reads it back as its initial status.
 */
volatile int        sse_test_check   = SSE_TEST_HP_FAILED;
/* Bitmask (SSE_TEST_xxx_EXC) of the tests that are to be run */
unsigned            sse_tests        = 0;
0541 
0542 rtems_task
0543 sse_test_hp_task(rtems_task_argument arg)
0544 {
0545 rtems_id sync = (rtems_id)arg;
0546 
0547 uint16_t            fp_cw;
0548 uint32_t            mxcsr;
0549 rtems_status_code   sc;
0550 const char *        msgs[] = {"FPU_EXC", "SSE_EXC", "IRQ_EXC"};
0551 int                 i;
0552 
0553     /* verify that FPU control word is default value */
0554     asm volatile("fstcw %0":"=m"(fp_cw));
0555     if ( fp_cw != _CPU_Null_fp_context.fpucw ) {
0556         fprintf(
0557             stderr,
0558             "ERROR: FPU CW initialization mismatch: got 0x%04"PRIx16"; expected 0x%04"PRIx16"\n",
0559             fp_cw,
0560             _CPU_Null_fp_context.fpucw
0561         );
0562     }
0563 
0564     /* check MXCSR default value                     */
0565     asm volatile("stmxcsr %0":"=m"(mxcsr));
0566     if ( mxcsr != _CPU_Null_fp_context.mxcsr ) {
0567         fprintf(
0568             stderr,
0569             "ERROR: MXCSR initialization mismatch: got 0x%08"PRIx32"; expected 0x%08"PRIx32"\n",
0570             mxcsr,
0571             _CPU_Null_fp_context.mxcsr
0572         );
0573     }
0574 
0575 
0576     for (i=0; i<sizeof(msgs)/sizeof(msgs[0]); i++ ) {
0577         if ( ( sse_tests & (1<<i) ) ) {
0578             if ( sse_test_debug )
0579                 printk("HP task will now block for %s\n",msgs[i]);
0580 
0581             /* Blocking here lets the low-priority task continue */
0582             sc = rtems_semaphore_obtain(sync, RTEMS_WAIT, 500);
0583 
0584             all_clobber(0xaffeaffe, 0xcafecafe);
0585 
0586             if ( RTEMS_SUCCESSFUL != sc ) {
0587                 rtems_error(sc,"ERROR: sse_test hp task wasn't notified of exception\n");
0588                 goto bail;
0589             }
0590 
0591             /* set flag indicating that we executed until here */
0592             sse_test_check = 0;
0593         }
0594     }
0595 
0596 bail:
0597     rtems_task_suspend(RTEMS_SELF);
0598 }
0599 
/* Flags to skip individual tests.
 *
 * These are the bit positions used in 'sse_tests'. In the argument of
 * sse_test_lp_task() the same bits appear shifted left by one (bit 0 of
 * the argument is SSE_TEST_NO_DEL); a bit set there disables the test.
 */
#define SSE_TEST_FPU_EXC  (1<<0)
#define SSE_TEST_SSE_EXC  (1<<1)
#define SSE_TEST_IRQ_EXC  (1<<2)

/* Mask of all three tests */
#define SSE_TEST_ALL      7

/* If this flag is given the executing task is not deleted
 * when the test finishes. This is useful if you want to
 * execute from a shell or similar.
 */
#define SSE_TEST_NO_DEL    (1<<0)
0612 
0613 /* Task arg is bitmask of these flags */
0614 rtems_task
0615 sse_test_lp_task(rtems_task_argument arg)
0616 {
0617 rtems_id                hp_task = 0;
0618 rtems_status_code       sc;
0619 rtems_task_priority     pri;
0620 uint16_t                fp_cw,fp_cw_set;
0621 uint32_t                mxcsr, mxcsr_set;
0622 rtems_irq_connect_data  irqd;
0623 int                     flags = (int)arg;
0624 int                     st;
0625 int                     errs = 0;
0626 
0627     sse_tests = SSE_TEST_ALL & ~(flags>>1);
0628 
0629     sse_test_ohdl = 0;
0630 
0631     fp_cw_set = _CPU_Null_fp_context.fpucw | FPCW_RC(3) ;
0632     mxcsr_set = _CPU_Null_fp_context.mxcsr | MXCSR_RC(3) ;
0633     asm volatile("ldmxcsr %0"::"m"(mxcsr_set));
0634     asm volatile("fldcw   %0"::"m"(fp_cw_set));
0635 
0636     sc = rtems_semaphore_create(
0637             rtems_build_name('s','s','e','S'),
0638             0,
0639             RTEMS_SIMPLE_BINARY_SEMAPHORE,
0640             0,
0641             &sse_test_sync
0642         );
0643     if ( RTEMS_SUCCESSFUL != sc ) {
0644         rtems_error(sc, "sse_test ERROR: creation of 'sync' semaphore failed");
0645         errs++;
0646         goto bail;
0647     }
0648 
0649     rtems_task_set_priority( RTEMS_SELF, RTEMS_CURRENT_PRIORITY, &pri );
0650 
0651     sc = rtems_task_create(
0652             rtems_build_name('s','s','e','H'),
0653             pri - 2,
0654             20000,
0655             RTEMS_DEFAULT_MODES,
0656             RTEMS_FLOATING_POINT,
0657             &hp_task
0658         );
0659     if ( RTEMS_SUCCESSFUL != sc ) {
0660         hp_task = 0;
0661         rtems_error( sc, "sse_test ERROR: creation of high-priority task failed");
0662         errs++;
0663         goto bail;
0664     }
0665 
0666     sc = rtems_task_start( hp_task, sse_test_hp_task, (rtems_task_argument)sse_test_sync );
0667     if ( RTEMS_SUCCESSFUL != sc ) {
0668         rtems_error( sc, "sse_test ERROR: start of high-priority task failed");
0669         goto bail;
0670     }
0671 
0672     /* Test if FP/SSE context is saved/restored across an exception */
0673     sse_test_ohdl      = _currentExcHandler;
0674     _currentExcHandler = sse_test_ehdl;
0675 
0676     if ( (sse_tests & SSE_TEST_FPU_EXC) ) {
0677         if ( (st = exc_raise(FP_EXC)) ) {
0678             prstat(st,"FP_EXC");
0679             errs++;
0680         }
0681 
0682         /* Test modified FPCW/MXCSR */
0683         asm volatile("fstcw   %0":"=m"(fp_cw));
0684         asm volatile("stmxcsr %0":"=m"(mxcsr));
0685         mxcsr &= ~(MXCSR_ALLE);
0686         if ( fp_cw != fp_cw_set ) {
0687             fprintf(stderr,"sse_test ERROR: FPCW mismatch (after FP_EXC): expected 0x%04"PRIx16", got 0x%04"PRIx16"\n", fp_cw_set, fp_cw);
0688             errs++;
0689         }
0690         if ( mxcsr != mxcsr_set ) {
0691             fprintf(stderr,"sse_test ERROR: MXCSR mismatch (after FP_EXC): expected 0x%08"PRIx32", got 0x%08"PRIx32"\n", mxcsr_set, mxcsr);
0692             errs++;
0693         }
0694     }
0695 
0696     if ( (sse_tests & SSE_TEST_SSE_EXC) ) {
0697         if ( (st = exc_raise(SSE_EXC)) ) {
0698             prstat(st, "SSE_EXC");
0699             errs++;
0700         }
0701 
0702         /* Test modified FPCW/MXCSR */
0703         asm volatile("fstcw   %0":"=m"(fp_cw));
0704         asm volatile("stmxcsr %0":"=m"(mxcsr));
0705         mxcsr &= ~(MXCSR_ALLE);
0706         if ( fp_cw != fp_cw_set ) {
0707             fprintf(stderr,"sse_test ERROR: FPCW mismatch (after SSE_EXC): expected 0x%04"PRIx16", got 0x%04"PRIx16"\n", fp_cw_set, fp_cw);
0708             errs++;
0709         }
0710         if ( mxcsr != mxcsr_set ) {
0711             fprintf(stderr,"sse_test ERROR: MXCSR mismatch (after SSE_EXC): expected 0x%08"PRIx32", got 0x%08"PRIx32"\n", mxcsr_set, mxcsr);
0712             errs++;
0713         }
0714     }
0715 
0716 
0717     if ( (sse_tests & SSE_TEST_IRQ_EXC) ) {
0718         memset( &irqd, 0, sizeof(irqd) );
0719         irqd.name   = SSE_TEST_IRQ;
0720         irqd.hdl    = (void*)sse_test_ehdl;
0721         irqd.handle = 0;
0722 
0723         if ( ! BSP_install_rtems_irq_handler( &irqd ) ) {
0724             fprintf(stderr, "sse_test ERROR: Unable to install ISR\n");
0725             errs++;
0726             goto bail;
0727         }
0728 
0729         /* Test if FP/SSE context is saved/restored across an interrupt */
0730         if ( (st = exc_raise(IRQ_EXC)) ) {
0731             prstat(st, "IRQ");
0732             errs++;
0733         }
0734 
0735         if ( ! BSP_remove_rtems_irq_handler( &irqd ) ) {
0736             fprintf(stderr, "sse_test ERROR: Unable to uninstall ISR\n");
0737         }
0738 
0739         /* Test modified FPCW/MXCSR */
0740         asm volatile("fstcw   %0":"=m"(fp_cw));
0741         asm volatile("stmxcsr %0":"=m"(mxcsr));
0742         mxcsr &= ~(MXCSR_ALLE);
0743         if ( fp_cw != fp_cw_set ) {
0744             fprintf(stderr,"sse_test ERROR: FPCW mismatch (after IRQ): expected 0x%04"PRIx16", got 0x%04"PRIx16"\n", fp_cw_set, fp_cw);
0745             errs++;
0746         }
0747         if ( mxcsr != mxcsr_set ) {
0748             fprintf(stderr,"sse_test ERROR: MXCSR mismatch (after IRQ): expected 0x%08"PRIx32", got 0x%08"PRIx32"\n", mxcsr_set, mxcsr);
0749             errs++;
0750         }
0751     }
0752 
0753 
0754 bail:
0755     /* Wait for console to calm down... */
0756     rtems_task_wake_after(5);
0757     fprintf(stderr,"SSE/FPU Test %s (%u errors)\n", errs ? "FAILED":"PASSED", errs);
0758     if ( sse_test_ohdl ) {
0759         _currentExcHandler = sse_test_ohdl;
0760         sse_test_ohdl      = 0;
0761     }
0762     if ( sse_test_sync )
0763         rtems_semaphore_delete( sse_test_sync );
0764     sse_test_sync = 0;
0765     if ( hp_task )
0766         rtems_task_delete( hp_task );
0767 
0768     if ( ! (flags & SSE_TEST_NO_DEL) )
0769         rtems_task_exit();
0770 }
0771 
0772 static void
0773 sse_test_ehdl(CPU_Exception_frame *p_f)
0774 {
0775 int i,j,start = 0;
0776 int mismatch;
0777 __vf    f4;
0778 
0779     if ( p_f ) {
0780         printk("Got exception #%u\n",        p_f->idtIndex);
0781         printk("EIP: 0x%08x, ESP: 0x%08x\n", p_f->eip, p_f->esp0);
0782         printk("TID: 0x%08x\n",              _Thread_Executing->Object.id);
0783 
0784         if ( ! p_f->fp_ctxt ) {
0785             printk("ERROR: NO FP/SSE CONTEXT ATTACHED ??\n");
0786             sse_test_ohdl(p_f);
0787         }
0788         if ( 16 == p_f->idtIndex ) {
0789             printk("Resetting FP status (0x%04"PRIx16")\n", p_f->fp_ctxt->fsw);
0790             p_f->fp_ctxt->fsw = 0;
0791         } else if ( 19 == p_f->idtIndex ) {
0792             start = 1;
0793             memcpy(&f4, p_f->fp_ctxt->xmmregs[0], sizeof(f4));
0794             f4 = -f4;
0795             memcpy(p_f->fp_ctxt->xmmregs[0], &f4, sizeof(f4));
0796             p_f->fp_ctxt->mxcsr &= ~MXCSR_ALLE;
0797         } else {
0798             printk("(skipping non-FP exception)\n");
0799             sse_test_ohdl(p_f);
0800         }
0801 
0802         printk("Checking XMM regs -- ");
0803         for ( mismatch=0, i=start; i<8; i++ ) {
0804             for ( j=0; j<16; j++ ) {
0805                 if ( p_f->fp_ctxt->xmmregs[i][j] != ((i<<4) | j) )
0806                     mismatch++;
0807             }
0808         }
0809         if ( mismatch ) {
0810             printk("%u mismatches; dump:\n", mismatch);
0811             for ( i=0; i<8; i++ ) {
0812                 for ( j=0; j<16; j++ ) {
0813                     printk("0x%02x ", p_f->fp_ctxt->xmmregs[i][j]);
0814                 }
0815                 printk("\n");
0816             }
0817         } else {
0818             printk("OK\n");
0819         }
0820     } else {
0821         printk("IRQ %u\n", SSE_TEST_IRQ);
0822     }
0823     printk("Clobbering FPU/SSE state\n");
0824     asm volatile("finit");
0825     sse_clobber(0xdeadbeef);
0826     printk("Notifying task\n");
0827     rtems_semaphore_release( sse_test_sync );   
0828 }
0829 
0830 #else
0831 
0832 /* Code using signals for testing under linux; unfortunately, 32-bit
0833  * linux seems to pass no SSE context info to the sigaction...
0834  */
0835 
0836 #include <signal.h>
0837 #include <ucontext.h>
0838 
/* Expands to a 'case' for si_code FPE_<X> that stores the code's name in
 * the local variable 'msg'.
 */
#define MKCASE(X) case FPE_##X: msg="FPE_"#X; break;

/* Zero XMM register number 'i' (a literal digit) */
#define CLRXMM(i) __asm__ volatile("pxor %%xmm"#i", %%xmm"#i:::"xmm"#i)
0842 
0843 static void
0844 fpe_act(int signum, siginfo_t *p_info, void *arg3)
0845 {
0846 ucontext_t *p_ctxt = arg3;
0847 const char *msg    = "FPE_UNKNOWN";
0848 uint16_t   *p_fst;
0849 
0850     if ( SIGFPE != signum ) {
0851         fprintf(stderr,"WARNING: fpe_act handles SIGFPE\n");
0852         return;
0853     }
0854     switch ( p_info->si_code ) {
0855         default:
0856             fprintf(stderr,"WARNING: fpe_act got unkown code %u\n", p_info->si_code);
0857             return;
0858         MKCASE(INTDIV);
0859         MKCASE(INTOVF);
0860         MKCASE(FLTDIV);
0861         MKCASE(FLTOVF);
0862         MKCASE(FLTUND);
0863         MKCASE(FLTRES);
0864         MKCASE(FLTINV);
0865         MKCASE(FLTSUB);
0866     }
0867     fprintf(stderr,"Got SIGFPE (%s) @%p\n", msg, p_info->si_addr);
0868 #ifdef __linux__
0869     fprintf(stderr,"Resetting FP status 0x%02lx\n", p_ctxt->uc_mcontext.fpregs->sw);
0870     p_ctxt->uc_mcontext.fpregs->sw = 0;
0871 #ifdef TEST_MISMATCH
0872     fp_st1((void*)&p_ctxt->uc_mcontext.fpregs->_st[3],2.345);
0873 #endif
0874 #endif
0875 
0876     /* Clear FPU; if context is properly saved/restored around exception
0877      * then this shouldn't disturb the register contents of the interrupted
0878      * task/process.
0879      */
0880     asm volatile("finit");
0881     sse_clobber(0xdeadbeef);
0882 }
0883 
0884 static void
0885 test(void)
0886 {
0887 Context_Control_sse ctxt;
0888 
0889     stor_ctxt(&ctxt);
0890     printf("FPCW: 0x%"PRIx16"\nFPSW: 0x%"PRIx16"\n", ctxt.fcw, ctxt.fsw);
0891     printf("FTAG: 0x%"PRIx8"\n",ctxt.ftw);
0892 }
0893 
0894 int
0895 main(int argc, char **argv)
0896 {
0897 struct sigaction a1, a2;
0898 uint32_t         mxcsr;
0899 
0900     memset(&a1, 0, sizeof(a1));
0901 
0902     a1.sa_sigaction = fpe_act;
0903     a1.sa_flags     = SA_SIGINFO;   
0904 
0905     if ( sigaction(SIGFPE, &a1, &a2) ) {
0906         perror("sigaction");
0907         return 1;
0908     }
0909 
0910     asm volatile("stmxcsr %0":"=m"(mxcsr));
0911     printf("MXCSR: 0x%08"PRIx32"\n", mxcsr);
0912 
0913     test();
0914     exc_raise(0);
0915     return 0;
0916 }
0917 #endif
0918 
0919 /* Helpers to access CR4 and MXCSR */
0920 
/* Read control register CR4.
 *
 * RETURNS: the current CR4 value.
 * NOTE(review): 'mov %cr4' presumably requires supervisor privilege -
 * confirm before calling from anything but kernel-level code.
 */
uint32_t
mfcr4(void)
{
uint32_t rval;

    __asm__ __volatile__("mov %%cr4, %0":"=r"(rval));
    return rval;
}
0928 
/* Write 'rval' to control register CR4.
 *
 * NOTE(review): 'mov to %cr4' presumably requires supervisor privilege -
 * confirm before calling from anything but kernel-level code.
 */
void
mtcr4(uint32_t rval)
{
    __asm__ __volatile__("mov %0, %%cr4"::"r"(rval));
}
0934 
/* Read the SSE control/status register.
 *
 * RETURNS: the current MXCSR value (bits: MXCSR_xxx).
 */
uint32_t
mfmxcsr(void)
{
uint32_t rval;

    __asm__ __volatile__("stmxcsr %0":"=m"(rval));
    return rval;
}
0942 
/* Load 'rval' into the SSE control/status register MXCSR
 * (bits: MXCSR_xxx).
 */
void
mtmxcsr(uint32_t rval)
{
    __asm__ __volatile__("ldmxcsr %0"::"m"(rval));
}
0948 
0949 
0950 float
0951 sseraise()
0952 {
0953 __vf f4={-2., -2., -2. -2.};
0954 float f;
0955      f4 = __builtin_ia32_sqrtps( f4 );
0956     memcpy(&f,&f4,sizeof(f));
0957     return f;
0958 }