File indexing completed on 2025-05-11 08:24:24
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066
0067
0068
0069
0070
0071
0072
0073
0074
0075
0076
0077
0078
0079
0080
0081
0082
0083
0084
0085
0086
0087
0088
0089
0090
0091
0092
0093
0094
0095
0096
0097
0098
0099
0100 #ifdef HAVE_CONFIG_H
0101 #include "config.h"
0102 #endif
0103
0104 #ifdef __rtems__
0105 #include <rtems.h>
0106 #include <rtems/score/cpu.h>
0107 #include <rtems/irq.h>
0108 #include <rtems/error.h>
0109 #endif
0110
0111 #include <inttypes.h>
0112 #include <stdio.h>
0113 #include <stdlib.h>
0114 #include <string.h>
0115 #include <math.h>
0116
0117
/*
 * Interrupt line used for the software-raised test interrupt: do_raise
 * issues "int $32+SSE_TEST_IRQ" and the RTEMS test installs its ISR under
 * this name.
 */
#define SSE_TEST_IRQ 10

/* 16-byte GCC vector types, i.e. the width of one XMM register. */
typedef uint8_t  __v8  __attribute__((__vector_size__(16)));   /* 16 x uint8_t  */
typedef uint32_t __v32 __attribute__((__vector_size__(16)));   /*  4 x uint32_t */
typedef float    __vf  __attribute__((__vector_size__(16)));   /*  4 x float    */
0123
#if !defined(__rtems__)

/*
 * Local definition of the FPU/SSE context image for builds outside RTEMS.
 *
 * This is the 512-byte memory image that this file hands to the
 * fxsave / fxrstor instructions; those require the 16-byte alignment
 * requested below.
 */
typedef struct Context_Control_sse {
    uint16_t fcw;           /* x87 control word (see FPCW_*)           */
    uint16_t fsw;           /* x87 status word (see FPSR_*)            */
    uint8_t  ftw;           /* tag byte: one bit per x87 register      */
    uint8_t  res_1;         /* reserved                                */
    uint16_t fop;
    uint32_t fpu_ip;
    uint16_t cs;
    uint16_t res_2;         /* reserved                                */
    uint32_t fpu_dp;
    uint16_t ds;
    uint16_t res_3;         /* reserved                                */
    uint32_t mxcsr;         /* SSE control/status (see MXCSR_*)        */
    uint32_t mxcsr_mask;
    struct {
        uint8_t fpreg[10];  /* one x87 register, 80-bit extended value */
        uint8_t res_4[ 6];  /* padding up to a 16-byte slot            */
    }        fp_mmregs[8];
    uint8_t  xmmregs[8][16];    /* xmm0 .. xmm7, 16 bytes each         */
    uint8_t  res_5[224];        /* pads the image to 512 bytes         */
} Context_Control_sse
__attribute__((aligned(16)))
;

#endif /* !__rtems__ */
0150
/* ---- MXCSR (SSE control/status register) bits ---- */
#define MXCSR_FZ    0x8000                  /* bit 15            */
#define MXCSR_RC(x) (((x)&3)<<13)           /* bits 14:13        */
#define MXCSR_PM    0x1000                  /* bit 12  \         */
#define MXCSR_UM    0x0800                  /* bit 11   |        */
#define MXCSR_OM    0x0400                  /* bit 10   | masks  */
#define MXCSR_ZM    0x0200                  /* bit  9   |        */
#define MXCSR_DM    0x0100                  /* bit  8   |        */
#define MXCSR_IM    0x0080                  /* bit  7  /         */
#define MXCSR_DAZ   0x0040                  /* bit  6            */
#define MXCSR_PE    0x0020                  /* bit  5  \         */
#define MXCSR_UE    0x0010                  /* bit  4   |        */
#define MXCSR_OE    0x0008                  /* bit  3   | flags  */
#define MXCSR_ZE    0x0004                  /* bit  2   |        */
#define MXCSR_DE    0x0002                  /* bit  1   |        */
#define MXCSR_IE    0x0001                  /* bit  0  /         */

/* All MXCSR mask bits / all MXCSR flag bits */
#define MXCSR_ALLM (MXCSR_PM | MXCSR_UM | MXCSR_OM | MXCSR_ZM | MXCSR_DM | MXCSR_IM)
#define MXCSR_ALLE (MXCSR_PE | MXCSR_UE | MXCSR_OE | MXCSR_ZE | MXCSR_DE | MXCSR_IE)

/* ---- x87 status word bits ---- */
#define FPSR_B      0x8000                  /* bit 15            */
#define FPSR_C3     0x4000                  /* bit 14            */
#define FPSR_TOP(x) (((x)&7)<<11)           /* bits 13:11        */
#define FPSR_C2     0x0400                  /* bit 10            */
#define FPSR_C1     0x0200                  /* bit  9            */
#define FPSR_C0     0x0100                  /* bit  8            */
#define FPSR_ES     0x0080                  /* bit  7            */
#define FPSR_SF     0x0040                  /* bit  6            */
#define FPSR_PE     0x0020                  /* bit  5            */
#define FPSR_UE     0x0010                  /* bit  4            */
#define FPSR_OE     0x0008                  /* bit  3            */
#define FPSR_ZE     0x0004                  /* bit  2            */
#define FPSR_DE     0x0002                  /* bit  1            */
#define FPSR_IE     0x0001                  /* bit  0            */

/* ---- x87 control word bits ---- */
#define FPCW_X      0x1000                  /* bit 12            */
#define FPCW_RC(x)  (((x)&3)<<10)           /* bits 11:10        */
#define FPCW_PC(x)  (((x)&3)<< 8)           /* bits  9:8         */
#define FPCW_PM     0x0020                  /* bit  5            */
#define FPCW_UM     0x0010                  /* bit  4            */
#define FPCW_OM     0x0008                  /* bit  3            */
#define FPCW_ZM     0x0004                  /* bit  2            */
#define FPCW_DM     0x0002                  /* bit  1            */
#define FPCW_IM     0x0001                  /* bit  0            */

/* All x87 mask bits / all x87 status flag bits (including ES and SF) */
#define FPCW_ALLM (FPCW_PM | FPCW_UM | FPCW_OM | FPCW_ZM | FPCW_DM | FPCW_IM)
#define FPSR_ALLE (FPSR_ES | FPSR_SF | FPSR_PE | FPSR_UE | FPSR_OE | FPSR_ZE | FPSR_DE | FPSR_IE)
0197
0198
/*
 * Write `v` to a 10-byte buffer in the x87 80-bit extended format.
 *
 * p_dst: destination buffer (10 bytes)
 * v:     value to store
 */
void
fp_st1(uint8_t (*p_dst)[10], double v)
{
    /*
     * The "t" constraint places v on top of the x87 stack; fstpt pops it
     * again, which is why "st" is listed as clobbered.
     */
    __asm__ __volatile__(
        "fstpt %[dst]"
        : [dst] "=m" (*p_dst)
        : "t" (v)
        : "st"
    );
}
0204
0205
0206 void
0207 fp_st(Context_Control_sse *p_ctxt, int i, double v)
0208 {
0209 fp_st1(&p_ctxt->fp_mmregs[i].fpreg,v);
0210 }
0211
0212
/*
 * Read a value stored in x87 80-bit extended format from a 10-byte buffer.
 *
 * p_src: source buffer (10 bytes)
 * Returns the value, converted to double.
 */
double
fp_ld1(uint8_t (*p_src)[10])
{
    double result;

    /*
     * The first memory operand supplies the address for fldt; the second
     * one names the whole 10-byte array as an input of the asm.
     */
    __asm__ __volatile__(
        "fldt %[first]"
        : "=t" (result)
        : [first] "m" ((*p_src)[0]),
          [whole] "m" (*p_src)
    );
    return result;
}
0221
0222
0223 double
0224 fp_ld(Context_Control_sse *p_ctxt, int i)
0225 {
0226 return fp_ld1(&p_ctxt->fp_mmregs[i].fpreg);
0227 }
0228
0229 #define FPUCLOBBER \
0230 "st","st(1)","st(2)","st(3)", \
0231 "st(4)","st(5)","st(6)","st(7)",\
0232 "fpsr","memory"
0233
0234
0235
0236 #define SSECLOBBER \
0237 "xmm0","xmm1","xmm2","xmm3", \
0238 "xmm4","xmm5","xmm6","xmm7"
0239
0240 static void
0241 sse_clobber(uint32_t x)
0242 {
0243 __v32 v = { x, x, x, x };
0244 asm volatile (
0245 " movdqa %0, %%xmm0 \n"
0246 " movdqa %%xmm0, %%xmm1 \n"
0247 " movdqa %%xmm0, %%xmm2 \n"
0248 " movdqa %%xmm0, %%xmm3 \n"
0249 " movdqa %%xmm0, %%xmm4 \n"
0250 " movdqa %%xmm0, %%xmm5 \n"
0251 " movdqa %%xmm0, %%xmm6 \n"
0252 " movdqa %%xmm0, %%xmm7 \n"
0253 :
0254 :"m"(v)
0255 :SSECLOBBER
0256 );
0257 }
0258
/*
 * Re-initialise the x87 unit (finit) and fill xmm0 .. xmm7 with the
 * pattern { v1, v2, v1, v2 }.
 *
 * The routine is written as bare assembly and picks its arguments straight
 * off the stack.  It therefore assumes the i386 stack-based calling
 * convention in which, on entry, 0(%esp) holds the return address and the
 * first argument starts at 4(%esp) -- the same assumption init_ctxt makes.
 */
void
all_clobber(uint32_t v1, uint32_t v2);

__asm__ (
"all_clobber:               \n"
"   finit                   \n"
    /* v1 and v2 are adjacent on the stack: load both as one quadword */
"   movq  4(%esp), %xmm0    \n"
    /* duplicate the low quadword into the high half */
"   punpcklqdq %xmm0, %xmm0 \n"
"   movdqa %xmm0, %xmm1     \n"
"   movdqa %xmm0, %xmm2     \n"
"   movdqa %xmm0, %xmm3     \n"
"   movdqa %xmm0, %xmm4     \n"
"   movdqa %xmm0, %xmm5     \n"
"   movdqa %xmm0, %xmm6     \n"
"   movdqa %xmm0, %xmm7     \n"
"   ret                     \n"
);
0276
0277
0278
0279 void
0280 init_ctxt(Context_Control_sse *p_ctxt);
0281
0282 __asm__ (
0283 "init_ctxt: \n"
0284 " finit \n"
0285 " mov 4(%esp), %eax\n"
0286 " fxsave (%eax) \n"
0287 " fwait \n"
0288 " ret \n"
0289 );
0290
0291
0292
0293 static void
0294 stor_ctxt(Context_Control_sse *p_ctxt)
0295 {
0296 memset(p_ctxt, 0, sizeof(*p_ctxt));
0297 asm volatile(
0298
0299 " fxsave %0 \n"
0300 " fwait \n"
0301 : "=m"(*p_ctxt)
0302 :
0303 : FPUCLOBBER
0304 );
0305 }
0306
0307 #define H08 "0x%02"PRIx8
0308 #define H16 "0x%04"PRIx16
0309 #define H32 "0x%08"PRIx32
0310
0311 #define F16 "mismatch ("H16" != "H16")\n"
0312
0313 #define FLDCMP(fld, fmt) \
0314 if ( a->fld != b->fld ) { \
0315 rval = 1; \
0316 if ( !quiet ) \
0317 fprintf(stderr,#fld" mismatch ("fmt" != "fmt")\n",a->fld, b->fld); \
0318 }
0319
0320 #define FLTCMP(i) \
0321 do { \
0322 if ( ( (a->ftw ^ b->ftw) & (1<<i)) \
0323 || ( (a->ftw & b->ftw & (1<<i)) && \
0324 memcmp(a->fp_mmregs[i].fpreg, \
0325 b->fp_mmregs[i].fpreg, \
0326 sizeof(a->fp_mmregs[i].fpreg)) \
0327 ) \
0328 ) { \
0329 rval = 1; \
0330 if ( !quiet ) { \
0331 double fa = fp_ld(a, i); \
0332 double fb = fp_ld(b, i); \
0333 if ( ((a->ftw ^ b->ftw) & (1<<i)) ) \
0334 fprintf(stderr,"fpreg[%u] TAG mismatch (%u != %u)\n",i,(a->ftw & (1<<i)) ? 1 : 0,(b->ftw & (1<<i)) ? 1 : 0); \
0335 else \
0336 fprintf(stderr,"fpreg[%u] mismatch (%g != %g)\n",i,fa,fb); \
0337 } \
0338 } \
0339 } while (0)
0340
0341 #define XMMCMP(i) \
0342 do { \
0343 if ( memcmp(&a->xmmregs[i], \
0344 &b->xmmregs[i], \
0345 sizeof(a->xmmregs[i])) \
0346 ) { \
0347 rval = 1; \
0348 if ( !quiet ) { \
0349 int _jj; \
0350 fprintf(stderr,"xmmreg[%u] mismatch:\n", i); \
0351 fprintf(stderr," "); \
0352 for (_jj=0; _jj<16; _jj++) \
0353 fprintf(stderr,"%02x ",a->xmmregs[i][_jj]); \
0354 fprintf(stderr,"\n !=\n"); \
0355 fprintf(stderr," "); \
0356 for (_jj=0; _jj<16; _jj++) \
0357 fprintf(stderr,"%02x ",b->xmmregs[i][_jj]); \
0358 fprintf(stderr,"\n"); \
0359 } \
0360 } \
0361 } while (0)
0362
0363
0364
0365
0366
0367 static int
0368 cmp_ctxt(Context_Control_sse *a, Context_Control_sse *b, int quiet)
0369 {
0370 int rval = 0;
0371 int i;
0372 FLDCMP(fcw,H16);
0373 FLDCMP(fsw,H16);
0374 FLDCMP(ftw,H08);
0375 FLDCMP(fop,H16);
0376 FLDCMP(fpu_ip,H32);
0377 FLDCMP(cs,H16);
0378 FLDCMP(fpu_dp,H32);
0379 FLDCMP(ds,H16);
0380 FLDCMP(mxcsr,H32);
0381 FLDCMP(mxcsr_mask,H32);
0382 for ( i=0; i<8; i++ ) {
0383 FLTCMP(i);
0384 }
0385 for ( i=0; i<8; i++ ) {
0386 XMMCMP(i);
0387 }
0388 return rval;
0389 }
0390
0391
0392
/*
 * Kinds of event that exc_raise() can trigger.  do_raise only looks at the
 * sign of the value: zero, positive or negative.
 */
#define FP_EXC   0
#define IRQ_EXC  1
#define SSE_EXC  -1

/*
 * Build the "int" instruction for interrupt line x as an assembler string.
 * The two-level expansion makes sure x is macro-expanded before it is
 * turned into text.
 */
#define __INTRAISE(x) " int $32+"#x" \n"
#define INTRAISE(x) __INTRAISE(x)

/*
 * do_raise: bare assembly helper, called from the inline asm in
 * exc_raise() with the event kind in %eax.
 *
 *   kind == 0 : nothing beyond the initial fwait is executed
 *   kind <  0 : executes sqrtps on xmm0
 *   kind >  0 : issues software interrupt 32 + SSE_TEST_IRQ
 */
__asm__ (
"do_raise:\n"
"\t fwait                \n"
"\t test    %eax, %eax   \n"
"\t je      2f           \n"
"\t jl      1f           \n"
    INTRAISE(SSE_TEST_IRQ)
"\t jmp     2f           \n"
"1:\t sqrtps  %xmm0, %xmm0 \n"
"2:\n"
"\t ret                  \n"
);
0417
/* Failure bits returned by exc_raise(); bit n selects fail_msgs[n]. */
#define SSE_TEST_HP_FAILED       1
#define SSE_TEST_FSPR_FAILED     2
#define SSE_TEST_CTXTCMP_FAILED  4

/* Human-readable text for each failure bit, indexed by bit number. */
static const char *fail_msgs[] = {
    "Seems that HP task was not executing",
    "FPSR 'Invalid-operation' flag should be clear",
    "Restored context does NOT match the saved one",
};

/*
 * Print one error line on stderr for every failure bit set in `st`.
 *
 * st:    OR of SSE_TEST_*_FAILED bits
 * where: name of the test that produced the status
 */
static void prstat(int st, const char *where)
{
    const size_t n_msgs = sizeof(fail_msgs) / sizeof(fail_msgs[0]);
    size_t       bit;

    for ( bit = 0; bit < n_msgs; bit++ ) {
        if ( (st & (1 << bit)) == 0 )
            continue;
        fprintf(stderr,"sse_test ERROR: %s (testing: %s)\n", fail_msgs[bit], where);
    }
}
0437
0438 int sse_test_debug = 0;
0439
0440 static int
0441 exc_raise(int kind)
0442 {
0443 Context_Control_sse nctxt;
0444 Context_Control_sse octxt;
0445 Context_Control_sse orig_ctxt;
0446 int i,j,rval;
0447 double s2;
0448 uint16_t fsw;
0449 __vf f4 = { -1., -2., -3., -4. };
0450 __vf tmp;
0451 __v32 sgn = { (1<<31), (1<<31), (1<<31), (1<<31) };
0452
0453 stor_ctxt(&octxt);
0454
0455 octxt.fsw &= ~FPSR_ALLE;
0456 octxt.mxcsr &= ~MXCSR_ALLE;
0457
0458 for ( i=0; i<8; i++ ) {
0459 fp_st(&octxt, i, (double)i+0.1);
0460 for (j=0; j<16; j++) {
0461 octxt.xmmregs[i][j]=(i<<4)+j;
0462 }
0463 }
0464
0465
0466 if ( SSE_EXC == kind ) {
0467 memcpy(octxt.xmmregs[0], &f4, sizeof(f4));
0468 octxt.mxcsr &= ~MXCSR_IM;
0469 }
0470
0471
0472 octxt.ftw = 0xff;
0473
0474
0475 octxt.fcw &= ~ ( FPCW_IM );
0476
0477 if ( FP_EXC == kind ) {
0478 octxt.fsw |= ( FPSR_IE | FPSR_ES );
0479 }
0480
0481 if ( sse_test_debug )
0482 printk("RAISE (fsw was 0x%04x)\n", orig_ctxt.fsw);
0483 asm volatile(
0484 " fxsave %2 \n"
0485 #ifdef __rtems__
0486 " movl %4, sse_test_check\n"
0487 #endif
0488 " fxrstor %3 \n"
0489 " call do_raise \n"
0490 #ifdef __rtems__
0491 " movl sse_test_check, %1\n"
0492 #else
0493 " movl $0, %1 \n"
0494 #endif
0495 #ifdef TEST_MISMATCH
0496 " pxor %%xmm0, %%xmm0 \n"
0497 #endif
0498 " fxsave %0 \n"
0499 " fxrstor %2 \n"
0500 : "=m"(nctxt),"=&r"(rval),"=m"(orig_ctxt)
0501 : "m"(octxt), "i"(SSE_TEST_HP_FAILED),"a"(kind)
0502 : "xmm0"
0503 );
0504
0505 if ( ( FPSR_IE & nctxt.fsw ) ) {
0506 rval |= SSE_TEST_FSPR_FAILED;
0507 }
0508 if ( FP_EXC == kind )
0509 nctxt.fsw |= (FPSR_IE | FPSR_ES);
0510 else if ( SSE_EXC == kind ) {
0511 tmp = __builtin_ia32_sqrtps( (__vf)(~sgn & (__v32)f4) );
0512
0513 nctxt.mxcsr &= ~MXCSR_PE;
0514 memcpy( octxt.xmmregs[0], &tmp, sizeof(tmp) );
0515 }
0516
0517 if ( cmp_ctxt(&nctxt, &octxt, 0) ) {
0518 rval |= SSE_TEST_CTXTCMP_FAILED;
0519 }
0520
0521 s2 = sqrt(2.0);
0522
0523 asm volatile("fstsw %0":"=m"(fsw));
0524
0525 if ( sse_test_debug )
0526 printf("sqrt(2): %f (FSTW: 0x%02"PRIx16")\n", sqrt(2.0), fsw);
0527
0528 return rval;
0529 }
0530
0531 #ifdef __rtems__
0532 static void
0533 sse_test_ehdl(CPU_Exception_frame *p_f);
0534
0535 rtems_id sse_test_sync = 0;
0536 cpuExcHandlerType sse_test_ohdl = 0;
0537
0538 CPU_Exception_frame *sse_test_frame = 0;
0539 volatile int sse_test_check = SSE_TEST_HP_FAILED;
0540 unsigned sse_tests = 0;
0541
0542 rtems_task
0543 sse_test_hp_task(rtems_task_argument arg)
0544 {
0545 rtems_id sync = (rtems_id)arg;
0546
0547 uint16_t fp_cw;
0548 uint32_t mxcsr;
0549 rtems_status_code sc;
0550 const char * msgs[] = {"FPU_EXC", "SSE_EXC", "IRQ_EXC"};
0551 int i;
0552
0553
0554 asm volatile("fstcw %0":"=m"(fp_cw));
0555 if ( fp_cw != _CPU_Null_fp_context.fpucw ) {
0556 fprintf(
0557 stderr,
0558 "ERROR: FPU CW initialization mismatch: got 0x%04"PRIx16"; expected 0x%04"PRIx16"\n",
0559 fp_cw,
0560 _CPU_Null_fp_context.fpucw
0561 );
0562 }
0563
0564
0565 asm volatile("stmxcsr %0":"=m"(mxcsr));
0566 if ( mxcsr != _CPU_Null_fp_context.mxcsr ) {
0567 fprintf(
0568 stderr,
0569 "ERROR: MXCSR initialization mismatch: got 0x%08"PRIx32"; expected 0x%08"PRIx32"\n",
0570 mxcsr,
0571 _CPU_Null_fp_context.mxcsr
0572 );
0573 }
0574
0575
0576 for (i=0; i<sizeof(msgs)/sizeof(msgs[0]); i++ ) {
0577 if ( ( sse_tests & (1<<i) ) ) {
0578 if ( sse_test_debug )
0579 printk("HP task will now block for %s\n",msgs[i]);
0580
0581
0582 sc = rtems_semaphore_obtain(sync, RTEMS_WAIT, 500);
0583
0584 all_clobber(0xaffeaffe, 0xcafecafe);
0585
0586 if ( RTEMS_SUCCESSFUL != sc ) {
0587 rtems_error(sc,"ERROR: sse_test hp task wasn't notified of exception\n");
0588 goto bail;
0589 }
0590
0591
0592 sse_test_check = 0;
0593 }
0594 }
0595
0596 bail:
0597 rtems_task_suspend(RTEMS_SELF);
0598 }
0599
0600
/* Bits of `sse_tests`: one per test that is to be run. */
#define SSE_TEST_FPU_EXC 0x1
#define SSE_TEST_SSE_EXC 0x2
#define SSE_TEST_IRQ_EXC 0x4

/* All three tests. */
#define SSE_TEST_ALL (SSE_TEST_FPU_EXC | SSE_TEST_SSE_EXC | SSE_TEST_IRQ_EXC)

/*
 * Argument of sse_test_lp_task(): bit 0 is SSE_TEST_NO_DEL (do not exit
 * the task at the end); the remaining bits, shifted right by one, name
 * tests that are to be skipped.
 */
#define SSE_TEST_NO_DEL 0x1
0612
0613
0614 rtems_task
0615 sse_test_lp_task(rtems_task_argument arg)
0616 {
0617 rtems_id hp_task = 0;
0618 rtems_status_code sc;
0619 rtems_task_priority pri;
0620 uint16_t fp_cw,fp_cw_set;
0621 uint32_t mxcsr, mxcsr_set;
0622 rtems_irq_connect_data irqd;
0623 int flags = (int)arg;
0624 int st;
0625 int errs = 0;
0626
0627 sse_tests = SSE_TEST_ALL & ~(flags>>1);
0628
0629 sse_test_ohdl = 0;
0630
0631 fp_cw_set = _CPU_Null_fp_context.fpucw | FPCW_RC(3) ;
0632 mxcsr_set = _CPU_Null_fp_context.mxcsr | MXCSR_RC(3) ;
0633 asm volatile("ldmxcsr %0"::"m"(mxcsr_set));
0634 asm volatile("fldcw %0"::"m"(fp_cw_set));
0635
0636 sc = rtems_semaphore_create(
0637 rtems_build_name('s','s','e','S'),
0638 0,
0639 RTEMS_SIMPLE_BINARY_SEMAPHORE,
0640 0,
0641 &sse_test_sync
0642 );
0643 if ( RTEMS_SUCCESSFUL != sc ) {
0644 rtems_error(sc, "sse_test ERROR: creation of 'sync' semaphore failed");
0645 errs++;
0646 goto bail;
0647 }
0648
0649 rtems_task_set_priority( RTEMS_SELF, RTEMS_CURRENT_PRIORITY, &pri );
0650
0651 sc = rtems_task_create(
0652 rtems_build_name('s','s','e','H'),
0653 pri - 2,
0654 20000,
0655 RTEMS_DEFAULT_MODES,
0656 RTEMS_FLOATING_POINT,
0657 &hp_task
0658 );
0659 if ( RTEMS_SUCCESSFUL != sc ) {
0660 hp_task = 0;
0661 rtems_error( sc, "sse_test ERROR: creation of high-priority task failed");
0662 errs++;
0663 goto bail;
0664 }
0665
0666 sc = rtems_task_start( hp_task, sse_test_hp_task, (rtems_task_argument)sse_test_sync );
0667 if ( RTEMS_SUCCESSFUL != sc ) {
0668 rtems_error( sc, "sse_test ERROR: start of high-priority task failed");
0669 goto bail;
0670 }
0671
0672
0673 sse_test_ohdl = _currentExcHandler;
0674 _currentExcHandler = sse_test_ehdl;
0675
0676 if ( (sse_tests & SSE_TEST_FPU_EXC) ) {
0677 if ( (st = exc_raise(FP_EXC)) ) {
0678 prstat(st,"FP_EXC");
0679 errs++;
0680 }
0681
0682
0683 asm volatile("fstcw %0":"=m"(fp_cw));
0684 asm volatile("stmxcsr %0":"=m"(mxcsr));
0685 mxcsr &= ~(MXCSR_ALLE);
0686 if ( fp_cw != fp_cw_set ) {
0687 fprintf(stderr,"sse_test ERROR: FPCW mismatch (after FP_EXC): expected 0x%04"PRIx16", got 0x%04"PRIx16"\n", fp_cw_set, fp_cw);
0688 errs++;
0689 }
0690 if ( mxcsr != mxcsr_set ) {
0691 fprintf(stderr,"sse_test ERROR: MXCSR mismatch (after FP_EXC): expected 0x%08"PRIx32", got 0x%08"PRIx32"\n", mxcsr_set, mxcsr);
0692 errs++;
0693 }
0694 }
0695
0696 if ( (sse_tests & SSE_TEST_SSE_EXC) ) {
0697 if ( (st = exc_raise(SSE_EXC)) ) {
0698 prstat(st, "SSE_EXC");
0699 errs++;
0700 }
0701
0702
0703 asm volatile("fstcw %0":"=m"(fp_cw));
0704 asm volatile("stmxcsr %0":"=m"(mxcsr));
0705 mxcsr &= ~(MXCSR_ALLE);
0706 if ( fp_cw != fp_cw_set ) {
0707 fprintf(stderr,"sse_test ERROR: FPCW mismatch (after SSE_EXC): expected 0x%04"PRIx16", got 0x%04"PRIx16"\n", fp_cw_set, fp_cw);
0708 errs++;
0709 }
0710 if ( mxcsr != mxcsr_set ) {
0711 fprintf(stderr,"sse_test ERROR: MXCSR mismatch (after SSE_EXC): expected 0x%08"PRIx32", got 0x%08"PRIx32"\n", mxcsr_set, mxcsr);
0712 errs++;
0713 }
0714 }
0715
0716
0717 if ( (sse_tests & SSE_TEST_IRQ_EXC) ) {
0718 memset( &irqd, 0, sizeof(irqd) );
0719 irqd.name = SSE_TEST_IRQ;
0720 irqd.hdl = (void*)sse_test_ehdl;
0721 irqd.handle = 0;
0722
0723 if ( ! BSP_install_rtems_irq_handler( &irqd ) ) {
0724 fprintf(stderr, "sse_test ERROR: Unable to install ISR\n");
0725 errs++;
0726 goto bail;
0727 }
0728
0729
0730 if ( (st = exc_raise(IRQ_EXC)) ) {
0731 prstat(st, "IRQ");
0732 errs++;
0733 }
0734
0735 if ( ! BSP_remove_rtems_irq_handler( &irqd ) ) {
0736 fprintf(stderr, "sse_test ERROR: Unable to uninstall ISR\n");
0737 }
0738
0739
0740 asm volatile("fstcw %0":"=m"(fp_cw));
0741 asm volatile("stmxcsr %0":"=m"(mxcsr));
0742 mxcsr &= ~(MXCSR_ALLE);
0743 if ( fp_cw != fp_cw_set ) {
0744 fprintf(stderr,"sse_test ERROR: FPCW mismatch (after IRQ): expected 0x%04"PRIx16", got 0x%04"PRIx16"\n", fp_cw_set, fp_cw);
0745 errs++;
0746 }
0747 if ( mxcsr != mxcsr_set ) {
0748 fprintf(stderr,"sse_test ERROR: MXCSR mismatch (after IRQ): expected 0x%08"PRIx32", got 0x%08"PRIx32"\n", mxcsr_set, mxcsr);
0749 errs++;
0750 }
0751 }
0752
0753
0754 bail:
0755
0756 rtems_task_wake_after(5);
0757 fprintf(stderr,"SSE/FPU Test %s (%u errors)\n", errs ? "FAILED":"PASSED", errs);
0758 if ( sse_test_ohdl ) {
0759 _currentExcHandler = sse_test_ohdl;
0760 sse_test_ohdl = 0;
0761 }
0762 if ( sse_test_sync )
0763 rtems_semaphore_delete( sse_test_sync );
0764 sse_test_sync = 0;
0765 if ( hp_task )
0766 rtems_task_delete( hp_task );
0767
0768 if ( ! (flags & SSE_TEST_NO_DEL) )
0769 rtems_task_exit();
0770 }
0771
0772 static void
0773 sse_test_ehdl(CPU_Exception_frame *p_f)
0774 {
0775 int i,j,start = 0;
0776 int mismatch;
0777 __vf f4;
0778
0779 if ( p_f ) {
0780 printk("Got exception #%u\n", p_f->idtIndex);
0781 printk("EIP: 0x%08x, ESP: 0x%08x\n", p_f->eip, p_f->esp0);
0782 printk("TID: 0x%08x\n", _Thread_Executing->Object.id);
0783
0784 if ( ! p_f->fp_ctxt ) {
0785 printk("ERROR: NO FP/SSE CONTEXT ATTACHED ??\n");
0786 sse_test_ohdl(p_f);
0787 }
0788 if ( 16 == p_f->idtIndex ) {
0789 printk("Resetting FP status (0x%04"PRIx16")\n", p_f->fp_ctxt->fsw);
0790 p_f->fp_ctxt->fsw = 0;
0791 } else if ( 19 == p_f->idtIndex ) {
0792 start = 1;
0793 memcpy(&f4, p_f->fp_ctxt->xmmregs[0], sizeof(f4));
0794 f4 = -f4;
0795 memcpy(p_f->fp_ctxt->xmmregs[0], &f4, sizeof(f4));
0796 p_f->fp_ctxt->mxcsr &= ~MXCSR_ALLE;
0797 } else {
0798 printk("(skipping non-FP exception)\n");
0799 sse_test_ohdl(p_f);
0800 }
0801
0802 printk("Checking XMM regs -- ");
0803 for ( mismatch=0, i=start; i<8; i++ ) {
0804 for ( j=0; j<16; j++ ) {
0805 if ( p_f->fp_ctxt->xmmregs[i][j] != ((i<<4) | j) )
0806 mismatch++;
0807 }
0808 }
0809 if ( mismatch ) {
0810 printk("%u mismatches; dump:\n", mismatch);
0811 for ( i=0; i<8; i++ ) {
0812 for ( j=0; j<16; j++ ) {
0813 printk("0x%02x ", p_f->fp_ctxt->xmmregs[i][j]);
0814 }
0815 printk("\n");
0816 }
0817 } else {
0818 printk("OK\n");
0819 }
0820 } else {
0821 printk("IRQ %u\n", SSE_TEST_IRQ);
0822 }
0823 printk("Clobbering FPU/SSE state\n");
0824 asm volatile("finit");
0825 sse_clobber(0xdeadbeef);
0826 printk("Notifying task\n");
0827 rtems_semaphore_release( sse_test_sync );
0828 }
0829
0830 #else
0831
0832
0833
0834
0835
0836 #include <signal.h>
0837 #include <ucontext.h>
0838
0839 #define MKCASE(X) case FPE_##X: msg="FPE_"#X; break;
0840
0841 #define CLRXMM(i) __asm__ volatile("pxor %%xmm"#i", %%xmm"#i:::"xmm"#i)
0842
0843 static void
0844 fpe_act(int signum, siginfo_t *p_info, void *arg3)
0845 {
0846 ucontext_t *p_ctxt = arg3;
0847 const char *msg = "FPE_UNKNOWN";
0848 uint16_t *p_fst;
0849
0850 if ( SIGFPE != signum ) {
0851 fprintf(stderr,"WARNING: fpe_act handles SIGFPE\n");
0852 return;
0853 }
0854 switch ( p_info->si_code ) {
0855 default:
0856 fprintf(stderr,"WARNING: fpe_act got unkown code %u\n", p_info->si_code);
0857 return;
0858 MKCASE(INTDIV);
0859 MKCASE(INTOVF);
0860 MKCASE(FLTDIV);
0861 MKCASE(FLTOVF);
0862 MKCASE(FLTUND);
0863 MKCASE(FLTRES);
0864 MKCASE(FLTINV);
0865 MKCASE(FLTSUB);
0866 }
0867 fprintf(stderr,"Got SIGFPE (%s) @%p\n", msg, p_info->si_addr);
0868 #ifdef __linux__
0869 fprintf(stderr,"Resetting FP status 0x%02lx\n", p_ctxt->uc_mcontext.fpregs->sw);
0870 p_ctxt->uc_mcontext.fpregs->sw = 0;
0871 #ifdef TEST_MISMATCH
0872 fp_st1((void*)&p_ctxt->uc_mcontext.fpregs->_st[3],2.345);
0873 #endif
0874 #endif
0875
0876
0877
0878
0879
0880 asm volatile("finit");
0881 sse_clobber(0xdeadbeef);
0882 }
0883
0884 static void
0885 test(void)
0886 {
0887 Context_Control_sse ctxt;
0888
0889 stor_ctxt(&ctxt);
0890 printf("FPCW: 0x%"PRIx16"\nFPSW: 0x%"PRIx16"\n", ctxt.fcw, ctxt.fsw);
0891 printf("FTAG: 0x%"PRIx8"\n",ctxt.ftw);
0892 }
0893
0894 int
0895 main(int argc, char **argv)
0896 {
0897 struct sigaction a1, a2;
0898 uint32_t mxcsr;
0899
0900 memset(&a1, 0, sizeof(a1));
0901
0902 a1.sa_sigaction = fpe_act;
0903 a1.sa_flags = SA_SIGINFO;
0904
0905 if ( sigaction(SIGFPE, &a1, &a2) ) {
0906 perror("sigaction");
0907 return 1;
0908 }
0909
0910 asm volatile("stmxcsr %0":"=m"(mxcsr));
0911 printf("MXCSR: 0x%08"PRIx32"\n", mxcsr);
0912
0913 test();
0914 exc_raise(0);
0915 return 0;
0916 }
0917 #endif
0918
0919
0920
/*
 * Return the current value of control register CR4.
 */
uint32_t
mfcr4(void)
{
    uint32_t rval;

    __asm__ __volatile__("mov %%cr4, %0":"=r"(rval));
    return rval;
}
0927 }
0928
/*
 * Write `rval` to control register CR4.
 */
void
mtcr4(uint32_t rval)
{
    __asm__ __volatile__(
        "mov %[val], %%cr4"
        : /* no outputs */
        : [val] "r" (rval)
    );
}
0934
/*
 * Return the current value of the MXCSR register.
 */
uint32_t
mfmxcsr(void)
{
    uint32_t rval;

    __asm__ __volatile__("stmxcsr %0":"=m"(rval));
    return rval;
}
0942
/*
 * Load `rval` into the MXCSR register.
 */
void
mtmxcsr(uint32_t rval)
{
    __asm__ __volatile__(
        "ldmxcsr %[val]"
        : /* no outputs */
        : [val] "m" (rval)
    );
}
0948
0949
0950 float
0951 sseraise()
0952 {
0953 __vf f4={-2., -2., -2. -2.};
0954 float f;
0955 f4 = __builtin_ia32_sqrtps( f4 );
0956 memcpy(&f,&f4,sizeof(f));
0957 return f;
0958 }