shared/fpsp/sacos.S

0001 #include "fpsp-namespace.h"
0002 //
0003 //
0004 //  sacos.sa 3.3 12/19/90
0005 //
0006 //  Description: The entry point sAcos computes the inverse cosine of
0007 //      an input argument; sAcosd does the same except for denormalized
0008 //      input.
0009 //
0010 //  Input: Double-extended number X in location pointed to
0011 //      by address register a0.
0012 //
0013 //  Output: The value arccos(X) returned in floating-point register Fp0.
0014 //
0015 //  Accuracy and Monotonicity: The returned result is within 3 ulps in
0016 //      64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
0017 //      result is subsequently rounded to double precision. The
0018 //      result is provably monotonic in double precision.
0019 //
0020 //  Speed: The program sACOS takes approximately 310 cycles.
0021 //
0022 //  Algorithm:
0023 //
0024 //  ACOS
0025 //  1. If |X| >= 1, go to 3.
0026 //
0027 //  2. (|X| < 1) Calculate acos(X) by
0028 //      z := (1-X) / (1+X)
0029 //      acos(X) = 2 * atan( sqrt(z) ).
0030 //      Exit.
0031 //
0032 //  3. If |X| > 1, go to 5.
0033 //
0034 //  4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit.
0035 //
0036 //  5. (|X| > 1) Generate an invalid operation by 0 * infinity.
0037 //      Exit.
0038 //
0039
0040 //      Copyright (C) Motorola, Inc. 1990
0041 //          All Rights Reserved
0042 //
0043 //  THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
0044 //  The copyright notice above does not evidence any
0045 //  actual or intended publication of such source code.
0046
0047 //SACOS idnt    2,1 | Motorola 040 Floating Point Software Package
0048
0049     |section    8
0050
0051 PI: .long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000    // pi in extended precision: exponent word 0x4000, 64-bit mantissa 0xC90FDAA2 2168C235; the fourth long is zero and lies beyond the 96-bit operand
0052 PIBY2:  .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000    // pi/2: same mantissa as PI with the exponent word one lower (0x3FFF)
0053
0054     |xref   t_operr
0055     |xref   t_frcinx
0056     |xref   satan
0057
0058     .global sacosd
0059 sacosd:
0060 //--Entry for denormalized X: the input is not read; ACOS(X) is returned as PI/2.
0061     fmovel      %d1,%fpcr       // ...install the user's rounding mode/precision (passed in d1)
0062     fmovex      PIBY2,%fp0      // ...fp0 = PI/2 from the PIBY2 constant, loaded with the user's FPCR in effect
0063     bra     t_frcinx        // ...exit via t_frcinx (external; presumably marks the result inexact - confirm)
0064
0065     .global sacos       // sacos: arccos of the extended-precision X at (a0), result in fp0; d1 carries the user's FPCR value
0066 sacos:
0067     fmovex      (%a0),%fp0  // ...fp0 = X (full extended-precision input)
0068
0069     movel       (%a0),%d0       // ...d0 high word = sign/exponent word of X
0070     movew       4(%a0),%d0      // ...d0 low word = upper 16 bits of the mantissa
0071     andil       #0x7FFFFFFF,%d0     // ...clear the sign bit, leaving a packed |X|
0072     cmpil       #0x3FFF8000,%d0     // ...0x3FFF8000 is the packed form of 1.0
0073     bges        ACOSBIG     // ...packed |X| >= 1: resolved with a full compare at ACOSBIG
0074
0075 //--USUAL CASE, |X| < 1: BUILD SQRT((1-X)/(1+X)) IN FP0, PASS IT TO SATAN, THEN DOUBLE
0076 //--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )
0077
0078     fmoves      #0x3F800000,%fp1    // ...fp1 = 1.0 (single-precision immediate)
0079     faddx       %fp0,%fp1       // ...fp1 = 1+X
0080     fnegx       %fp0        // ...fp0 = -X
0081     fadds       #0x3F800000,%fp0    // ...fp0 = 1-X
0082     fdivx       %fp1,%fp0       // ...fp0 = (1-X)/(1+X)
0083     fsqrtx      %fp0        // ...fp0 = SQRT((1-X)/(1+X))
0084     fmovemx %fp0-%fp0,(%a0) // ...store fp0 over the input, so (a0) now holds the argument for satan
0085     movel       %d1,-(%sp)  //push the user's FPCR value (d1) for the restore after satan
0086     clrl        %d1         // ...d1 = 0 for the satan call (presumably selects default rounding/precision there - confirm)
0087     bsr     satan       // ...fp0 = ATAN(SQRT([1-X]/[1+X])); satan is external to this file
0088     fmovel      (%sp)+,%fpcr    //pop the saved user value straight into FPCR
0089     faddx       %fp0,%fp0       // ...fp0 = 2 * ATAN( STUFF ), computed with the user's FPCR restored
0090     bra     t_frcinx        // ...exit via t_frcinx (external; presumably marks the result inexact - confirm)
0091
0092 ACOSBIG:        // reached when the packed test above saw |X| >= 1
0093     fabsx       %fp0        // ...fp0 = |X|
0094     fcmps       #0x3F800000,%fp0    // ...compare |X| with 1.0 using the full value in fp0
0095     fbgt        t_operr     //|X| > 1: leave through t_operr to cause an operr exception
0096
0097 //--|X| = 1 EXACTLY: ACOS(X) = 0 FOR X = +1, PI FOR X = -1
0098     movel       (%a0),%d0       // ...pack exponent with upper 16 fraction again, this time keeping the sign bit
0099     movew       4(%a0),%d0
0100     cmpl        #0,%d0      //D0 has original sign+exponent+fraction, so D0 > 0 means X = +1
0101     bgts        ACOSP1
0102
0103 //--X = -1
0104 //Returns PI and raises the inexact exception
0105     fmovex      PI,%fp0     // ...fp0 = PI, loaded before the user's FPCR is installed
0106     fmovel      %d1,%FPCR       // ...switch to the user's rounding mode/precision
0107     fadds       #0x00800000,%fp0    //add the smallest normalized single so the sum is inexact;
0108 //                  ;the inexact condition is now pending in the 040 and will
0109 //                  ;not trap until the next fp instruction.
0110     bra     t_frcinx
0111
0112 ACOSP1:         // X = +1
0113     fmovel      %d1,%FPCR       // ...install the user's rounding mode/precision
0114     fmoves      #0x00000000,%fp0    // ...fp0 = +0.0
0115     rts             //plain return without t_frcinx: Facos of +1 is exact
0116
0117     |end