File indexing completed on 2025-05-11 08:23:48
0001 #include "fpsp-namespace.h"
0002 //
0003 //
0004 // satan.sa 3.3 12/19/90
0005 //
0006 // The entry point satan computes the arctangent of an
0007 // input value. satand does the same except the input value is a
0008 // denormalized number.
0009 //
0010 // Input: Double-extended value in memory location pointed to by address
0011 // register a0.
0012 //
0013 // Output: Arctan(X) returned in floating-point register Fp0.
0014 //
0015 // Accuracy and Monotonicity: The returned result is within 2 ulps in
0016 // 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
0017 // result is subsequently rounded to double precision. The
0018 // result is provably monotonic in double precision.
0019 //
0020 // Speed: The program satan takes approximately 160 cycles for input
0021 // argument X such that 1/16 < |X| < 16. For the other arguments,
0022 // the program will run no worse than 10% slower.
0023 //
0024 // Algorithm:
0025 // Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.
0026 //
0027 // Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3.
0028 // Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits
0029 // of X with a bit-1 attached at the 6-th bit position. Define u
0030 // to be u = (X-F) / (1 + X*F).
0031 //
0032 // Step 3. Approximate arctan(u) by a polynomial poly.
0033 //
0034 // Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values
0035 // calculated beforehand. Exit.
0036 //
0037 // Step 5. If |X| >= 16, go to Step 7.
0038 //
0039 // Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.
0040 //
0041 // Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'.
0042 // Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.
0043 //
0044
0045 // Copyright (C) Motorola, Inc. 1990
0046 // All Rights Reserved
0047 //
0048 // THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
0049 // The copyright notice above does not evidence any
0050 // actual or intended publication of such source code.
0051
0052 //satan idnt 2,1 | Motorola 040 Floating Point Software Package
0053
0054 |section 8
0055
0056 #include "fpsp.defs"
0057
0058 BOUNDS1: .long 0x3FFB8000,0x4002FFFF
0059
0060 ONE: .long 0x3F800000
0061
0062 .long 0x00000000
0063
0064 ATANA3: .long 0xBFF6687E,0x314987D8
0065 ATANA2: .long 0x4002AC69,0x34A26DB3
0066
0067 ATANA1: .long 0xBFC2476F,0x4E1DA28E
0068 ATANB6: .long 0x3FB34444,0x7F876989
0069
0070 ATANB5: .long 0xBFB744EE,0x7FAF45DB
0071 ATANB4: .long 0x3FBC71C6,0x46940220
0072
0073 ATANB3: .long 0xBFC24924,0x921872F9
0074 ATANB2: .long 0x3FC99999,0x99998FA9
0075
0076 ATANB1: .long 0xBFD55555,0x55555555
0077 ATANC5: .long 0xBFB70BF3,0x98539E6A
0078
0079 ATANC4: .long 0x3FBC7187,0x962D1D7D
0080 ATANC3: .long 0xBFC24924,0x827107B8
0081
0082 ATANC2: .long 0x3FC99999,0x9996263E
0083 ATANC1: .long 0xBFD55555,0x55555536
0084
0085 PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
0086 NPIBY2: .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
0087 PTINY: .long 0x00010000,0x80000000,0x00000000,0x00000000
0088 NTINY: .long 0x80010000,0x80000000,0x00000000,0x00000000
0089
0090 ATANTBL:
0091 .long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
0092 .long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
0093 .long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
0094 .long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
0095 .long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
0096 .long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000
0097 .long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
0098 .long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
0099 .long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
0100 .long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
0101 .long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
0102 .long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
0103 .long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
0104 .long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
0105 .long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
0106 .long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
0107 .long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
0108 .long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
0109 .long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
0110 .long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
0111 .long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
0112 .long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
0113 .long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
0114 .long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
0115 .long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
0116 .long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
0117 .long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
0118 .long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
0119 .long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
0120 .long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
0121 .long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
0122 .long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
0123 .long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
0124 .long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
0125 .long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
0126 .long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
0127 .long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
0128 .long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
0129 .long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
0130 .long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
0131 .long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
0132 .long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
0133 .long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
0134 .long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
0135 .long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
0136 .long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
0137 .long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
0138 .long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
0139 .long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
0140 .long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
0141 .long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
0142 .long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
0143 .long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
0144 .long 0x3FFE0000,0x97731420,0x365E538C,0x00000000
0145 .long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
0146 .long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
0147 .long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
0148 .long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
0149 .long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
0150 .long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
0151 .long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
0152 .long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
0153 .long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
0154 .long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
0155 .long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
0156 .long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
0157 .long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
0158 .long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
0159 .long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000
0160 .long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
0161 .long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
0162 .long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
0163 .long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
0164 .long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
0165 .long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
0166 .long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
0167 .long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
0168 .long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
0169 .long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
0170 .long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
0171 .long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
0172 .long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
0173 .long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
0174 .long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
0175 .long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
0176 .long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
0177 .long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
0178 .long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000
0179 .long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
0180 .long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
0181 .long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
0182 .long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
0183 .long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
0184 .long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
0185 .long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
0186 .long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
0187 .long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
0188 .long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
0189 .long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
0190 .long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
0191 .long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
0192 .long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
0193 .long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
0194 .long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
0195 .long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
0196 .long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000
0197 .long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
0198 .long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
0199 .long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
0200 .long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
0201 .long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
0202 .long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
0203 .long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
0204 .long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
0205 .long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
0206 .long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
0207 .long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
0208 .long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
0209 .long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
0210 .long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
0211 .long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
0212 .long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
0213 .long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
0214 .long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
0215 .long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
0216 .long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
0217 .long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
0218 .long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
0219
0220 .set X,FP_SCR1
0221 .set XDCARE,X+2
0222 .set XFRAC,X+4
0223 .set XFRACLO,X+8
0224
0225 .set ATANF,FP_SCR2
0226 .set ATANFHI,ATANF+4
0227 .set ATANFLO,ATANF+8
0228
0229
0230 | xref t_frcinx
0231 |xref t_extdnrm
0232
0233 .global satand
0234 satand:
0235 //--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
0236
0237 bra t_extdnrm
0238
0239 .global satan
0240 satan:
0241 //--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
0242
0243 fmovex (%a0),%fp0 // ...LOAD INPUT
0244
0245 movel (%a0),%d0
0246 movew 4(%a0),%d0
0247 fmovex %fp0,X(%a6)
0248 andil #0x7FFFFFFF,%d0
0249
0250 cmpil #0x3FFB8000,%d0 // ...|X| >= 1/16?
0251 bges ATANOK1
0252 bra ATANSM
0253
0254 ATANOK1:
0255 cmpil #0x4002FFFF,%d0 // ...|X| < 16 ?
0256 bles ATANMAIN
0257 bra ATANBIG
0258
0259
0260 //--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
0261 //--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
0262 //--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
0263 //--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
0264 //--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
0265 //--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
0266 //--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
0267 //--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDED UNDER THE
0268 //--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
0269 //--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATE
0270 //--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
0271 //--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
0272 //--WILL INVOLVE A VERY LONG POLYNOMIAL.
0273
0274 //--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
0275 //--WE CHOSE F TO BE +-2^K * 1.BBBB1
0276 //--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
0277 //--SIXTH BITS IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
0278 //--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
0279 //-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
0280
0281 ATANMAIN:
0282
0283 movew #0x0000,XDCARE(%a6) // ...CLEAN UP X JUST IN CASE
0284 andil #0xF8000000,XFRAC(%a6) // ...FIRST 5 BITS
0285 oril #0x04000000,XFRAC(%a6) // ...SET 6-TH BIT TO 1
0286 movel #0x00000000,XFRACLO(%a6) // ...LOCATION OF X IS NOW F
0287
0288 fmovex %fp0,%fp1 // ...FP1 IS X
0289 fmulx X(%a6),%fp1 // ...FP1 IS X*F, NOTE THAT X*F > 0
0290 fsubx X(%a6),%fp0 // ...FP0 IS X-F
0291 fadds #0x3F800000,%fp1 // ...FP1 IS 1 + X*F
0292 fdivx %fp1,%fp0 // ...FP0 IS U = (X-F)/(1+X*F)
0293
0294 //--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
0295 //--CREATE ATAN(F) AND STORE IT IN ATANF, AND
0296 //--SAVE REGISTERS FP2.
0297
0298 movel %d2,-(%a7) // ...SAVE d2 TEMPORARILY
0299 movel %d0,%d2 // ...THE EXPO AND 16 BITS OF X
0300 andil #0x00007800,%d0 // ...4 VARYING BITS OF F'S FRACTION
0301 andil #0x7FFF0000,%d2 // ...EXPONENT OF F
0302 subil #0x3FFB0000,%d2 // ...K+4
0303 asrl #1,%d2
0304 addl %d2,%d0 // ...THE 7 BITS IDENTIFYING F
0305 asrl #7,%d0 // ...INDEX INTO TBL OF ATAN(|F|)
0306 lea ATANTBL,%a1
0307 addal %d0,%a1 // ...ADDRESS OF ATAN(|F|)
0308 movel (%a1)+,ATANF(%a6)
0309 movel (%a1)+,ATANFHI(%a6)
0310 movel (%a1)+,ATANFLO(%a6) // ...ATANF IS NOW ATAN(|F|)
0311 movel X(%a6),%d0 // ...LOAD SIGN AND EXPO. AGAIN
0312 andil #0x80000000,%d0 // ...SIGN(F)
0313 orl %d0,ATANF(%a6) // ...ATANF IS NOW SIGN(F)*ATAN(|F|)
0314 movel (%a7)+,%d2 // ...RESTORE d2
0315
0316 //--THAT'S ALL I HAVE TO DO FOR NOW,
0317 //--BUT ALAS, THE DIVIDE IS STILL CRANKING!
0318
0319 //--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
0320 //--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
0321 //--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
0322 //--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
0323 //--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
0324 //--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
0325 //--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
0326
0327
0328 fmovex %fp0,%fp1
0329 fmulx %fp1,%fp1
0330 fmoved ATANA3,%fp2
0331 faddx %fp1,%fp2 // ...A3+V
0332 fmulx %fp1,%fp2 // ...V*(A3+V)
0333 fmulx %fp0,%fp1 // ...U*V
0334 faddd ATANA2,%fp2 // ...A2+V*(A3+V)
0335 fmuld ATANA1,%fp1 // ...A1*U*V
0336 fmulx %fp2,%fp1 // ...A1*U*V*(A2+V*(A3+V))
0337
0338 faddx %fp1,%fp0 // ...ATAN(U), FP1 RELEASED
0339 fmovel %d1,%FPCR //restore users exceptions
0340 faddx ATANF(%a6),%fp0 // ...ATAN(X)
0341 bra t_frcinx
0342
0343 ATANBORS:
0344 //--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
0345 //--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
0346 cmpil #0x3FFF8000,%d0
0347 bgt ATANBIG // ...I.E. |X| >= 16
0348
0349 ATANSM:
0350 //--|X| <= 1/16
0351 //--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
0352 //--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
0353 //--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] )
0354 //--WHERE Y = X*X, AND Z = Y*Y.
0355
0356 cmpil #0x3FD78000,%d0
0357 blt ATANTINY
0358 //--COMPUTE POLYNOMIAL
0359 fmulx %fp0,%fp0 // ...FP0 IS Y = X*X
0360
0361
0362 movew #0x0000,XDCARE(%a6)
0363
0364 fmovex %fp0,%fp1
0365 fmulx %fp1,%fp1 // ...FP1 IS Z = Y*Y
0366
0367 fmoved ATANB6,%fp2
0368 fmoved ATANB5,%fp3
0369
0370 fmulx %fp1,%fp2 // ...Z*B6
0371 fmulx %fp1,%fp3 // ...Z*B5
0372
0373 faddd ATANB4,%fp2 // ...B4+Z*B6
0374 faddd ATANB3,%fp3 // ...B3+Z*B5
0375
0376 fmulx %fp1,%fp2 // ...Z*(B4+Z*B6)
0377 fmulx %fp3,%fp1 // ...Z*(B3+Z*B5)
0378
0379 faddd ATANB2,%fp2 // ...B2+Z*(B4+Z*B6)
0380 faddd ATANB1,%fp1 // ...B1+Z*(B3+Z*B5)
0381
0382 fmulx %fp0,%fp2 // ...Y*(B2+Z*(B4+Z*B6))
0383 fmulx X(%a6),%fp0 // ...X*Y
0384
0385 faddx %fp2,%fp1 // ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
0386
0387
0388 fmulx %fp1,%fp0 // ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
0389
0390 fmovel %d1,%FPCR //restore users exceptions
0391 faddx X(%a6),%fp0
0392
0393 bra t_frcinx
0394
0395 ATANTINY:
0396 //--|X| < 2^(-40), ATAN(X) = X
0397 movew #0x0000,XDCARE(%a6)
0398
0399 fmovel %d1,%FPCR //restore users exceptions
0400 fmovex X(%a6),%fp0 //last inst - possible exception set
0401
0402 bra t_frcinx
0403
0404 ATANBIG:
0405 //--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
0406 //--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
0407 cmpil #0x40638000,%d0
0408 bgt ATANHUGE
0409
0410 //--APPROXIMATE ATAN(-1/X) BY
0411 //--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
0412 //--THIS CAN BE RE-WRITTEN AS
0413 //--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
0414
0415 fmoves #0xBF800000,%fp1 // ...LOAD -1
0416 fdivx %fp0,%fp1 // ...FP1 IS -1/X
0417
0418
0419 //--DIVIDE IS STILL CRANKING
0420
0421 fmovex %fp1,%fp0 // ...FP0 IS X'
0422 fmulx %fp0,%fp0 // ...FP0 IS Y = X'*X'
0423 fmovex %fp1,X(%a6) // ...X IS REALLY X'
0424
0425 fmovex %fp0,%fp1
0426 fmulx %fp1,%fp1 // ...FP1 IS Z = Y*Y
0427
0428 fmoved ATANC5,%fp3
0429 fmoved ATANC4,%fp2
0430
0431 fmulx %fp1,%fp3 // ...Z*C5
0432 fmulx %fp1,%fp2 // ...Z*B4
0433
0434 faddd ATANC3,%fp3 // ...C3+Z*C5
0435 faddd ATANC2,%fp2 // ...C2+Z*C4
0436
0437 fmulx %fp3,%fp1 // ...Z*(C3+Z*C5), FP3 RELEASED
0438 fmulx %fp0,%fp2 // ...Y*(C2+Z*C4)
0439
0440 faddd ATANC1,%fp1 // ...C1+Z*(C3+Z*C5)
0441 fmulx X(%a6),%fp0 // ...X'*Y
0442
0443 faddx %fp2,%fp1 // ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
0444
0445
0446 fmulx %fp1,%fp0 // ...X'*Y*([B1+Z*(B3+Z*B5)]
0447 // ... +[Y*(B2+Z*(B4+Z*B6))])
0448 faddx X(%a6),%fp0
0449
0450 fmovel %d1,%FPCR //restore users exceptions
0451
0452 btstb #7,(%a0)
0453 beqs pos_big
0454
0455 neg_big:
0456 faddx NPIBY2,%fp0
0457 bra t_frcinx
0458
0459 pos_big:
0460 faddx PPIBY2,%fp0
0461 bra t_frcinx
0462
0463 ATANHUGE:
0464 //--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
0465 btstb #7,(%a0)
0466 beqs pos_huge
0467
0468 neg_huge:
0469 fmovex NPIBY2,%fp0
0470 fmovel %d1,%fpcr
0471 fsubx NTINY,%fp0
0472 bra t_frcinx
0473
0474 pos_huge:
0475 fmovex PPIBY2,%fp0
0476 fmovel %d1,%fpcr
0477 fsubx PTINY,%fp0
0478 bra t_frcinx
0479
0480 |end