File indexing completed on 2025-05-11 08:23:49
0001 #include "fpsp-namespace.h"
0002 //
0003 //
0004 // slogn.sa 3.1 12/10/90
0005 //
0006 // slogn computes the natural logarithm of an
0007 // input value. slognd does the same except the input value is a
0008 // denormalized number. slognp1 computes log(1+X), and slognp1d
0009 // computes log(1+X) for denormalized X.
0010 //
0011 // Input: Double-extended value in memory location pointed to by address
0012 // register a0.
0013 //
0014 // Output: log(X) or log(1+X) returned in floating-point register Fp0.
0015 //
0016 // Accuracy and Monotonicity: The returned result is within 2 ulps in
0017 // 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
0018 // result is subsequently rounded to double precision. The
0019 // result is provably monotonic in double precision.
0020 //
0021 // Speed: The program slogn takes approximately 190 cycles for input
0022 // argument X such that |X-1| >= 1/16, which is the the usual
0023 // situation. For those arguments, slognp1 takes approximately
0024 // 210 cycles. For the less common arguments, the program will
0025 // run no worse than 10% slower.
0026 //
0027 // Algorithm:
0028 // LOGN:
0029 // Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
0030 // u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
0031 //
0032 // Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
0033 // significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
0034 // 2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
0035 //
0036 // Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
0037 // log(1+u) = poly.
0038 //
0039 // Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
0040 // by k*log(2) + (log(F) + poly). The values of log(F) are calculated
0041 // beforehand and stored in the program.
0042 //
0043 // lognp1:
0044 // Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
0045 // u where u = 2X/(2+X). Otherwise, move on to Step 2.
0046 //
0047 // Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
0048 // of the algorithm for LOGN and compute log(1+X) as
0049 // k*log(2) + log(F) + poly where poly approximates log(1+u),
0050 // u = (Y-F)/F.
0051 //
0052 // Implementation Notes:
0053 // Note 1. There are 64 different possible values for F, thus 64 log(F)'s
0054 // need to be tabulated. Moreover, the values of 1/F are also
0055 // tabulated so that the division in (Y-F)/F can be performed by a
0056 // multiplication.
0057 //
0058 // Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value
0059 // Y-F has to be calculated carefully when 1/2 <= X < 3/2.
0060 //
0061 // Note 3. To fully exploit the pipeline, polynomials are usually separated
0062 // into two parts evaluated independently before being added up.
0063 //
0064
0065 // Copyright (C) Motorola, Inc. 1990
0066 // All Rights Reserved
0067 //
0068 // THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
0069 // The copyright notice above does not evidence any
0070 // actual or intended publication of such source code.
0071
0072 //slogn idnt 2,1 | Motorola 040 Floating Point Software Package
0073
0074 |section 8
0075
0076 #include "fpsp.defs"
0077
0078 BOUNDS1: .long 0x3FFEF07D,0x3FFF8841
0079 BOUNDS2: .long 0x3FFE8000,0x3FFFC000
0080
0081 LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
0082
0083 one: .long 0x3F800000
0084 zero: .long 0x00000000
0085 infty: .long 0x7F800000
0086 negone: .long 0xBF800000
0087
0088 LOGA6: .long 0x3FC2499A,0xB5E4040B
0089 LOGA5: .long 0xBFC555B5,0x848CB7DB
0090
0091 LOGA4: .long 0x3FC99999,0x987D8730
0092 LOGA3: .long 0xBFCFFFFF,0xFF6F7E97
0093
0094 LOGA2: .long 0x3FD55555,0x555555a4
0095 LOGA1: .long 0xBFE00000,0x00000008
0096
0097 LOGB5: .long 0x3F175496,0xADD7DAD6
0098 LOGB4: .long 0x3F3C71C2,0xFE80C7E0
0099
0100 LOGB3: .long 0x3F624924,0x928BCCFF
0101 LOGB2: .long 0x3F899999,0x999995EC
0102
0103 LOGB1: .long 0x3FB55555,0x55555555
0104 TWO: .long 0x40000000,0x00000000
0105
0106 LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000
0107
0108 LOGTBL:
0109 .long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
0110 .long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000
0111 .long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
0112 .long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
0113 .long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
0114 .long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
0115 .long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
0116 .long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
0117 .long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
0118 .long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
0119 .long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
0120 .long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
0121 .long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
0122 .long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
0123 .long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
0124 .long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
0125 .long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
0126 .long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
0127 .long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
0128 .long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
0129 .long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
0130 .long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
0131 .long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
0132 .long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
0133 .long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
0134 .long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
0135 .long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
0136 .long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
0137 .long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
0138 .long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
0139 .long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
0140 .long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
0141 .long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
0142 .long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
0143 .long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
0144 .long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
0145 .long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
0146 .long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
0147 .long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
0148 .long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
0149 .long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
0150 .long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
0151 .long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
0152 .long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
0153 .long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
0154 .long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
0155 .long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
0156 .long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
0157 .long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
0158 .long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
0159 .long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
0160 .long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
0161 .long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
0162 .long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
0163 .long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
0164 .long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
0165 .long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
0166 .long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
0167 .long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
0168 .long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
0169 .long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
0170 .long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
0171 .long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
0172 .long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
0173 .long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
0174 .long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
0175 .long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
0176 .long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
0177 .long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
0178 .long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
0179 .long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
0180 .long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
0181 .long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
0182 .long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
0183 .long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
0184 .long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
0185 .long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
0186 .long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
0187 .long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
0188 .long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
0189 .long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
0190 .long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
0191 .long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
0192 .long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
0193 .long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
0194 .long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000
0195 .long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000
0196 .long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
0197 .long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
0198 .long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
0199 .long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000
0200 .long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
0201 .long 0x3FFE0000,0x94458094,0x45809446,0x00000000
0202 .long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
0203 .long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000
0204 .long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
0205 .long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
0206 .long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
0207 .long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
0208 .long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
0209 .long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
0210 .long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
0211 .long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
0212 .long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
0213 .long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
0214 .long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
0215 .long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
0216 .long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
0217 .long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
0218 .long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
0219 .long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
0220 .long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
0221 .long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
0222 .long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
0223 .long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
0224 .long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
0225 .long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
0226 .long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
0227 .long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
0228 .long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
0229 .long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
0230 .long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
0231 .long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
0232 .long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
0233 .long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
0234 .long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
0235 .long 0x3FFE0000,0x80808080,0x80808081,0x00000000
0236 .long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
0237
0238 .set ADJK,L_SCR1
0239
0240 .set X,FP_SCR1
0241 .set XDCARE,X+2
0242 .set XFRAC,X+4
0243
0244 .set F,FP_SCR2
0245 .set FFRAC,F+4
0246
0247 .set KLOG2,FP_SCR3
0248
0249 .set SAVEU,FP_SCR4
0250
0251 | xref t_frcinx
0252 |xref t_extdnrm
0253 |xref t_operr
0254 |xref t_dz
0255
0256 .global slognd
0257 slognd:
0258 //--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
0259
0260 movel #-100,ADJK(%a6) // ...INPUT = 2^(ADJK) * FP0
0261
0262 //----normalize the input value by left shifting k bits (k to be determined
0263 //----below), adjusting exponent and storing -k to ADJK
0264 //----the value TWOTO100 is no longer needed.
0265 //----Note that this code assumes the denormalized input is NON-ZERO.
0266
0267 moveml %d2-%d7,-(%a7) // ...save some registers
0268 movel #0x00000000,%d3 // ...D3 is exponent of smallest norm. #
0269 movel 4(%a0),%d4
0270 movel 8(%a0),%d5 // ...(D4,D5) is (Hi_X,Lo_X)
0271 clrl %d2 // ...D2 used for holding K
0272
0273 tstl %d4
0274 bnes HiX_not0
0275
0276 HiX_0:
0277 movel %d5,%d4
0278 clrl %d5
0279 movel #32,%d2
0280 clrl %d6
0281 bfffo %d4{#0:#32},%d6
0282 lsll %d6,%d4
0283 addl %d6,%d2 // ...(D3,D4,D5) is normalized
0284
0285 movel %d3,X(%a6)
0286 movel %d4,XFRAC(%a6)
0287 movel %d5,XFRAC+4(%a6)
0288 negl %d2
0289 movel %d2,ADJK(%a6)
0290 fmovex X(%a6),%fp0
0291 moveml (%a7)+,%d2-%d7 // ...restore registers
0292 lea X(%a6),%a0
0293 bras LOGBGN // ...begin regular log(X)
0294
0295
0296 HiX_not0:
0297 clrl %d6
0298 bfffo %d4{#0:#32},%d6 // ...find first 1
0299 movel %d6,%d2 // ...get k
0300 lsll %d6,%d4
0301 movel %d5,%d7 // ...a copy of D5
0302 lsll %d6,%d5
0303 negl %d6
0304 addil #32,%d6
0305 lsrl %d6,%d7
0306 orl %d7,%d4 // ...(D3,D4,D5) normalized
0307
0308 movel %d3,X(%a6)
0309 movel %d4,XFRAC(%a6)
0310 movel %d5,XFRAC+4(%a6)
0311 negl %d2
0312 movel %d2,ADJK(%a6)
0313 fmovex X(%a6),%fp0
0314 moveml (%a7)+,%d2-%d7 // ...restore registers
0315 lea X(%a6),%a0
0316 bras LOGBGN // ...begin regular log(X)
0317
0318
0319 .global slogn
0320 slogn:
0321 //--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
0322
0323 fmovex (%a0),%fp0 // ...LOAD INPUT
0324 movel #0x00000000,ADJK(%a6)
0325
0326 LOGBGN:
0327 //--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
0328 //--A FINITE, NON-ZERO, NORMALIZED NUMBER.
0329
0330 movel (%a0),%d0
0331 movew 4(%a0),%d0
0332
0333 movel (%a0),X(%a6)
0334 movel 4(%a0),X+4(%a6)
0335 movel 8(%a0),X+8(%a6)
0336
0337 cmpil #0,%d0 // ...CHECK IF X IS NEGATIVE
0338 blt LOGNEG // ...LOG OF NEGATIVE ARGUMENT IS INVALID
0339 cmp2l BOUNDS1,%d0 // ...X IS POSITIVE, CHECK IF X IS NEAR 1
0340 bcc LOGNEAR1 // ...BOUNDS IS ROUGHLY [15/16, 17/16]
0341
0342 LOGMAIN:
0343 //--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
0344
0345 //--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
0346 //--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
0347 //--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
0348 //-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
0349 //--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
0350 //--LOG(1+U) CAN BE VERY EFFICIENT.
0351 //--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
0352 //--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
0353
0354 //--GET K, Y, F, AND ADDRESS OF 1/F.
0355 asrl #8,%d0
0356 asrl #8,%d0 // ...SHIFTED 16 BITS, BIASED EXPO. OF X
0357 subil #0x3FFF,%d0 // ...THIS IS K
0358 addl ADJK(%a6),%d0 // ...ADJUST K, ORIGINAL INPUT MAY BE DENORM.
0359 lea LOGTBL,%a0 // ...BASE ADDRESS OF 1/F AND LOG(F)
0360 fmovel %d0,%fp1 // ...CONVERT K TO FLOATING-POINT FORMAT
0361
0362 //--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
0363 movel #0x3FFF0000,X(%a6) // ...X IS NOW Y, I.E. 2^(-K)*X
0364 movel XFRAC(%a6),FFRAC(%a6)
0365 andil #0xFE000000,FFRAC(%a6) // ...FIRST 7 BITS OF Y
0366 oril #0x01000000,FFRAC(%a6) // ...GET F: ATTACH A 1 AT THE EIGHTH BIT
0367 movel FFRAC(%a6),%d0 // ...READY TO GET ADDRESS OF 1/F
0368 andil #0x7E000000,%d0
0369 asrl #8,%d0
0370 asrl #8,%d0
0371 asrl #4,%d0 // ...SHIFTED 20, D0 IS THE DISPLACEMENT
0372 addal %d0,%a0 // ...A0 IS THE ADDRESS FOR 1/F
0373
0374 fmovex X(%a6),%fp0
0375 movel #0x3fff0000,F(%a6)
0376 clrl F+8(%a6)
0377 fsubx F(%a6),%fp0 // ...Y-F
0378 fmovemx %fp2-%fp2/%fp3,-(%sp) // ...SAVE FP2 WHILE FP0 IS NOT READY
0379 //--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
0380 //--REGISTERS SAVED: FPCR, FP1, FP2
0381
0382 LP1CONT1:
0383 //--AN RE-ENTRY POINT FOR LOGNP1
0384 fmulx (%a0),%fp0 // ...FP0 IS U = (Y-F)/F
0385 fmulx LOGOF2,%fp1 // ...GET K*LOG2 WHILE FP0 IS NOT READY
0386 fmovex %fp0,%fp2
0387 fmulx %fp2,%fp2 // ...FP2 IS V=U*U
0388 fmovex %fp1,KLOG2(%a6) // ...PUT K*LOG2 IN MEMORY, FREE FP1
0389
0390 //--LOG(1+U) IS APPROXIMATED BY
0391 //--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
0392 //--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))]
0393
0394 fmovex %fp2,%fp3
0395 fmovex %fp2,%fp1
0396
0397 fmuld LOGA6,%fp1 // ...V*A6
0398 fmuld LOGA5,%fp2 // ...V*A5
0399
0400 faddd LOGA4,%fp1 // ...A4+V*A6
0401 faddd LOGA3,%fp2 // ...A3+V*A5
0402
0403 fmulx %fp3,%fp1 // ...V*(A4+V*A6)
0404 fmulx %fp3,%fp2 // ...V*(A3+V*A5)
0405
0406 faddd LOGA2,%fp1 // ...A2+V*(A4+V*A6)
0407 faddd LOGA1,%fp2 // ...A1+V*(A3+V*A5)
0408
0409 fmulx %fp3,%fp1 // ...V*(A2+V*(A4+V*A6))
0410 addal #16,%a0 // ...ADDRESS OF LOG(F)
0411 fmulx %fp3,%fp2 // ...V*(A1+V*(A3+V*A5)), FP3 RELEASED
0412
0413 fmulx %fp0,%fp1 // ...U*V*(A2+V*(A4+V*A6))
0414 faddx %fp2,%fp0 // ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED
0415
0416 faddx (%a0),%fp1 // ...LOG(F)+U*V*(A2+V*(A4+V*A6))
0417 fmovemx (%sp)+,%fp2-%fp2/%fp3 // ...RESTORE FP2
0418 faddx %fp1,%fp0 // ...FP0 IS LOG(F) + LOG(1+U)
0419
0420 fmovel %d1,%fpcr
0421 faddx KLOG2(%a6),%fp0 // ...FINAL ADD
0422 bra t_frcinx
0423
0424
0425 LOGNEAR1:
0426 //--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
0427 fmovex %fp0,%fp1
0428 fsubs one,%fp1 // ...FP1 IS X-1
0429 fadds one,%fp0 // ...FP0 IS X+1
0430 faddx %fp1,%fp1 // ...FP1 IS 2(X-1)
0431 //--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
0432 //--IN U, U = 2(X-1)/(X+1) = FP1/FP0
0433
0434 LP1CONT2:
0435 //--THIS IS AN RE-ENTRY POINT FOR LOGNP1
0436 fdivx %fp0,%fp1 // ...FP1 IS U
0437 fmovemx %fp2-%fp2/%fp3,-(%sp) // ...SAVE FP2
0438 //--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
0439 //--LET V=U*U, W=V*V, CALCULATE
0440 //--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
0441 //--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] )
0442 fmovex %fp1,%fp0
0443 fmulx %fp0,%fp0 // ...FP0 IS V
0444 fmovex %fp1,SAVEU(%a6) // ...STORE U IN MEMORY, FREE FP1
0445 fmovex %fp0,%fp1
0446 fmulx %fp1,%fp1 // ...FP1 IS W
0447
0448 fmoved LOGB5,%fp3
0449 fmoved LOGB4,%fp2
0450
0451 fmulx %fp1,%fp3 // ...W*B5
0452 fmulx %fp1,%fp2 // ...W*B4
0453
0454 faddd LOGB3,%fp3 // ...B3+W*B5
0455 faddd LOGB2,%fp2 // ...B2+W*B4
0456
0457 fmulx %fp3,%fp1 // ...W*(B3+W*B5), FP3 RELEASED
0458
0459 fmulx %fp0,%fp2 // ...V*(B2+W*B4)
0460
0461 faddd LOGB1,%fp1 // ...B1+W*(B3+W*B5)
0462 fmulx SAVEU(%a6),%fp0 // ...FP0 IS U*V
0463
0464 faddx %fp2,%fp1 // ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
0465 fmovemx (%sp)+,%fp2-%fp2/%fp3 // ...FP2 RESTORED
0466
0467 fmulx %fp1,%fp0 // ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
0468
0469 fmovel %d1,%fpcr
0470 faddx SAVEU(%a6),%fp0
0471 bra t_frcinx
0472 rts
0473
0474 LOGNEG:
0475 //--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
0476 bra t_operr
0477
0478 .global slognp1d
0479 slognp1d:
0480 //--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
0481 // Simply return the denorm
0482
0483 bra t_extdnrm
0484
0485 .global slognp1
0486 slognp1:
0487 //--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
0488
0489 fmovex (%a0),%fp0 // ...LOAD INPUT
0490 fabsx %fp0 //test magnitude
0491 fcmpx LTHOLD,%fp0 //compare with min threshold
0492 fbgt LP1REAL //if greater, continue
0493 fmovel #0,%fpsr //clr N flag from compare
0494 fmovel %d1,%fpcr
0495 fmovex (%a0),%fp0 //return signed argument
0496 bra t_frcinx
0497
0498 LP1REAL:
0499 fmovex (%a0),%fp0 // ...LOAD INPUT
0500 movel #0x00000000,ADJK(%a6)
0501 fmovex %fp0,%fp1 // ...FP1 IS INPUT Z
0502 fadds one,%fp0 // ...X := ROUND(1+Z)
0503 fmovex %fp0,X(%a6)
0504 movew XFRAC(%a6),XDCARE(%a6)
0505 movel X(%a6),%d0
0506 cmpil #0,%d0
0507 ble LP1NEG0 // ...LOG OF ZERO OR -VE
0508 cmp2l BOUNDS2,%d0
0509 bcs LOGMAIN // ...BOUNDS2 IS [1/2,3/2]
0510 //--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
0511 //--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
0512 //--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
0513
0514 LP1NEAR1:
0515 //--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
0516 cmp2l BOUNDS1,%d0
0517 bcss LP1CARE
0518
0519 LP1ONE16:
0520 //--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
0521 //--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
0522 faddx %fp1,%fp1 // ...FP1 IS 2Z
0523 fadds one,%fp0 // ...FP0 IS 1+X
0524 //--U = FP1/FP0
0525 bra LP1CONT2
0526
0527 LP1CARE:
0528 //--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
0529 //--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
0530 //--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
0531 //--THERE ARE ONLY TWO CASES.
0532 //--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
0533 //--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
0534 //--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
0535 //--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
0536
0537 movel XFRAC(%a6),FFRAC(%a6)
0538 andil #0xFE000000,FFRAC(%a6)
0539 oril #0x01000000,FFRAC(%a6) // ...F OBTAINED
0540 cmpil #0x3FFF8000,%d0 // ...SEE IF 1+Z > 1
0541 bges KISZERO
0542
0543 KISNEG1:
0544 fmoves TWO,%fp0
0545 movel #0x3fff0000,F(%a6)
0546 clrl F+8(%a6)
0547 fsubx F(%a6),%fp0 // ...2-F
0548 movel FFRAC(%a6),%d0
0549 andil #0x7E000000,%d0
0550 asrl #8,%d0
0551 asrl #8,%d0
0552 asrl #4,%d0 // ...D0 CONTAINS DISPLACEMENT FOR 1/F
0553 faddx %fp1,%fp1 // ...GET 2Z
0554 fmovemx %fp2-%fp2/%fp3,-(%sp) // ...SAVE FP2
0555 faddx %fp1,%fp0 // ...FP0 IS Y-F = (2-F)+2Z
0556 lea LOGTBL,%a0 // ...A0 IS ADDRESS OF 1/F
0557 addal %d0,%a0
0558 fmoves negone,%fp1 // ...FP1 IS K = -1
0559 bra LP1CONT1
0560
0561 KISZERO:
0562 fmoves one,%fp0
0563 movel #0x3fff0000,F(%a6)
0564 clrl F+8(%a6)
0565 fsubx F(%a6),%fp0 // ...1-F
0566 movel FFRAC(%a6),%d0
0567 andil #0x7E000000,%d0
0568 asrl #8,%d0
0569 asrl #8,%d0
0570 asrl #4,%d0
0571 faddx %fp1,%fp0 // ...FP0 IS Y-F
0572 fmovemx %fp2-%fp2/%fp3,-(%sp) // ...FP2 SAVED
0573 lea LOGTBL,%a0
0574 addal %d0,%a0 // ...A0 IS ADDRESS OF 1/F
0575 fmoves zero,%fp1 // ...FP1 IS K = 0
0576 bra LP1CONT1
0577
0578 LP1NEG0:
0579 //--FPCR SAVED. D0 IS X IN COMPACT FORM.
0580 cmpil #0,%d0
0581 blts LP1NEG
0582 LP1ZERO:
0583 fmoves negone,%fp0
0584
0585 fmovel %d1,%fpcr
0586 bra t_dz
0587
0588 LP1NEG:
0589 fmoves zero,%fp0
0590
0591 fmovel %d1,%fpcr
0592 bra t_operr
0593
0594 |end