Back to home page

LXR

 
 

    


File indexing completed on 2025-05-11 08:23:49

0001 #include "fpsp-namespace.h"
0002 //
0003 //
0004 //  slogn.sa 3.1 12/10/90
0005 //
0006 //  slogn computes the natural logarithm of an
0007 //  input value. slognd does the same except the input value is a
0008 //  denormalized number. slognp1 computes log(1+X), and slognp1d
0009 //  computes log(1+X) for denormalized X.
0010 //
0011 //  Input: Double-extended value in memory location pointed to by address
0012 //      register a0.
0013 //
0014 //  Output: log(X) or log(1+X) returned in floating-point register Fp0.
0015 //
0016 //  Accuracy and Monotonicity: The returned result is within 2 ulps in
0017 //      64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
0018 //      result is subsequently rounded to double precision. The
0019 //      result is provably monotonic in double precision.
0020 //
0021 //  Speed: The program slogn takes approximately 190 cycles for input
0022 //      argument X such that |X-1| >= 1/16, which is the the usual
0023 //      situation. For those arguments, slognp1 takes approximately
0024 //       210 cycles. For the less common arguments, the program will
0025 //       run no worse than 10% slower.
0026 //
0027 //  Algorithm:
0028 //  LOGN:
0029 //  Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
0030 //      u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
0031 //
0032 //  Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
0033 //      significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
0034 //      2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
0035 //
0036 //  Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
0037 //      log(1+u) = poly.
0038 //
0039 //  Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
0040 //      by k*log(2) + (log(F) + poly). The values of log(F) are calculated
0041 //      beforehand and stored in the program.
0042 //
0043 //  lognp1:
0044 //  Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
0045 //      u where u = 2X/(2+X). Otherwise, move on to Step 2.
0046 //
0047 //  Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
0048 //      of the algorithm for LOGN and compute log(1+X) as
0049 //      k*log(2) + log(F) + poly where poly approximates log(1+u),
0050 //      u = (Y-F)/F.
0051 //
0052 //  Implementation Notes:
0053 //  Note 1. There are 64 different possible values for F, thus 64 log(F)'s
0054 //      need to be tabulated. Moreover, the values of 1/F are also
0055 //      tabulated so that the division in (Y-F)/F can be performed by a
0056 //      multiplication.
0057 //
0058 //  Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value
0059 //      Y-F has to be calculated carefully when 1/2 <= X < 3/2.
0060 //
0061 //  Note 3. To fully exploit the pipeline, polynomials are usually separated
0062 //      into two parts evaluated independently before being added up.
0063 //
0064 
0065 //      Copyright (C) Motorola, Inc. 1990
0066 //          All Rights Reserved
0067 //
0068 //  THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
0069 //  The copyright notice above does not evidence any
0070 //  actual or intended publication of such source code.
0071 
0072 //slogn idnt    2,1 | Motorola 040 Floating Point Software Package
0073 
0074     |section    8
0075 
0076 #include "fpsp.defs"
0077 
0078 BOUNDS1:  .long 0x3FFEF07D,0x3FFF8841
0079 BOUNDS2:  .long 0x3FFE8000,0x3FFFC000
0080 
0081 LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
0082 
0083 one:    .long 0x3F800000
0084 zero:   .long 0x00000000
0085 infty:  .long 0x7F800000
0086 negone: .long 0xBF800000
0087 
0088 LOGA6:  .long 0x3FC2499A,0xB5E4040B
0089 LOGA5:  .long 0xBFC555B5,0x848CB7DB
0090 
0091 LOGA4:  .long 0x3FC99999,0x987D8730
0092 LOGA3:  .long 0xBFCFFFFF,0xFF6F7E97
0093 
0094 LOGA2:  .long 0x3FD55555,0x555555a4
0095 LOGA1:  .long 0xBFE00000,0x00000008
0096 
0097 LOGB5:  .long 0x3F175496,0xADD7DAD6
0098 LOGB4:  .long 0x3F3C71C2,0xFE80C7E0
0099 
0100 LOGB3:  .long 0x3F624924,0x928BCCFF
0101 LOGB2:  .long 0x3F899999,0x999995EC
0102 
0103 LOGB1:  .long 0x3FB55555,0x55555555
0104 TWO:    .long 0x40000000,0x00000000
0105 
0106 LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000
0107 
0108 LOGTBL:
0109     .long  0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
0110     .long  0x3FF70000,0xFF015358,0x833C47E2,0x00000000
0111     .long  0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
0112     .long  0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
0113     .long  0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
0114     .long  0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
0115     .long  0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
0116     .long  0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
0117     .long  0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
0118     .long  0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
0119     .long  0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
0120     .long  0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
0121     .long  0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
0122     .long  0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
0123     .long  0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
0124     .long  0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
0125     .long  0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
0126     .long  0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
0127     .long  0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
0128     .long  0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
0129     .long  0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
0130     .long  0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
0131     .long  0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
0132     .long  0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
0133     .long  0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
0134     .long  0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
0135     .long  0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
0136     .long  0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
0137     .long  0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
0138     .long  0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
0139     .long  0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
0140     .long  0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
0141     .long  0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
0142     .long  0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
0143     .long  0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
0144     .long  0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
0145     .long  0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
0146     .long  0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
0147     .long  0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
0148     .long  0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
0149     .long  0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
0150     .long  0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
0151     .long  0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
0152     .long  0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
0153     .long  0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
0154     .long  0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
0155     .long  0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
0156     .long  0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
0157     .long  0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
0158     .long  0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
0159     .long  0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
0160     .long  0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
0161     .long  0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
0162     .long  0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
0163     .long  0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
0164     .long  0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
0165     .long  0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
0166     .long  0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
0167     .long  0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
0168     .long  0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
0169     .long  0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
0170     .long  0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
0171     .long  0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
0172     .long  0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
0173     .long  0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
0174     .long  0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
0175     .long  0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
0176     .long  0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
0177     .long  0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
0178     .long  0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
0179     .long  0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
0180     .long  0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
0181     .long  0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
0182     .long  0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
0183     .long  0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
0184     .long  0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
0185     .long  0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
0186     .long  0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
0187     .long  0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
0188     .long  0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
0189     .long  0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
0190     .long  0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
0191     .long  0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
0192     .long  0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
0193     .long  0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
0194     .long  0x3FFE0000,0x825EFCED,0x49369330,0x00000000
0195     .long  0x3FFE0000,0x9868C809,0x868C8098,0x00000000
0196     .long  0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
0197     .long  0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
0198     .long  0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
0199     .long  0x3FFE0000,0x95A02568,0x095A0257,0x00000000
0200     .long  0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
0201     .long  0x3FFE0000,0x94458094,0x45809446,0x00000000
0202     .long  0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
0203     .long  0x3FFE0000,0x92F11384,0x0497889C,0x00000000
0204     .long  0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
0205     .long  0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
0206     .long  0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
0207     .long  0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
0208     .long  0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
0209     .long  0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
0210     .long  0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
0211     .long  0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
0212     .long  0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
0213     .long  0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
0214     .long  0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
0215     .long  0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
0216     .long  0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
0217     .long  0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
0218     .long  0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
0219     .long  0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
0220     .long  0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
0221     .long  0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
0222     .long  0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
0223     .long  0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
0224     .long  0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
0225     .long  0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
0226     .long  0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
0227     .long  0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
0228     .long  0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
0229     .long  0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
0230     .long  0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
0231     .long  0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
0232     .long  0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
0233     .long  0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
0234     .long  0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
0235     .long  0x3FFE0000,0x80808080,0x80808081,0x00000000
0236     .long  0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
0237 
0238     .set    ADJK,L_SCR1
0239 
0240     .set    X,FP_SCR1
0241     .set    XDCARE,X+2
0242     .set    XFRAC,X+4
0243 
0244     .set    F,FP_SCR2
0245     .set    FFRAC,F+4
0246 
0247     .set    KLOG2,FP_SCR3
0248 
0249     .set    SAVEU,FP_SCR4
0250 
0251     | xref  t_frcinx
0252     |xref   t_extdnrm
0253     |xref   t_operr
0254     |xref   t_dz
0255 
0256     .global slognd
0257 slognd:
0258 //--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
0259 
0260     movel       #-100,ADJK(%a6) // ...INPUT = 2^(ADJK) * FP0
0261 
0262 //----normalize the input value by left shifting k bits (k to be determined
0263 //----below), adjusting exponent and storing -k to  ADJK
0264 //----the value TWOTO100 is no longer needed.
0265 //----Note that this code assumes the denormalized input is NON-ZERO.
0266 
0267      moveml %d2-%d7,-(%a7)      // ...save some registers
0268      movel  #0x00000000,%d3     // ...D3 is exponent of smallest norm. #
0269      movel  4(%a0),%d4
0270      movel  8(%a0),%d5      // ...(D4,D5) is (Hi_X,Lo_X)
0271      clrl   %d2         // ...D2 used for holding K
0272 
0273      tstl   %d4
0274      bnes   HiX_not0
0275 
0276 HiX_0:
0277      movel  %d5,%d4
0278      clrl   %d5
0279      movel  #32,%d2
0280      clrl   %d6
0281      bfffo      %d4{#0:#32},%d6
0282      lsll      %d6,%d4
0283      addl   %d6,%d2         // ...(D3,D4,D5) is normalized
0284 
0285      movel  %d3,X(%a6)
0286      movel  %d4,XFRAC(%a6)
0287      movel  %d5,XFRAC+4(%a6)
0288      negl   %d2
0289      movel  %d2,ADJK(%a6)
0290      fmovex X(%a6),%fp0
0291      moveml (%a7)+,%d2-%d7      // ...restore registers
0292      lea    X(%a6),%a0
0293      bras   LOGBGN          // ...begin regular log(X)
0294 
0295 
0296 HiX_not0:
0297      clrl   %d6
0298      bfffo  %d4{#0:#32},%d6     // ...find first 1
0299      movel  %d6,%d2         // ...get k
0300      lsll   %d6,%d4
0301      movel  %d5,%d7         // ...a copy of D5
0302      lsll   %d6,%d5
0303      negl   %d6
0304      addil  #32,%d6
0305      lsrl   %d6,%d7
0306      orl    %d7,%d4         // ...(D3,D4,D5) normalized
0307 
0308      movel  %d3,X(%a6)
0309      movel  %d4,XFRAC(%a6)
0310      movel  %d5,XFRAC+4(%a6)
0311      negl   %d2
0312      movel  %d2,ADJK(%a6)
0313      fmovex X(%a6),%fp0
0314      moveml (%a7)+,%d2-%d7      // ...restore registers
0315      lea    X(%a6),%a0
0316      bras   LOGBGN          // ...begin regular log(X)
0317 
0318 
0319     .global slogn
0320 slogn:
0321 //--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
0322 
0323     fmovex      (%a0),%fp0  // ...LOAD INPUT
0324     movel       #0x00000000,ADJK(%a6)
0325 
0326 LOGBGN:
0327 //--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
0328 //--A FINITE, NON-ZERO, NORMALIZED NUMBER.
0329 
0330     movel   (%a0),%d0
0331     movew   4(%a0),%d0
0332 
0333     movel   (%a0),X(%a6)
0334     movel   4(%a0),X+4(%a6)
0335     movel   8(%a0),X+8(%a6)
0336 
0337     cmpil   #0,%d0      // ...CHECK IF X IS NEGATIVE
0338     blt LOGNEG      // ...LOG OF NEGATIVE ARGUMENT IS INVALID
0339     cmp2l   BOUNDS1,%d0 // ...X IS POSITIVE, CHECK IF X IS NEAR 1
0340     bcc LOGNEAR1    // ...BOUNDS IS ROUGHLY [15/16, 17/16]
0341 
0342 LOGMAIN:
0343 //--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
0344 
0345 //--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
0346 //--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
0347 //--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
0348 //--             = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
0349 //--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
0350 //--LOG(1+U) CAN BE VERY EFFICIENT.
0351 //--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
0352 //--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
0353 
0354 //--GET K, Y, F, AND ADDRESS OF 1/F.
0355     asrl    #8,%d0
0356     asrl    #8,%d0      // ...SHIFTED 16 BITS, BIASED EXPO. OF X
0357     subil   #0x3FFF,%d0     // ...THIS IS K
0358     addl    ADJK(%a6),%d0   // ...ADJUST K, ORIGINAL INPUT MAY BE  DENORM.
0359     lea LOGTBL,%a0  // ...BASE ADDRESS OF 1/F AND LOG(F)
0360     fmovel  %d0,%fp1        // ...CONVERT K TO FLOATING-POINT FORMAT
0361 
0362 //--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
0363     movel   #0x3FFF0000,X(%a6)  // ...X IS NOW Y, I.E. 2^(-K)*X
0364     movel   XFRAC(%a6),FFRAC(%a6)
0365     andil   #0xFE000000,FFRAC(%a6) // ...FIRST 7 BITS OF Y
0366     oril    #0x01000000,FFRAC(%a6) // ...GET F: ATTACH A 1 AT THE EIGHTH BIT
0367     movel   FFRAC(%a6),%d0  // ...READY TO GET ADDRESS OF 1/F
0368     andil   #0x7E000000,%d0
0369     asrl    #8,%d0
0370     asrl    #8,%d0
0371     asrl    #4,%d0      // ...SHIFTED 20, D0 IS THE DISPLACEMENT
0372     addal   %d0,%a0     // ...A0 IS THE ADDRESS FOR 1/F
0373 
0374     fmovex  X(%a6),%fp0
0375     movel   #0x3fff0000,F(%a6)
0376     clrl    F+8(%a6)
0377     fsubx   F(%a6),%fp0     // ...Y-F
0378     fmovemx %fp2-%fp2/%fp3,-(%sp)   // ...SAVE FP2 WHILE FP0 IS NOT READY
0379 //--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
0380 //--REGISTERS SAVED: FPCR, FP1, FP2
0381 
0382 LP1CONT1:
0383 //--AN RE-ENTRY POINT FOR LOGNP1
0384     fmulx   (%a0),%fp0  // ...FP0 IS U = (Y-F)/F
0385     fmulx   LOGOF2,%fp1 // ...GET K*LOG2 WHILE FP0 IS NOT READY
0386     fmovex  %fp0,%fp2
0387     fmulx   %fp2,%fp2       // ...FP2 IS V=U*U
0388     fmovex  %fp1,KLOG2(%a6) // ...PUT K*LOG2 IN MEMORY, FREE FP1
0389 
0390 //--LOG(1+U) IS APPROXIMATED BY
0391 //--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
0392 //--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]
0393 
0394     fmovex  %fp2,%fp3
0395     fmovex  %fp2,%fp1
0396 
0397     fmuld   LOGA6,%fp1  // ...V*A6
0398     fmuld   LOGA5,%fp2  // ...V*A5
0399 
0400     faddd   LOGA4,%fp1  // ...A4+V*A6
0401     faddd   LOGA3,%fp2  // ...A3+V*A5
0402 
0403     fmulx   %fp3,%fp1       // ...V*(A4+V*A6)
0404     fmulx   %fp3,%fp2       // ...V*(A3+V*A5)
0405 
0406     faddd   LOGA2,%fp1  // ...A2+V*(A4+V*A6)
0407     faddd   LOGA1,%fp2  // ...A1+V*(A3+V*A5)
0408 
0409     fmulx   %fp3,%fp1       // ...V*(A2+V*(A4+V*A6))
0410     addal   #16,%a0     // ...ADDRESS OF LOG(F)
0411     fmulx   %fp3,%fp2       // ...V*(A1+V*(A3+V*A5)), FP3 RELEASED
0412 
0413     fmulx   %fp0,%fp1       // ...U*V*(A2+V*(A4+V*A6))
0414     faddx   %fp2,%fp0       // ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED
0415 
0416     faddx   (%a0),%fp1  // ...LOG(F)+U*V*(A2+V*(A4+V*A6))
0417     fmovemx  (%sp)+,%fp2-%fp2/%fp3  // ...RESTORE FP2
0418     faddx   %fp1,%fp0       // ...FP0 IS LOG(F) + LOG(1+U)
0419 
0420     fmovel  %d1,%fpcr
0421     faddx   KLOG2(%a6),%fp0 // ...FINAL ADD
0422     bra t_frcinx
0423 
0424 
0425 LOGNEAR1:
0426 //--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
0427     fmovex  %fp0,%fp1
0428     fsubs   one,%fp1        // ...FP1 IS X-1
0429     fadds   one,%fp0        // ...FP0 IS X+1
0430     faddx   %fp1,%fp1       // ...FP1 IS 2(X-1)
0431 //--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
0432 //--IN U, U = 2(X-1)/(X+1) = FP1/FP0
0433 
0434 LP1CONT2:
0435 //--THIS IS AN RE-ENTRY POINT FOR LOGNP1
0436     fdivx   %fp0,%fp1       // ...FP1 IS U
0437     fmovemx %fp2-%fp2/%fp3,-(%sp)    // ...SAVE FP2
0438 //--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
0439 //--LET V=U*U, W=V*V, CALCULATE
0440 //--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
0441 //--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
0442     fmovex  %fp1,%fp0
0443     fmulx   %fp0,%fp0   // ...FP0 IS V
0444     fmovex  %fp1,SAVEU(%a6) // ...STORE U IN MEMORY, FREE FP1
0445     fmovex  %fp0,%fp1
0446     fmulx   %fp1,%fp1   // ...FP1 IS W
0447 
0448     fmoved  LOGB5,%fp3
0449     fmoved  LOGB4,%fp2
0450 
0451     fmulx   %fp1,%fp3   // ...W*B5
0452     fmulx   %fp1,%fp2   // ...W*B4
0453 
0454     faddd   LOGB3,%fp3 // ...B3+W*B5
0455     faddd   LOGB2,%fp2 // ...B2+W*B4
0456 
0457     fmulx   %fp3,%fp1   // ...W*(B3+W*B5), FP3 RELEASED
0458 
0459     fmulx   %fp0,%fp2   // ...V*(B2+W*B4)
0460 
0461     faddd   LOGB1,%fp1 // ...B1+W*(B3+W*B5)
0462     fmulx   SAVEU(%a6),%fp0 // ...FP0 IS U*V
0463 
0464     faddx   %fp2,%fp1   // ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
0465     fmovemx (%sp)+,%fp2-%fp2/%fp3 // ...FP2 RESTORED
0466 
0467     fmulx   %fp1,%fp0   // ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
0468 
0469     fmovel  %d1,%fpcr
0470     faddx   SAVEU(%a6),%fp0
0471     bra t_frcinx
0472     rts
0473 
0474 LOGNEG:
0475 //--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
0476     bra t_operr
0477 
0478     .global slognp1d
0479 slognp1d:
0480 //--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
0481 // Simply return the denorm
0482 
0483     bra t_extdnrm
0484 
0485     .global slognp1
0486 slognp1:
0487 //--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
0488 
0489     fmovex  (%a0),%fp0  // ...LOAD INPUT
0490     fabsx   %fp0        //test magnitude
0491     fcmpx   LTHOLD,%fp0 //compare with min threshold
0492     fbgt    LP1REAL     //if greater, continue
0493     fmovel  #0,%fpsr        //clr N flag from compare
0494     fmovel  %d1,%fpcr
0495     fmovex  (%a0),%fp0  //return signed argument
0496     bra t_frcinx
0497 
0498 LP1REAL:
0499     fmovex  (%a0),%fp0  // ...LOAD INPUT
0500     movel   #0x00000000,ADJK(%a6)
0501     fmovex  %fp0,%fp1   // ...FP1 IS INPUT Z
0502     fadds   one,%fp0    // ...X := ROUND(1+Z)
0503     fmovex  %fp0,X(%a6)
0504     movew   XFRAC(%a6),XDCARE(%a6)
0505     movel   X(%a6),%d0
0506     cmpil   #0,%d0
0507     ble LP1NEG0 // ...LOG OF ZERO OR -VE
0508     cmp2l   BOUNDS2,%d0
0509     bcs LOGMAIN // ...BOUNDS2 IS [1/2,3/2]
0510 //--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
0511 //--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
0512 //--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
0513 
0514 LP1NEAR1:
0515 //--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
0516     cmp2l   BOUNDS1,%d0
0517     bcss    LP1CARE
0518 
0519 LP1ONE16:
0520 //--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
0521 //--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
0522     faddx   %fp1,%fp1   // ...FP1 IS 2Z
0523     fadds   one,%fp0    // ...FP0 IS 1+X
0524 //--U = FP1/FP0
0525     bra LP1CONT2
0526 
0527 LP1CARE:
0528 //--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
0529 //--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
0530 //--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
0531 //--THERE ARE ONLY TWO CASES.
0532 //--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
0533 //--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
0534 //--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
0535 //--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
0536 
0537     movel   XFRAC(%a6),FFRAC(%a6)
0538     andil   #0xFE000000,FFRAC(%a6)
0539     oril    #0x01000000,FFRAC(%a6)  // ...F OBTAINED
0540     cmpil   #0x3FFF8000,%d0 // ...SEE IF 1+Z > 1
0541     bges    KISZERO
0542 
0543 KISNEG1:
0544     fmoves  TWO,%fp0
0545     movel   #0x3fff0000,F(%a6)
0546     clrl    F+8(%a6)
0547     fsubx   F(%a6),%fp0 // ...2-F
0548     movel   FFRAC(%a6),%d0
0549     andil   #0x7E000000,%d0
0550     asrl    #8,%d0
0551     asrl    #8,%d0
0552     asrl    #4,%d0      // ...D0 CONTAINS DISPLACEMENT FOR 1/F
0553     faddx   %fp1,%fp1       // ...GET 2Z
0554     fmovemx %fp2-%fp2/%fp3,-(%sp)   // ...SAVE FP2
0555     faddx   %fp1,%fp0       // ...FP0 IS Y-F = (2-F)+2Z
0556     lea LOGTBL,%a0  // ...A0 IS ADDRESS OF 1/F
0557     addal   %d0,%a0
0558     fmoves  negone,%fp1 // ...FP1 IS K = -1
0559     bra LP1CONT1
0560 
0561 KISZERO:
0562     fmoves  one,%fp0
0563     movel   #0x3fff0000,F(%a6)
0564     clrl    F+8(%a6)
0565     fsubx   F(%a6),%fp0     // ...1-F
0566     movel   FFRAC(%a6),%d0
0567     andil   #0x7E000000,%d0
0568     asrl    #8,%d0
0569     asrl    #8,%d0
0570     asrl    #4,%d0
0571     faddx   %fp1,%fp0       // ...FP0 IS Y-F
0572     fmovemx %fp2-%fp2/%fp3,-(%sp)   // ...FP2 SAVED
0573     lea LOGTBL,%a0
0574     addal   %d0,%a0     // ...A0 IS ADDRESS OF 1/F
0575     fmoves  zero,%fp1   // ...FP1 IS K = 0
0576     bra LP1CONT1
0577 
0578 LP1NEG0:
0579 //--FPCR SAVED. D0 IS X IN COMPACT FORM.
0580     cmpil   #0,%d0
0581     blts    LP1NEG
0582 LP1ZERO:
0583     fmoves  negone,%fp0
0584 
0585     fmovel  %d1,%fpcr
0586     bra t_dz
0587 
0588 LP1NEG:
0589     fmoves  zero,%fp0
0590 
0591     fmovel  %d1,%fpcr
0592     bra t_operr
0593 
0594     |end