File indexing completed on 2025-05-11 08:23:49
0001 #include "fpsp-namespace.h"
0002 //
0003 //
0004 // stwotox.sa 3.1 12/10/90
0005 //
0006 // stwotox --- 2**X
0007 // stwotoxd --- 2**X for denormalized X
0008 // stentox --- 10**X
0009 // stentoxd --- 10**X for denormalized X
0010 //
0011 // Input: Double-extended number X in location pointed to
0012 // by address register a0.
0013 //
0014 // Output: The function values are returned in Fp0.
0015 //
0016 // Accuracy and Monotonicity: The returned result is within 2 ulps in
0017 // 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
0018 // result is subsequently rounded to double precision. The
0019 // result is provably monotonic in double precision.
0020 //
0021 // Speed: The program stwotox takes approximately 190 cycles and the
0022 // program stentox takes approximately 200 cycles.
0023 //
0024 // Algorithm:
0025 //
0026 // twotox
0027 // 1. If |X| > 16480, go to ExpBig.
0028 //
0029 // 2. If |X| < 2**(-70), go to ExpSm.
0030 //
0031 // 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore
0032 // decompose N as
0033 // N = 64(M + M') + j, j = 0,1,2,...,63.
0034 //
0035 // 4. Overwrite r := r * log2. Then
0036 // 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
0037 // Go to expr to compute that expression.
0038 //
0039 // tentox
0040 // 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.
0041 //
0042 // 2. If |X| < 2**(-70), go to ExpSm.
0043 //
0044 // 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set
0045 // N := round-to-int(y). Decompose N as
0046 // N = 64(M + M') + j, j = 0,1,2,...,63.
0047 //
0048 // 4. Define r as
0049 // r := ((X - N*L1)-N*L2) * L10
0050 // where L1, L2 are the leading and trailing parts of log_10(2)/64
0051 // and L10 is the natural log of 10. Then
0052 // 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
0053 // Go to expr to compute that expression.
0054 //
0055 // expr
0056 // 1. Fetch 2**(j/64) from table as Fact1 and Fact2.
0057 //
0058 // 2. Overwrite Fact1 and Fact2 by
0059 // Fact1 := 2**(M) * Fact1
0060 // Fact2 := 2**(M) * Fact2
0061 // Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).
0062 //
0063 // 3. Calculate P where 1 + P approximates exp(r):
0064 // P = r + r*r*(A1+r*(A2+...+r*A5)).
0065 //
0066 // 4. Let AdjFact := 2**(M'). Return
0067 // AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).
0068 // Exit.
0069 //
0070 // ExpBig
0071 // 1. Generate overflow by Huge * Huge if X > 0; otherwise, generate
0072 // underflow by Tiny * Tiny.
0073 //
0074 // ExpSm
0075 // 1. Return 1 + X.
0076 //
0077
0078 // Copyright (C) Motorola, Inc. 1990
0079 // All Rights Reserved
0080 //
0081 // THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
0082 // The copyright notice above does not evidence any
0083 // actual or intended publication of such source code.
0084
0085 //STWOTOX idnt 2,1 | Motorola 040 Floating Point Software Package
0086
0087 |section 8
0088
0089 #include "fpsp.defs"
0090
0091 BOUNDS1: .long 0x3FB98000,0x400D80C0 // ... 2^(-70),16480
0092 BOUNDS2: .long 0x3FB98000,0x400B9B07 // ... 2^(-70),16480 LOG2/LOG10
0093
0094 L2TEN64: .long 0x406A934F,0x0979A371 // ... 64LOG10/LOG2
0095 L10TWO1: .long 0x3F734413,0x509F8000 // ... LOG2/64LOG10
0096
0097 L10TWO2: .long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
0098
0099 LOG10: .long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
0100
0101 LOG2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
0102
0103 EXPA5: .long 0x3F56C16D,0x6F7BD0B2
0104 EXPA4: .long 0x3F811112,0x302C712C
0105 EXPA3: .long 0x3FA55555,0x55554CC1
0106 EXPA2: .long 0x3FC55555,0x55554A54
0107 EXPA1: .long 0x3FE00000,0x00000000,0x00000000,0x00000000
0108
0109 HUGE: .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
0110 TINY: .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
0111
0112 EXPTBL:
0113 .long 0x3FFF0000,0x80000000,0x00000000,0x3F738000
0114 .long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
0115 .long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
0116 .long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
0117 .long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
0118 .long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
0119 .long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
0120 .long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
0121 .long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
0122 .long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
0123 .long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
0124 .long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
0125 .long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
0126 .long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
0127 .long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
0128 .long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
0129 .long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
0130 .long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
0131 .long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
0132 .long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
0133 .long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
0134 .long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
0135 .long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
0136 .long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
0137 .long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
0138 .long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
0139 .long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
0140 .long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
0141 .long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
0142 .long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
0143 .long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
0144 .long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
0145 .long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
0146 .long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
0147 .long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
0148 .long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
0149 .long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
0150 .long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
0151 .long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
0152 .long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
0153 .long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
0154 .long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
0155 .long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
0156 .long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
0157 .long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
0158 .long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
0159 .long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
0160 .long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
0161 .long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
0162 .long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
0163 .long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
0164 .long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
0165 .long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
0166 .long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
0167 .long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
0168 .long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
0169 .long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
0170 .long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
0171 .long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
0172 .long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
0173 .long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
0174 .long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
0175 .long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
0176 .long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
0177
0178 .set N,L_SCR1
0179
0180 .set X,FP_SCR1
0181 .set XDCARE,X+2
0182 .set XFRAC,X+4
0183
0184 .set ADJFACT,FP_SCR2
0185
0186 .set FACT1,FP_SCR3
0187 .set FACT1HI,FACT1+4
0188 .set FACT1LOW,FACT1+8
0189
0190 .set FACT2,FP_SCR4
0191 .set FACT2HI,FACT2+4
0192 .set FACT2LOW,FACT2+8
0193
0194 | xref t_unfl
0195 |xref t_ovfl
0196 |xref t_frcinx
0197
0198 .global stwotoxd
0199 stwotoxd:
0200 //--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
0201
0202 fmovel %d1,%fpcr // ...set user's rounding mode/precision
0203 fmoves #0x3F800000,%fp0 // ...RETURN 1 + X
0204 movel (%a0),%d0
0205 orl #0x00800001,%d0
0206 fadds %d0,%fp0
0207 bra t_frcinx
0208
0209 .global stwotox
0210 stwotox:
0211 //--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
0212 fmovemx (%a0),%fp0-%fp0 // ...LOAD INPUT, do not set cc's
0213
0214 movel (%a0),%d0
0215 movew 4(%a0),%d0
0216 fmovex %fp0,X(%a6)
0217 andil #0x7FFFFFFF,%d0
0218
0219 cmpil #0x3FB98000,%d0 // ...|X| >= 2**(-70)?
0220 bges TWOOK1
0221 bra EXPBORS
0222
0223 TWOOK1:
0224 cmpil #0x400D80C0,%d0 // ...|X| > 16480?
0225 bles TWOMAIN
0226 bra EXPBORS
0227
0228
0229 TWOMAIN:
0230 //--USUAL CASE, 2^(-70) <= |X| <= 16480
0231
0232 fmovex %fp0,%fp1
0233 fmuls #0x42800000,%fp1 // ...64 * X
0234
0235 fmovel %fp1,N(%a6) // ...N = ROUND-TO-INT(64 X)
0236 movel %d2,-(%sp)
0237 lea EXPTBL,%a1 // ...LOAD ADDRESS OF TABLE OF 2^(J/64)
0238 fmovel N(%a6),%fp1 // ...N --> FLOATING FMT
0239 movel N(%a6),%d0
0240 movel %d0,%d2
0241 andil #0x3F,%d0 // ...D0 IS J
0242 asll #4,%d0 // ...DISPLACEMENT FOR 2^(J/64)
0243 addal %d0,%a1 // ...ADDRESS FOR 2^(J/64)
0244 asrl #6,%d2 // ...d2 IS L, N = 64L + J
0245 movel %d2,%d0
0246 asrl #1,%d0 // ...D0 IS M
0247 subl %d0,%d2 // ...d2 IS M', N = 64(M+M') + J
0248 addil #0x3FFF,%d2
0249 movew %d2,ADJFACT(%a6) // ...ADJFACT IS 2^(M')
0250 movel (%sp)+,%d2
0251 //--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
0252 //--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
0253 //--ADJFACT = 2^(M').
0254 //--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
0255
0256 fmuls #0x3C800000,%fp1 // ...(1/64)*N
0257 movel (%a1)+,FACT1(%a6)
0258 movel (%a1)+,FACT1HI(%a6)
0259 movel (%a1)+,FACT1LOW(%a6)
0260 movew (%a1)+,FACT2(%a6)
0261 clrw FACT2+2(%a6)
0262
0263 fsubx %fp1,%fp0 // ...X - (1/64)*INT(64 X)
0264
0265 movew (%a1)+,FACT2HI(%a6)
0266 clrw FACT2HI+2(%a6)
0267 clrl FACT2LOW(%a6)
0268 addw %d0,FACT1(%a6)
0269
0270 fmulx LOG2,%fp0 // ...FP0 IS R
0271 addw %d0,FACT2(%a6)
0272
0273 bra expr
0274
0275 EXPBORS:
0276 //--FPCR, D0 SAVED
0277 cmpil #0x3FFF8000,%d0
0278 bgts EXPBIG
0279
0280 EXPSM:
0281 //--|X| IS SMALL, RETURN 1 + X
0282
0283 fmovel %d1,%FPCR //restore users exceptions
0284 fadds #0x3F800000,%fp0 // ...RETURN 1 + X
0285
0286 bra t_frcinx
0287
0288 EXPBIG:
0289 //--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
0290 //--REGISTERS SAVE SO FAR ARE FPCR AND D0
0291 movel X(%a6),%d0
0292 cmpil #0,%d0
0293 blts EXPNEG
0294
0295 bclrb #7,(%a0) //t_ovfl expects positive value
0296 bra t_ovfl
0297
0298 EXPNEG:
0299 bclrb #7,(%a0) //t_unfl expects positive value
0300 bra t_unfl
0301
0302 .global stentoxd
0303 stentoxd:
0304 //--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
0305
0306 fmovel %d1,%fpcr // ...set user's rounding mode/precision
0307 fmoves #0x3F800000,%fp0 // ...RETURN 1 + X
0308 movel (%a0),%d0
0309 orl #0x00800001,%d0
0310 fadds %d0,%fp0
0311 bra t_frcinx
0312
0313 .global stentox
0314 stentox:
0315 //--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
0316 fmovemx (%a0),%fp0-%fp0 // ...LOAD INPUT, do not set cc's
0317
0318 movel (%a0),%d0
0319 movew 4(%a0),%d0
0320 fmovex %fp0,X(%a6)
0321 andil #0x7FFFFFFF,%d0
0322
0323 cmpil #0x3FB98000,%d0 // ...|X| >= 2**(-70)?
0324 bges TENOK1
0325 bra EXPBORS
0326
0327 TENOK1:
0328 cmpil #0x400B9B07,%d0 // ...|X| <= 16480*log2/log10 ?
0329 bles TENMAIN
0330 bra EXPBORS
0331
0332 TENMAIN:
0333 //--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
0334
0335 fmovex %fp0,%fp1
0336 fmuld L2TEN64,%fp1 // ...X*64*LOG10/LOG2
0337
0338 fmovel %fp1,N(%a6) // ...N=INT(X*64*LOG10/LOG2)
0339 movel %d2,-(%sp)
0340 lea EXPTBL,%a1 // ...LOAD ADDRESS OF TABLE OF 2^(J/64)
0341 fmovel N(%a6),%fp1 // ...N --> FLOATING FMT
0342 movel N(%a6),%d0
0343 movel %d0,%d2
0344 andil #0x3F,%d0 // ...D0 IS J
0345 asll #4,%d0 // ...DISPLACEMENT FOR 2^(J/64)
0346 addal %d0,%a1 // ...ADDRESS FOR 2^(J/64)
0347 asrl #6,%d2 // ...d2 IS L, N = 64L + J
0348 movel %d2,%d0
0349 asrl #1,%d0 // ...D0 IS M
0350 subl %d0,%d2 // ...d2 IS M', N = 64(M+M') + J
0351 addil #0x3FFF,%d2
0352 movew %d2,ADJFACT(%a6) // ...ADJFACT IS 2^(M')
0353 movel (%sp)+,%d2
0354
0355 //--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
0356 //--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
0357 //--ADJFACT = 2^(M').
0358 //--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
0359
0360 fmovex %fp1,%fp2
0361
0362 fmuld L10TWO1,%fp1 // ...N*(LOG2/64LOG10)_LEAD
0363 movel (%a1)+,FACT1(%a6)
0364
0365 fmulx L10TWO2,%fp2 // ...N*(LOG2/64LOG10)_TRAIL
0366
0367 movel (%a1)+,FACT1HI(%a6)
0368 movel (%a1)+,FACT1LOW(%a6)
0369 fsubx %fp1,%fp0 // ...X - N L_LEAD
0370 movew (%a1)+,FACT2(%a6)
0371
0372 fsubx %fp2,%fp0 // ...X - N L_TRAIL
0373
0374 clrw FACT2+2(%a6)
0375 movew (%a1)+,FACT2HI(%a6)
0376 clrw FACT2HI+2(%a6)
0377 clrl FACT2LOW(%a6)
0378
0379 fmulx LOG10,%fp0 // ...FP0 IS R
0380
0381 addw %d0,FACT1(%a6)
0382 addw %d0,FACT2(%a6)
0383
0384 expr:
0385 //--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
0386 //--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
0387 //--FP0 IS R. THE FOLLOWING CODE COMPUTES
0388 //-- 2**(M'+M) * 2**(J/64) * EXP(R)
0389
0390 fmovex %fp0,%fp1
0391 fmulx %fp1,%fp1 // ...FP1 IS S = R*R
0392
0393 fmoved EXPA5,%fp2 // ...FP2 IS A5
0394 fmoved EXPA4,%fp3 // ...FP3 IS A4
0395
0396 fmulx %fp1,%fp2 // ...FP2 IS S*A5
0397 fmulx %fp1,%fp3 // ...FP3 IS S*A4
0398
0399 faddd EXPA3,%fp2 // ...FP2 IS A3+S*A5
0400 faddd EXPA2,%fp3 // ...FP3 IS A2+S*A4
0401
0402 fmulx %fp1,%fp2 // ...FP2 IS S*(A3+S*A5)
0403 fmulx %fp1,%fp3 // ...FP3 IS S*(A2+S*A4)
0404
0405 faddd EXPA1,%fp2 // ...FP2 IS A1+S*(A3+S*A5)
0406 fmulx %fp0,%fp3 // ...FP3 IS R*S*(A2+S*A4)
0407
0408 fmulx %fp1,%fp2 // ...FP2 IS S*(A1+S*(A3+S*A5))
0409 faddx %fp3,%fp0 // ...FP0 IS R+R*S*(A2+S*A4)
0410
0411 faddx %fp2,%fp0 // ...FP0 IS EXP(R) - 1
0412
0413
0414 //--FINAL RECONSTRUCTION PROCESS
0415 //--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0)
0416
0417 fmulx FACT1(%a6),%fp0
0418 faddx FACT2(%a6),%fp0
0419 faddx FACT1(%a6),%fp0
0420
0421 fmovel %d1,%FPCR //restore users exceptions
0422 clrw ADJFACT+2(%a6)
0423 movel #0x80000000,ADJFACT+4(%a6)
0424 clrl ADJFACT+8(%a6)
0425 fmulx ADJFACT(%a6),%fp0 // ...FINAL ADJUSTMENT
0426
0427 bra t_frcinx
0428
0429 |end