shared/fpsp/bindec.S

0001 #include "fpsp-namespace.h"
0002 //
0003 //
0004 //  bindec.sa 3.4 1/3/91
0005 //
0006 //  bindec
0007 //
0008 //  Description:
0009 //      Converts an input in extended precision format
0010 //      to bcd format.
0011 //
0012 //  Input:
0013 //      a0 points to the input extended precision value
0014 //      value in memory; d0 contains the k-factor sign-extended
0015 //      to 32-bits.  The input may be either normalized,
0016 //      unnormalized, or denormalized.
0017 //
0018 //  Output: result in the FP_SCR1 space on the stack.
0019 //
0020 //  Saves and Modifies: D2-D7,A2,FP2
0021 //
0022 //  Algorithm:
0023 //
0024 //  A1. Set RM and size ext;  Set SIGMA = sign of input.
0025 //      The k-factor is saved for use in d7. Clear the
0026 //      BINDEC_FLG for separating normalized/denormalized
0027 //      input.  If input is unnormalized or denormalized,
0028 //      normalize it.
0029 //
0030 //  A2. Set X = abs(input).
0031 //
0032 //  A3. Compute ILOG.
0033 //      ILOG is the log base 10 of the input value.  It is
0034 //      approximated by adding e + 0.f when the original
0035 //      value is viewed as 2^^e * 1.f in extended precision.
0036 //      This value is stored in d6.
0037 //
0038 //  A4. Clr INEX bit.
0039 //      The operation in A3 above may have set INEX2.
0040 //
0041 //  A5. Set ICTR = 0;
0042 //      ICTR is a flag used in A13.  It must be set before the
0043 //      loop entry A6.
0044 //
0045 //  A6. Calculate LEN.
0046 //      LEN is the number of digits to be displayed.  The
0047 //      k-factor can dictate either the total number of digits,
0048 //      if it is a positive number, or the number of digits
0049 //      after the decimal point which are to be included as
0050 //      significant.  See the 68882 manual for examples.
0051 //      If LEN is computed to be greater than 17, set OPERR in
0052 //      USER_FPSR.  LEN is stored in d4.
0053 //
0054 //  A7. Calculate SCALE.
0055 //      SCALE is equal to 10^ISCALE, where ISCALE is the number
0056 //      of decimal places needed to insure LEN integer digits
0057 //      in the output before conversion to bcd. LAMBDA is the
0058 //      sign of ISCALE, used in A9. Fp1 contains
0059 //      10^^(abs(ISCALE)) using a rounding mode which is a
0060 //      function of the original rounding mode and the signs
0061 //      of ISCALE and X.  A table is given in the code.
0062 //
0063 //  A8. Clr INEX; Force RZ.
0064 //      The operation in A3 above may have set INEX2.
0065 //      RZ mode is forced for the scaling operation to insure
0066 //      only one rounding error.  The grs bits are collected in
0067 //      the INEX flag for use in A10.
0068 //
0069 //  A9. Scale X -> Y.
0070 //      The mantissa is scaled to the desired number of
0071 //      significant digits.  The excess digits are collected
0072 //      in INEX2.
0073 //
0074 //  A10.    Or in INEX.
0075 //      If INEX is set, round error occurred.  This is
0076 //      compensated for by 'or-ing' in the INEX2 flag to
0077 //      the lsb of Y.
0078 //
0079 //  A11.    Restore original FPCR; set size ext.
0080 //      Perform FINT operation in the user's rounding mode.
0081 //      Keep the size to extended.
0082 //
0083 //  A12.    Calculate YINT = FINT(Y) according to user's rounding
0084 //      mode.  The FPSP routine sintd0 is used.  The output
0085 //      is in fp0.
0086 //
0087 //  A13.    Check for LEN digits.
0088 //      If the int operation results in more than LEN digits,
0089 //      or less than LEN -1 digits, adjust ILOG and repeat from
0090 //      A6.  This test occurs only on the first pass.  If the
0091 //      result is exactly 10^LEN, decrement ILOG and divide
0092 //      the mantissa by 10.
0093 //
0094 //  A14.    Convert the mantissa to bcd.
0095 //      The binstr routine is used to convert the LEN digit
0096 //      mantissa to bcd in memory.  The input to binstr is
0097 //      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
0098 //      such that the decimal point is to the left of bit 63.
0099 //      The bcd digits are stored in the correct position in
0100 //      the final string area in memory.
0101 //
0102 //  A15.    Convert the exponent to bcd.
0103 //      As in A14 above, the exp is converted to bcd and the
0104 //      digits are stored in the final string.
0105 //      Test the length of the final exponent string.  If the
0106 //      length is 4, set operr.
0107 //
0108 //  A16.    Write sign bits to final string.
0109 //
0110 //  Implementation Notes:
0111 //
0112 //  The registers are used as follows:
0113 //
0114 //      d0: scratch; LEN input to binstr
0115 //      d1: scratch
0116 //      d2: upper 32-bits of mantissa for binstr
0117 //      d3: scratch;lower 32-bits of mantissa for binstr
0118 //      d4: LEN
0119 //              d5: LAMBDA/ICTR
0120 //      d6: ILOG
0121 //      d7: k-factor
0122 //      a0: ptr for original operand/final result
0123 //      a1: scratch pointer
0124 //      a2: pointer to FP_X; abs(original value) in ext
0125 //      fp0: scratch
0126 //      fp1: scratch
0127 //      fp2: scratch
0128 //      F_SCR1:
0129 //      F_SCR2:
0130 //      L_SCR1:
0131 //      L_SCR2:
0132
0133 //      Copyright (C) Motorola, Inc. 1990
0134 //          All Rights Reserved
0135 //
0136 //  THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
0137 //  The copyright notice above does not evidence any
0138 //  actual or intended publication of such source code.
0139
0140 //BINDEC    idnt    2,1 | Motorola 040 Floating Point Software Package
0141
0142 #include "fpsp.defs"
0143
0144     |section    8
0145
0146 // Constants in extended precision
0147 LOG2:   .long   0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
0148 LOG2UP1:    .long   0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
0149
0150 // Constants in single precision
0151 FONE:   .long   0x3F800000,0x00000000,0x00000000,0x00000000
0152 FTWO:   .long   0x40000000,0x00000000,0x00000000,0x00000000
0153 FTEN:   .long   0x41200000,0x00000000,0x00000000,0x00000000
0154 F4933:  .long   0x459A2800,0x00000000,0x00000000,0x00000000
0155
0156 RBDTBL:     .byte   0,0,0,0
0157     .byte   3,3,2,2
0158     .byte   3,2,2,3
0159     .byte   2,3,3,2
0160
0161     |xref   binstr
0162     |xref   sintdo
0163     |xref   ptenrn,ptenrm,ptenrp
0164
0165     .global bindec
0166     .global sc_mul
0167 bindec:
0168     moveml  %d2-%d7/%a2,-(%a7)
0169     fmovemx %fp0-%fp2,-(%a7)
0170
0171 // A1. Set RM and size ext. Set SIGMA = sign input;
0172 //     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
0173 //     separating  normalized/denormalized input.  If the input
0174 //     is a denormalized number, set the BINDEC_FLG memory word
0175 //     to signal denorm.  If the input is unnormalized, normalize
0176 //     the input and test for denormalized result.
0177 //
0178     fmovel  #rm_mode,%FPCR  //set RM and ext
0179     movel   (%a0),L_SCR2(%a6)   //save exponent for sign check
0180     movel   %d0,%d7     //move k-factor to d7
0181     clrb    BINDEC_FLG(%a6) //clr norm/denorm flag
0182     movew   STAG(%a6),%d0   //get stag
0183     andiw   #0xe000,%d0 //isolate stag bits
0184     beq A2_str      //if zero, input is norm
0185 //
0186 // Normalize the denorm
0187 //
0188 un_de_norm:
0189     movew   (%a0),%d0
0190     andiw   #0x7fff,%d0 //strip sign of normalized exp
0191     movel   4(%a0),%d1
0192     movel   8(%a0),%d2
0193 norm_loop:
0194     subw    #1,%d0
0195     lsll    #1,%d2
0196     roxll   #1,%d1
0197     tstl    %d1
0198     bges    norm_loop
0199 //
0200 // Test if the normalized input is denormalized
0201 //
0202     tstw    %d0
0203     bgts    pos_exp     //if greater than zero, it is a norm
0204     st  BINDEC_FLG(%a6) //set flag for denorm
0205 pos_exp:
0206     andiw   #0x7fff,%d0 //strip sign of normalized exp
0207     movew   %d0,(%a0)
0208     movel   %d1,4(%a0)
0209     movel   %d2,8(%a0)
0210
0211 // A2. Set X = abs(input).
0212 //
0213 A2_str:
0214     movel   (%a0),FP_SCR2(%a6) // move input to work space
0215     movel   4(%a0),FP_SCR2+4(%a6) // move input to work space
0216     movel   8(%a0),FP_SCR2+8(%a6) // move input to work space
0217     andil   #0x7fffffff,FP_SCR2(%a6) //create abs(X)
0218
0219 // A3. Compute ILOG.
0220 //     ILOG is the log base 10 of the input value.  It is approx-
0221 //     imated by adding e + 0.f when the original value is viewed
0222 //     as 2^^e * 1.f in extended precision.  This value is stored
0223 //     in d6.
0224 //
0225 // Register usage:
0226 //  Input/Output
0227 //  d0: k-factor/exponent
0228 //  d2: x/x
0229 //  d3: x/x
0230 //  d4: x/x
0231 //  d5: x/x
0232 //  d6: x/ILOG
0233 //  d7: k-factor/Unchanged
0234 //  a0: ptr for original operand/final result
0235 //  a1: x/x
0236 //  a2: x/x
0237 //  fp0: x/float(ILOG)
0238 //  fp1: x/x
0239 //  fp2: x/x
0240 //  F_SCR1:x/x
0241 //  F_SCR2:Abs(X)/Abs(X) with $3fff exponent
0242 //  L_SCR1:x/x
0243 //  L_SCR2:first word of X packed/Unchanged
0244
0245     tstb    BINDEC_FLG(%a6) //check for denorm
0246     beqs    A3_cont     //if clr, continue with norm
0247     movel   #-4933,%d6  //force ILOG = -4933
0248     bras    A4_str
0249 A3_cont:
0250     movew   FP_SCR2(%a6),%d0    //move exp to d0
0251     movew   #0x3fff,FP_SCR2(%a6) //replace exponent with 0x3fff
0252     fmovex  FP_SCR2(%a6),%fp0   //now fp0 has 1.f
0253     subw    #0x3fff,%d0 //strip off bias
0254     faddw   %d0,%fp0        //add in exp
0255     fsubs   FONE,%fp0   //subtract off 1.0
0256     fbge    pos_res     //if pos, branch
0257     fmulx   LOG2UP1,%fp0    //if neg, mul by LOG2UP1
0258     fmovel  %fp0,%d6        //put ILOG in d6 as a lword
0259     bras    A4_str      //go move out ILOG
0260 pos_res:
0261     fmulx   LOG2,%fp0   //if pos, mul by LOG2
0262     fmovel  %fp0,%d6        //put ILOG in d6 as a lword
0263
0264
0265 // A4. Clr INEX bit.
0266 //     The operation in A3 above may have set INEX2.
0267
0268 A4_str:
0269     fmovel  #0,%FPSR        //zero all of fpsr - nothing needed
0270
0271
0272 // A5. Set ICTR = 0;
0273 //     ICTR is a flag used in A13.  It must be set before the
0274 //     loop entry A6. The lower word of d5 is used for ICTR.
0275
0276     clrw    %d5     //clear ICTR
0277
0278
0279 // A6. Calculate LEN.
0280 //     LEN is the number of digits to be displayed.  The k-factor
0281 //     can dictate either the total number of digits, if it is
0282 //     a positive number, or the number of digits after the
0283 //     original decimal point which are to be included as
0284 //     significant.  See the 68882 manual for examples.
0285 //     If LEN is computed to be greater than 17, set OPERR in
0286 //     USER_FPSR.  LEN is stored in d4.
0287 //
0288 // Register usage:
0289 //  Input/Output
0290 //  d0: exponent/Unchanged
0291 //  d2: x/x/scratch
0292 //  d3: x/x
0293 //  d4: exc picture/LEN
0294 //  d5: ICTR/Unchanged
0295 //  d6: ILOG/Unchanged
0296 //  d7: k-factor/Unchanged
0297 //  a0: ptr for original operand/final result
0298 //  a1: x/x
0299 //  a2: x/x
0300 //  fp0: float(ILOG)/Unchanged
0301 //  fp1: x/x
0302 //  fp2: x/x
0303 //  F_SCR1:x/x
0304 //  F_SCR2:Abs(X) with $3fff exponent/Unchanged
0305 //  L_SCR1:x/x
0306 //  L_SCR2:first word of X packed/Unchanged
0307
0308 A6_str:
0309     tstl    %d7     //branch on sign of k
0310     bles    k_neg       //if k <= 0, LEN = ILOG + 1 - k
0311     movel   %d7,%d4     //if k > 0, LEN = k
0312     bras    len_ck      //skip to LEN check
0313 k_neg:
0314     movel   %d6,%d4     //first load ILOG to d4
0315     subl    %d7,%d4     //subtract off k
0316     addql   #1,%d4      //add in the 1
0317 len_ck:
0318     tstl    %d4     //LEN check: branch on sign of LEN
0319     bles    LEN_ng      //if neg, set LEN = 1
0320     cmpl    #17,%d4     //test if LEN > 17
0321     bles    A7_str      //if not, forget it
0322     movel   #17,%d4     //set max LEN = 17
0323     tstl    %d7     //if negative, never set OPERR
0324     bles    A7_str      //if positive, continue
0325     orl #opaop_mask,USER_FPSR(%a6) //set OPERR & AIOP in USER_FPSR
0326     bras    A7_str      //finished here
0327 LEN_ng:
0328     moveql  #1,%d4      //min LEN is 1
0329
0330
0331 // A7. Calculate SCALE.
0332 //     SCALE is equal to 10^ISCALE, where ISCALE is the number
0333 //     of decimal places needed to insure LEN integer digits
0334 //     in the output before conversion to bcd. LAMBDA is the sign
0335 //     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
0336 //     the rounding mode as given in the following table (see
0337 //     Coonen, p. 7.23 as ref.; however, the SCALE variable is
0338 //     of opposite sign in bindec.sa from Coonen).
0339 //
0340 //  Initial                 USE
0341 //  FPCR[6:5]   LAMBDA  SIGN(X)     FPCR[6:5]
0342 //  ----------------------------------------------
0343 //   RN 00     0       0        00/0    RN
0344 //   RN 00     0       1        00/0    RN
0345 //   RN 00     1       0        00/0    RN
0346 //   RN 00     1       1        00/0    RN
0347 //   RZ 01     0       0        11/3    RP
0348 //   RZ 01     0       1        11/3    RP
0349 //   RZ 01     1       0        10/2    RM
0350 //   RZ 01     1       1        10/2    RM
0351 //   RM 10     0       0        11/3    RP
0352 //   RM 10     0       1        10/2    RM
0353 //   RM 10     1       0        10/2    RM
0354 //   RM 10     1       1        11/3    RP
0355 //   RP 11     0       0        10/2    RM
0356 //   RP 11     0       1        11/3    RP
0357 //   RP 11     1       0        11/3    RP
0358 //   RP 11     1       1        10/2    RM
0359 //
0360 // Register usage:
0361 //  Input/Output
0362 //  d0: exponent/scratch - final is 0
0363 //  d2: x/0 or 24 for A9
0364 //  d3: x/scratch - offset ptr into PTENRM array
0365 //  d4: LEN/Unchanged
0366 //  d5: 0/ICTR:LAMBDA
0367 //  d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
0368 //  d7: k-factor/Unchanged
0369 //  a0: ptr for original operand/final result
0370 //  a1: x/ptr to PTENRM array
0371 //  a2: x/x
0372 //  fp0: float(ILOG)/Unchanged
0373 //  fp1: x/10^ISCALE
0374 //  fp2: x/x
0375 //  F_SCR1:x/x
0376 //  F_SCR2:Abs(X) with $3fff exponent/Unchanged
0377 //  L_SCR1:x/x
0378 //  L_SCR2:first word of X packed/Unchanged
0379
0380 A7_str:
0381     tstl    %d7     //test sign of k
0382     bgts    k_pos       //if pos and > 0, skip this
0383     cmpl    %d6,%d7     //test k - ILOG
0384     blts    k_pos       //if ILOG >= k, skip this
0385     movel   %d7,%d6     //if ((k<0) & (ILOG < k)) ILOG = k
0386 k_pos:
0387     movel   %d6,%d0     //calc ILOG + 1 - LEN in d0
0388     addql   #1,%d0      //add the 1
0389     subl    %d4,%d0     //sub off LEN
0390     swap    %d5     //use upper word of d5 for LAMBDA
0391     clrw    %d5     //set it zero initially
0392     clrw    %d2     //set up d2 for very small case
0393     tstl    %d0     //test sign of ISCALE
0394     bges    iscale      //if pos, skip next inst
0395     addqw   #1,%d5      //if neg, set LAMBDA true
0396     cmpl    #0xffffecd4,%d0 //test iscale <= -4908
0397     bgts    no_inf      //if false, skip rest
0398     addil   #24,%d0     //add in 24 to iscale
0399     movel   #24,%d2     //put 24 in d2 for A9
0400 no_inf:
0401     negl    %d0     //and take abs of ISCALE
0402 iscale:
0403     fmoves  FONE,%fp1   //init fp1 to 1
0404     bfextu  USER_FPCR(%a6){#26:#2},%d1 //get initial rmode bits
0405     lslw    #1,%d1      //put them in bits 2:1
0406     addw    %d5,%d1     //add in LAMBDA
0407     lslw    #1,%d1      //put them in bits 3:1
0408     tstl    L_SCR2(%a6) //test sign of original x
0409     bges    x_pos       //if pos, don't set bit 0
0410     addql   #1,%d1      //if neg, set bit 0
0411 x_pos:
0412     leal    RBDTBL,%a2  //load rbdtbl base
0413     moveb   (%a2,%d1),%d3   //load d3 with new rmode
0414     lsll    #4,%d3      //put bits in proper position
0415     fmovel  %d3,%fpcr       //load bits into fpu
0416     lsrl    #4,%d3      //put bits in proper position
0417     tstb    %d3     //decode new rmode for pten table
0418     bnes    not_rn      //if zero, it is RN
0419     leal    PTENRN,%a1  //load a1 with RN table base
0420     bras    rmode       //exit decode
0421 not_rn:
0422     lsrb    #1,%d3      //get lsb in carry
0423     bccs    not_rp      //if carry clear, it is RM
0424     leal    PTENRP,%a1  //load a1 with RP table base
0425     bras    rmode       //exit decode
0426 not_rp:
0427     leal    PTENRM,%a1  //load a1 with RM table base
0428 rmode:
0429     clrl    %d3     //clr table index
0430 e_loop:
0431     lsrl    #1,%d0      //shift next bit into carry
0432     bccs    e_next      //if zero, skip the mul
0433     fmulx   (%a1,%d3),%fp1  //mul by 10**(d3_bit_no)
0434 e_next:
0435     addl    #12,%d3     //inc d3 to next pwrten table entry
0436     tstl    %d0     //test if ISCALE is zero
0437     bnes    e_loop      //if not, loop
0438
0439
0440 // A8. Clr INEX; Force RZ.
0441 //     The operation in A3 above may have set INEX2.
0442 //     RZ mode is forced for the scaling operation to insure
0443 //     only one rounding error.  The grs bits are collected in
0444 //     the INEX flag for use in A10.
0445 //
0446 // Register usage:
0447 //  Input/Output
0448
0449     fmovel  #0,%FPSR        //clr INEX
0450     fmovel  #rz_mode,%FPCR  //set RZ rounding mode
0451
0452
0453 // A9. Scale X -> Y.
0454 //     The mantissa is scaled to the desired number of significant
0455 //     digits.  The excess digits are collected in INEX2. If mul,
0456 //     Check d2 for excess 10 exponential value.  If not zero,
0457 //     the iscale value would have caused the pwrten calculation
0458 //     to overflow.  Only a negative iscale can cause this, so
0459 //     multiply by 10^(d2), which is now only allowed to be 24,
0460 //     with a multiply by 10^8 and 10^16, which is exact since
0461 //     10^24 is exact.  If the input was denormalized, we must
0462 //     create a busy stack frame with the mul command and the
0463 //     two operands, and allow the fpu to complete the multiply.
0464 //
0465 // Register usage:
0466 //  Input/Output
0467 //  d0: FPCR with RZ mode/Unchanged
0468 //  d2: 0 or 24/unchanged
0469 //  d3: x/x
0470 //  d4: LEN/Unchanged
0471 //  d5: ICTR:LAMBDA
0472 //  d6: ILOG/Unchanged
0473 //  d7: k-factor/Unchanged
0474 //  a0: ptr for original operand/final result
0475 //  a1: ptr to PTENRM array/Unchanged
0476 //  a2: x/x
0477 //  fp0: float(ILOG)/X adjusted for SCALE (Y)
0478 //  fp1: 10^ISCALE/Unchanged
0479 //  fp2: x/x
0480 //  F_SCR1:x/x
0481 //  F_SCR2:Abs(X) with $3fff exponent/Unchanged
0482 //  L_SCR1:x/x
0483 //  L_SCR2:first word of X packed/Unchanged
0484
0485 A9_str:
0486     fmovex  (%a0),%fp0  //load X from memory
0487     fabsx   %fp0        //use abs(X)
0488     tstw    %d5     //LAMBDA is in lower word of d5
0489     bne     sc_mul      //if neg (LAMBDA = 1), scale by mul
0490     fdivx   %fp1,%fp0       //calculate X / SCALE -> Y to fp0
0491     bras    A10_st      //branch to A10
0492
0493 sc_mul:
0494     tstb    BINDEC_FLG(%a6) //check for denorm
0495     beqs    A9_norm     //if norm, continue with mul
0496     fmovemx %fp1-%fp1,-(%a7)    //load ETEMP with 10^ISCALE
0497     movel   8(%a0),-(%a7)   //load FPTEMP with input arg
0498     movel   4(%a0),-(%a7)
0499     movel   (%a0),-(%a7)
0500     movel   #18,%d3     //load count for busy stack
0501 A9_loop:
0502     clrl    -(%a7)      //clear lword on stack
0503     dbf %d3,A9_loop
0504     moveb   VER_TMP(%a6),(%a7) //write current version number
0505     moveb   #BUSY_SIZE-4,1(%a7) //write current busy size
0506     moveb   #0x10,0x44(%a7) //set fcefpte[15] bit
0507     movew   #0x0023,0x40(%a7)   //load cmdreg1b with mul command
0508     moveb   #0xfe,0x8(%a7)  //load all 1s to cu savepc
0509     frestore (%a7)+     //restore frame to fpu for completion
0510     fmulx   36(%a1),%fp0    //multiply fp0 by 10^8
0511     fmulx   48(%a1),%fp0    //multiply fp0 by 10^16
0512     bras    A10_st
0513 A9_norm:
0514     tstw    %d2     //test for small exp case
0515     beqs    A9_con      //if zero, continue as normal
0516     fmulx   36(%a1),%fp0    //multiply fp0 by 10^8
0517     fmulx   48(%a1),%fp0    //multiply fp0 by 10^16
0518 A9_con:
0519     fmulx   %fp1,%fp0       //calculate X * SCALE -> Y to fp0
0520
0521
0522 // A10. Or in INEX.
0523 //      If INEX is set, round error occurred.  This is compensated
0524 //      for by 'or-ing' in the INEX2 flag to the lsb of Y.
0525 //
0526 // Register usage:
0527 //  Input/Output
0528 //  d0: FPCR with RZ mode/FPSR with INEX2 isolated
0529 //  d2: x/x
0530 //  d3: x/x
0531 //  d4: LEN/Unchanged
0532 //  d5: ICTR:LAMBDA
0533 //  d6: ILOG/Unchanged
0534 //  d7: k-factor/Unchanged
0535 //  a0: ptr for original operand/final result
0536 //  a1: ptr to PTENxx array/Unchanged
0537 //  a2: x/ptr to FP_SCR2(a6)
0538 //  fp0: Y/Y with lsb adjusted
0539 //  fp1: 10^ISCALE/Unchanged
0540 //  fp2: x/x
0541
0542 A10_st:
0543     fmovel  %FPSR,%d0       //get FPSR
0544     fmovex  %fp0,FP_SCR2(%a6)   //move Y to memory
0545     leal    FP_SCR2(%a6),%a2    //load a2 with ptr to FP_SCR2
0546     btstl   #9,%d0      //check if INEX2 set
0547     beqs    A11_st      //if clear, skip rest
0548     oril    #1,8(%a2)   //or in 1 to lsb of mantissa
0549     fmovex  FP_SCR2(%a6),%fp0   //write adjusted Y back to fpu
0550
0551
0552 // A11. Restore original FPCR; set size ext.
0553 //      Perform FINT operation in the user's rounding mode.  Keep
0554 //      the size to extended.  The sintdo entry point in the sint
0555 //      routine expects the FPCR value to be in USER_FPCR for
0556 //      mode and precision.  The original FPCR is saved in L_SCR1.
0557
0558 A11_st:
0559     movel   USER_FPCR(%a6),L_SCR1(%a6) //save it for later
0560     andil   #0x00000030,USER_FPCR(%a6) //set size to ext,
0561 //                  ;block exceptions
0562
0563
0564 // A12. Calculate YINT = FINT(Y) according to user's rounding mode.
0565 //      The FPSP routine sintd0 is used.  The output is in fp0.
0566 //
0567 // Register usage:
0568 //  Input/Output
0569 //  d0: FPSR with AINEX cleared/FPCR with size set to ext
0570 //  d2: x/x/scratch
0571 //  d3: x/x
0572 //  d4: LEN/Unchanged
0573 //  d5: ICTR:LAMBDA/Unchanged
0574 //  d6: ILOG/Unchanged
0575 //  d7: k-factor/Unchanged
0576 //  a0: ptr for original operand/src ptr for sintdo
0577 //  a1: ptr to PTENxx array/Unchanged
0578 //  a2: ptr to FP_SCR2(a6)/Unchanged
0579 //  a6: temp pointer to FP_SCR2(a6) - orig value saved and restored
0580 //  fp0: Y/YINT
0581 //  fp1: 10^ISCALE/Unchanged
0582 //  fp2: x/x
0583 //  F_SCR1:x/x
0584 //  F_SCR2:Y adjusted for inex/Y with original exponent
0585 //  L_SCR1:x/original USER_FPCR
0586 //  L_SCR2:first word of X packed/Unchanged
0587
0588 A12_st:
0589     moveml  %d0-%d1/%a0-%a1,-(%a7)  //save regs used by sintd0
0590     movel   L_SCR1(%a6),-(%a7)
0591     movel   L_SCR2(%a6),-(%a7)
0592     leal    FP_SCR2(%a6),%a0        //a0 is ptr to F_SCR2(a6)
0593     fmovex  %fp0,(%a0)      //move Y to memory at FP_SCR2(a6)
0594     tstl    L_SCR2(%a6)     //test sign of original operand
0595     bges    do_fint         //if pos, use Y
0596     orl #0x80000000,(%a0)       //if neg, use -Y
0597 do_fint:
0598     movel   USER_FPSR(%a6),-(%a7)
0599     bsr sintdo          //sint routine returns int in fp0
0600     moveb   (%a7),USER_FPSR(%a6)
0601     addl    #4,%a7
0602     movel   (%a7)+,L_SCR2(%a6)
0603     movel   (%a7)+,L_SCR1(%a6)
0604     moveml  (%a7)+,%d0-%d1/%a0-%a1  //restore regs used by sint
0605     movel   L_SCR2(%a6),FP_SCR2(%a6)    //restore original exponent
0606     movel   L_SCR1(%a6),USER_FPCR(%a6) //restore user's FPCR
0607
0608
0609 // A13. Check for LEN digits.
0610 //      If the int operation results in more than LEN digits,
0611 //      or less than LEN -1 digits, adjust ILOG and repeat from
0612 //      A6.  This test occurs only on the first pass.  If the
0613 //      result is exactly 10^LEN, decrement ILOG and divide
0614 //      the mantissa by 10.  The calculation of 10^LEN cannot
0615 //      be inexact, since all powers of ten upto 10^27 are exact
0616 //      in extended precision, so the use of a previous power-of-ten
0617 //      table will introduce no error.
0618 //
0619 //
0620 // Register usage:
0621 //  Input/Output
0622 //  d0: FPCR with size set to ext/scratch final = 0
0623 //  d2: x/x
0624 //  d3: x/scratch final = x
0625 //  d4: LEN/LEN adjusted
0626 //  d5: ICTR:LAMBDA/LAMBDA:ICTR
0627 //  d6: ILOG/ILOG adjusted
0628 //  d7: k-factor/Unchanged
0629 //  a0: pointer into memory for packed bcd string formation
0630 //  a1: ptr to PTENxx array/Unchanged
0631 //  a2: ptr to FP_SCR2(a6)/Unchanged
0632 //  fp0: int portion of Y/abs(YINT) adjusted
0633 //  fp1: 10^ISCALE/Unchanged
0634 //  fp2: x/10^LEN
0635 //  F_SCR1:x/x
0636 //  F_SCR2:Y with original exponent/Unchanged
0637 //  L_SCR1:original USER_FPCR/Unchanged
0638 //  L_SCR2:first word of X packed/Unchanged
0639
0640 A13_st:
0641     swap    %d5     //put ICTR in lower word of d5
0642     tstw    %d5     //check if ICTR = 0
0643     bne not_zr      //if non-zero, go to second test
0644 //
0645 // Compute 10^(LEN-1)
0646 //
0647     fmoves  FONE,%fp2   //init fp2 to 1.0
0648     movel   %d4,%d0     //put LEN in d0
0649     subql   #1,%d0      //d0 = LEN -1
0650     clrl    %d3     //clr table index
0651 l_loop:
0652     lsrl    #1,%d0      //shift next bit into carry
0653     bccs    l_next      //if zero, skip the mul
0654     fmulx   (%a1,%d3),%fp2  //mul by 10**(d3_bit_no)
0655 l_next:
0656     addl    #12,%d3     //inc d3 to next pwrten table entry
0657     tstl    %d0     //test if LEN is zero
0658     bnes    l_loop      //if not, loop
0659 //
0660 // 10^LEN-1 is computed for this test and A14.  If the input was
0661 // denormalized, check only the case in which YINT > 10^LEN.
0662 //
0663     tstb    BINDEC_FLG(%a6) //check if input was norm
0664     beqs    A13_con     //if norm, continue with checking
0665     fabsx   %fp0        //take abs of YINT
0666     bra test_2
0667 //
0668 // Compare abs(YINT) to 10^(LEN-1) and 10^LEN
0669 //
0670 A13_con:
0671     fabsx   %fp0        //take abs of YINT
0672     fcmpx   %fp2,%fp0       //compare abs(YINT) with 10^(LEN-1)
0673     fbge    test_2      //if greater, do next test
0674     subql   #1,%d6      //subtract 1 from ILOG
0675     movew   #1,%d5      //set ICTR
0676     fmovel  #rm_mode,%FPCR  //set rmode to RM
0677     fmuls   FTEN,%fp2   //compute 10^LEN
0678     bra A6_str      //return to A6 and recompute YINT
0679 test_2:
0680     fmuls   FTEN,%fp2   //compute 10^LEN
0681     fcmpx   %fp2,%fp0       //compare abs(YINT) with 10^LEN
0682     fblt    A14_st      //if less, all is ok, go to A14
0683     fbgt    fix_ex      //if greater, fix and redo
0684     fdivs   FTEN,%fp0   //if equal, divide by 10
0685     addql   #1,%d6      // and inc ILOG
0686     bras    A14_st      // and continue elsewhere
0687 fix_ex:
0688     addql   #1,%d6      //increment ILOG by 1
0689     movew   #1,%d5      //set ICTR
0690     fmovel  #rm_mode,%FPCR  //set rmode to RM
0691     bra A6_str      //return to A6 and recompute YINT
0692 //
0693 // Since ICTR <> 0, we have already been through one adjustment,
0694 // and shouldn't have another; this is to check if abs(YINT) = 10^LEN
0695 // 10^LEN is again computed using whatever table is in a1 since the
0696 // value calculated cannot be inexact.
0697 //
0698 not_zr:
0699     fmoves  FONE,%fp2   //init fp2 to 1.0
0700     movel   %d4,%d0     //put LEN in d0
0701     clrl    %d3     //clr table index
0702 z_loop:
0703     lsrl    #1,%d0      //shift next bit into carry
0704     bccs    z_next      //if zero, skip the mul
0705     fmulx   (%a1,%d3),%fp2  //mul by 10**(d3_bit_no)
0706 z_next:
0707     addl    #12,%d3     //inc d3 to next pwrten table entry
0708     tstl    %d0     //test if LEN is zero
0709     bnes    z_loop      //if not, loop
0710     fabsx   %fp0        //get abs(YINT)
0711     fcmpx   %fp2,%fp0       //check if abs(YINT) = 10^LEN
0712     fbne    A14_st      //if not, skip this
0713     fdivs   FTEN,%fp0   //divide abs(YINT) by 10
0714     addql   #1,%d6      //and inc ILOG by 1
0715     addql   #1,%d4      // and inc LEN
0716     fmuls   FTEN,%fp2   // if LEN++, the get 10^^LEN
0717
0718
0719 // A14. Convert the mantissa to bcd.
0720 //      The binstr routine is used to convert the LEN digit
0721 //      mantissa to bcd in memory.  The input to binstr is
0722 //      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
0723 //      such that the decimal point is to the left of bit 63.
0724 //      The bcd digits are stored in the correct position in
0725 //      the final string area in memory.
0726 //
0727 //
0728 // Register usage:
0729 //  Input/Output
0730 //  d0: x/LEN call to binstr - final is 0
0731 //  d1: x/0
0732 //  d2: x/ms 32-bits of mant of abs(YINT)
0733 //  d3: x/ls 32-bits of mant of abs(YINT)
0734 //  d4: LEN/Unchanged
0735 //  d5: ICTR:LAMBDA/LAMBDA:ICTR
0736 //  d6: ILOG
0737 //  d7: k-factor/Unchanged
0738 //  a0: pointer into memory for packed bcd string formation
0739 //      /ptr to first mantissa byte in result string
0740 //  a1: ptr to PTENxx array/Unchanged
0741 //  a2: ptr to FP_SCR2(a6)/Unchanged
0742 //  fp0: int portion of Y/abs(YINT) adjusted
0743 //  fp1: 10^ISCALE/Unchanged
0744 //  fp2: 10^LEN/Unchanged
0745 //  F_SCR1:x/Work area for final result
0746 //  F_SCR2:Y with original exponent/Unchanged
0747 //  L_SCR1:original USER_FPCR/Unchanged
0748 //  L_SCR2:first word of X packed/Unchanged
0749
0750 A14_st:
0751     fmovel  #rz_mode,%FPCR  //force rz for conversion
0752     fdivx   %fp2,%fp0       //divide abs(YINT) by 10^LEN
0753     leal    FP_SCR1(%a6),%a0
0754     fmovex  %fp0,(%a0)  //move abs(YINT)/10^LEN to memory
0755     movel   4(%a0),%d2  //move 2nd word of FP_RES to d2
0756     movel   8(%a0),%d3  //move 3rd word of FP_RES to d3
0757     clrl    4(%a0)      //zero word 2 of FP_RES
0758     clrl    8(%a0)      //zero word 3 of FP_RES
0759     movel   (%a0),%d0       //move exponent to d0
0760     swap    %d0     //put exponent in lower word
0761     beqs    no_sft      //if zero, don't shift
0762     subil   #0x3ffd,%d0 //sub bias less 2 to make fract
0763     tstl    %d0     //check if > 1
0764     bgts    no_sft      //if so, don't shift
0765     negl    %d0     //make exp positive
0766 m_loop:
0767     lsrl    #1,%d2      //shift d2:d3 right, add 0s
0768     roxrl   #1,%d3      //the number of places
0769     dbf %d0,m_loop  //given in d0
0770 no_sft:
0771     tstl    %d2     //check for mantissa of zero
0772     bnes    no_zr       //if not, go on
0773     tstl    %d3     //continue zero check
0774     beqs    zer_m       //if zero, go directly to binstr
0775 no_zr:
0776     clrl    %d1     //put zero in d1 for addx
0777     addil   #0x00000080,%d3 //inc at bit 7
0778     addxl   %d1,%d2     //continue inc
0779     andil   #0xffffff80,%d3 //strip off lsb not used by 882
0780 zer_m:
0781     movel   %d4,%d0     //put LEN in d0 for binstr call
0782     addql   #3,%a0      //a0 points to M16 byte in result
0783     bsr binstr      //call binstr to convert mant
0784
0785
0786 // A15. Convert the exponent to bcd.
0787 //      As in A14 above, the exp is converted to bcd and the
0788 //      digits are stored in the final string.
0789 //
0790 //      Digits are stored in L_SCR1(a6) on return from BINDEC as:
0791 //
0792 //       32               16 15                0
0793 //  -----------------------------------------
0794 //      |  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
0795 //  -----------------------------------------
0796 //
0797 // And are moved into their proper places in FP_SCR1.  If digit e4
0798 // is non-zero, OPERR is signaled.  In all cases, all 4 digits are
0799 // written as specified in the 881/882 manual for packed decimal.
0800 //
0801 // Register usage:
0802 //  Input/Output
0803 //  d0: x/LEN call to binstr - final is 0
0804 //  d1: x/scratch (0);shift count for final exponent packing
0805 //  d2: x/ms 32-bits of exp fraction/scratch
0806 //  d3: x/ls 32-bits of exp fraction
0807 //  d4: LEN/Unchanged
0808 //  d5: ICTR:LAMBDA/LAMBDA:ICTR
0809 //  d6: ILOG
0810 //  d7: k-factor/Unchanged
0811 //  a0: ptr to result string/ptr to L_SCR1(a6)
0812 //  a1: ptr to PTENxx array/Unchanged
0813 //  a2: ptr to FP_SCR2(a6)/Unchanged
0814 //  fp0: abs(YINT) adjusted/float(ILOG)
0815 //  fp1: 10^ISCALE/Unchanged
0816 //  fp2: 10^LEN/Unchanged
0817 //  F_SCR1:Work area for final result/BCD result
0818 //  F_SCR2:Y with original exponent/ILOG/10^4
0819 //  L_SCR1:original USER_FPCR/Exponent digits on return from binstr
0820 //  L_SCR2:first word of X packed/Unchanged
0821
0822 A15_st:
0823     tstb    BINDEC_FLG(%a6) //check for denorm
0824     beqs    not_denorm
0825     ftstx   %fp0        //test for zero
0826     fbeq    den_zero    //if zero, use k-factor or 4933
0827     fmovel  %d6,%fp0        //float ILOG
0828     fabsx   %fp0        //get abs of ILOG
0829     bras    convrt
0830 den_zero:
0831     tstl    %d7     //check sign of the k-factor
0832     blts    use_ilog    //if negative, use ILOG
0833     fmoves  F4933,%fp0  //force exponent to 4933
0834     bras    convrt      //do it
0835 use_ilog:
0836     fmovel  %d6,%fp0        //float ILOG
0837     fabsx   %fp0        //get abs of ILOG
0838     bras    convrt
0839 not_denorm:
0840     ftstx   %fp0        //test for zero
0841     fbne    not_zero    //if zero, force exponent
0842     fmoves  FONE,%fp0   //force exponent to 1
0843     bras    convrt      //do it
0844 not_zero:
0845     fmovel  %d6,%fp0        //float ILOG
0846     fabsx   %fp0        //get abs of ILOG
0847 convrt:
0848     fdivx   24(%a1),%fp0    //compute ILOG/10^4
0849     fmovex  %fp0,FP_SCR2(%a6)   //store fp0 in memory
0850     movel   4(%a2),%d2  //move word 2 to d2
0851     movel   8(%a2),%d3  //move word 3 to d3
0852     movew   (%a2),%d0       //move exp to d0
0853     beqs    x_loop_fin  //if zero, skip the shift
0854     subiw   #0x3ffd,%d0 //subtract off bias
0855     negw    %d0     //make exp positive
0856 x_loop:
0857     lsrl    #1,%d2      //shift d2:d3 right
0858     roxrl   #1,%d3      //the number of places
0859     dbf %d0,x_loop  //given in d0
0860 x_loop_fin:
0861     clrl    %d1     //put zero in d1 for addx
0862     addil   #0x00000080,%d3 //inc at bit 6
0863     addxl   %d1,%d2     //continue inc
0864     andil   #0xffffff80,%d3 //strip off lsb not used by 882
0865     movel   #4,%d0      //put 4 in d0 for binstr call
0866     leal    L_SCR1(%a6),%a0 //a0 is ptr to L_SCR1 for exp digits
0867     bsr binstr      //call binstr to convert exp
0868     movel   L_SCR1(%a6),%d0 //load L_SCR1 lword to d0
0869     movel   #12,%d1     //use d1 for shift count
0870     lsrl    %d1,%d0     //shift d0 right by 12
0871     bfins   %d0,FP_SCR1(%a6){#4:#12} //put e3:e2:e1 in FP_SCR1
0872     lsrl    %d1,%d0     //shift d0 right by 12
0873     bfins   %d0,FP_SCR1(%a6){#16:#4} //put e4 in FP_SCR1
0874     tstb    %d0     //check if e4 is zero
0875     beqs    A16_st      //if zero, skip rest
0876     orl #opaop_mask,USER_FPSR(%a6) //set OPERR & AIOP in USER_FPSR
0877
0878
0879 // A16. Write sign bits to final string.
0880 //     Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
0881 //
0882 // Register usage:
0883 //  Input/Output
0884 //  d0: x/scratch - final is x
0885 //  d2: x/x
0886 //  d3: x/x
0887 //  d4: LEN/Unchanged
0888 //  d5: ICTR:LAMBDA/LAMBDA:ICTR
0889 //  d6: ILOG/ILOG adjusted
0890 //  d7: k-factor/Unchanged
0891 //  a0: ptr to L_SCR1(a6)/Unchanged
0892 //  a1: ptr to PTENxx array/Unchanged
0893 //  a2: ptr to FP_SCR2(a6)/Unchanged
0894 //  fp0: float(ILOG)/Unchanged
0895 //  fp1: 10^ISCALE/Unchanged
0896 //  fp2: 10^LEN/Unchanged
0897 //  F_SCR1:BCD result with correct signs
0898 //  F_SCR2:ILOG/10^4
0899 //  L_SCR1:Exponent digits on return from binstr
0900 //  L_SCR2:first word of X packed/Unchanged
0901
0902 A16_st:
0903     clrl    %d0     //clr d0 for collection of signs
0904     andib   #0x0f,FP_SCR1(%a6) //clear first nibble of FP_SCR1
0905     tstl    L_SCR2(%a6) //check sign of original mantissa
0906     bges    mant_p      //if pos, don't set SM
0907     moveql  #2,%d0      //move 2 in to d0 for SM
0908 mant_p:
0909     tstl    %d6     //check sign of ILOG
0910     bges    wr_sgn      //if pos, don't set SE
0911     addql   #1,%d0      //set bit 0 in d0 for SE
0912 wr_sgn:
0913     bfins   %d0,FP_SCR1(%a6){#0:#2} //insert SM and SE into FP_SCR1
0914
0915 // Clean up and restore all registers used.
0916
0917     fmovel  #0,%FPSR        //clear possible inex2/ainex bits
0918     fmovemx (%a7)+,%fp0-%fp2
0919     moveml  (%a7)+,%d2-%d7/%a2
0920     rts
0921
0922     |end