shared/fpsp/srem_mod.S

0001 #include "fpsp-namespace.h"
0002 //
0003 //
0004 //  srem_mod.sa 3.1 12/10/90
0005 //
0006 //      The entry point sMOD computes the floating point MOD of the
0007 //      input values X and Y. The entry point sREM computes the floating
0008 //      point (IEEE) REM of the input values X and Y.
0009 //
0010 //      INPUT
0011 //      -----
0012 //      Double-extended value Y is pointed to by address in register
0013 //      A0. Double-extended value X is located in -12(A0). The values
0014 //      of X and Y are both nonzero and finite; although either or both
0015 //      of them can be denormalized. The special cases of zeros, NaNs,
0016 //      and infinities are handled elsewhere.
0017 //
0018 //      OUTPUT
0019 //      ------
0020 //      FREM(X,Y) or FMOD(X,Y), depending on entry point.
0021 //
0022 //       ALGORITHM
0023 //       ---------
0024 //
0025 //       Step 1.  Save and strip signs of X and Y: signX := sign(X),
0026 //                signY := sign(Y), X := |X|, Y := |Y|,
0027 //                signQ := signX EOR signY. Record whether MOD or REM
0028 //                is requested.
0029 //
0030 //       Step 2.  Set L := expo(X)-expo(Y), k := 0, Q := 0.
0031 //                If (L < 0) then
0032 //                   R := X, go to Step 4.
0033 //                else
0034 //                   R := 2^(-L)X, j := L.
0035 //                endif
0036 //
0037 //       Step 3.  Perform MOD(X,Y)
0038 //            3.1 If R = Y, go to Step 9.
0039 //            3.2 If R > Y, then { R := R - Y, Q := Q + 1}
0040 //            3.3 If j = 0, go to Step 4.
0041 //            3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to
0042 //                Step 3.1.
0043 //
0044 //       Step 4.  At this point, R = X - QY = MOD(X,Y). Set
0045 //                Last_Subtract := false (used in Step 7 below). If
0046 //                MOD is requested, go to Step 6.
0047 //
0048 //       Step 5.  R = MOD(X,Y), but REM(X,Y) is requested.
0049 //            5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to
0050 //                Step 6.
0051 //            5.2 If R > Y/2, then { set Last_Subtract := true,
0052 //                Q := Q + 1, Y := signX*Y }. Go to Step 6.
0053 //            5.3 This is the tricky case of R = Y/2. If Q is odd,
0054 //                then { Q := Q + 1, signX := -signX }.
0055 //
0056 //       Step 6.  R := signX*R.
0057 //
0058 //       Step 7.  If Last_Subtract = true, R := R - Y.
0059 //
0060 //       Step 8.  Return signQ, last 7 bits of Q, and R as required.
0061 //
0062 //       Step 9.  At this point, R = 2^(-j)*X - Q Y = Y. Thus,
0063 //                X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1),
0064 //                R := 0. Return signQ, last 7 bits of Q, and R.
0065 //
0066 //
0067
0068 //      Copyright (C) Motorola, Inc. 1990
0069 //          All Rights Reserved
0070 //
0071 //  THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
0072 //  The copyright notice above does not evidence any
0073 //  actual or intended publication of such source code.
0074
0075 SREM_MOD:    //idnt    2,1 | Motorola 040 Floating Point Software Package
0076
0077     |section    8
0078
0079 #include "fpsp.defs"
0080
0081     .set    Mod_Flag,L_SCR3        // long: 0 when entered through smod, 1 when entered through srem
0082     .set    SignY,FP_SCR3+4        // word: sign/exponent word of Y exactly as read from (A0)
0083     .set    SignX,FP_SCR3+8        // word: sign/exponent word of X; bit 15 is flipped by Tie_Case
0084     .set    SignQ,FP_SCR3+12       // word: sign(X) EOR sign(Y), isolated in bit 15
0085     .set    Sc_Flag,FP_SCR4        // long: nonzero => Restore multiplies the result by Scale
0086
0087     .set    Y,FP_SCR1              // memory image of |Y| used by fsubx: exponent word at Y+0, Y+2 cleared on the Do_Scale path
0088     .set    Y_Hi,Y+4               // ...upper 32 bits of the mantissa of |Y|
0089     .set    Y_Lo,Y+8               // ...lower 32 bits of the mantissa of |Y|
0090
0091     .set    R,FP_SCR2              // memory image of R loaded into FP0: exponent word at R+0, R+2 cleared
0092     .set    R_Hi,R+4               // ...upper 32 bits of the mantissa of R
0093     .set    R_Lo,R+8               // ...lower 32 bits of the mantissa of R
0094
0095 // Scale: exponent word $0001, mantissa $80000000_00000000, i.e. 2^(1-$3FFF) = 2^(-$3FFE). Undoes the extra $3FFE of bias Do_Scale leaves in R.
0096 Scale:     .long    0x00010000,0x80000000,0x00000000,0x00000000
0097
0098     |xref   t_avoid_unsupp
0099
0100         .global        smod
0101 smod:                                // entry point: FMOD(X,Y)
0102 // Inputs per the file header: A0 -> Y, X at -12(A0). Scratch slots are addressed off A6.
0103    movel               #0,Mod_Flag(%a6)   // ...Mod_Flag = 0: MOD requested
0104    bras                Mod_Rem
0105
0106         .global        srem
0107 srem:                                // entry point: IEEE FREM(X,Y)
0108 // Same inputs as smod; execution falls through into Mod_Rem.
0109    movel               #1,Mod_Flag(%a6)   // ...Mod_Flag = 1: REM requested
0110
0111 Mod_Rem:
0112 //..Common body of smod/srem. Save sign of Y and unpack |Y| into (D3,D4,D5): D3 = exponent, (D4,D5) = 64-bit mantissa
0113    moveml              %d2-%d7,-(%a7)     // ...save data registers (popped again at Restore)
0114    movew               (%a0),%d3          // ...sign/exponent word of Y
0115    movew               %d3,SignY(%a6)
0116    andil               #0x00007FFF,%d3   // ...Y := |Y|; also clears the stale upper word of D3
0117
0118 //..Fetch the mantissa of Y
0119    movel               4(%a0),%d4
0120    movel               8(%a0),%d5        // ...(D3,D4,D5) is |Y|
0121
0122    tstl                %d3
0123    bnes                Y_Normal           // ...nonzero exponent field: only the bias needs adjusting
0124 //..Exponent field is zero: Y is denormalized and must be normalized by hand
0125    movel               #0x00003FFE,%d3  // ...$3FFD + 1
0126    tstl                %d4
0127    bnes                HiY_not0
0128
0129 HiY_0:                                // upper mantissa long is zero
0130    movel               %d5,%d4           // ...move the lower long up: a 32-bit left shift
0131    clrl                %d5
0132    subil               #32,%d3           // ...compensate the exponent for that shift
0133    clrl                %d6
0134    bfffo                %d4{#0:#32},%d6  // ...D6 := offset of first set bit = number of leading zeros in D4
0135    lsll                %d6,%d4
0136    subl                %d6,%d3           // ...(D3,D4,D5) is normalized
0137 //                                       ...with bias $7FFD
0138    bras                Chk_X
0139
0140 HiY_not0:                             // upper mantissa long is nonzero: 64-bit left shift of (D4,D5) by D6
0141    clrl                %d6
0142    bfffo                %d4{#0:#32},%d6  // ...D6 := number of leading zeros in D4
0143    subl                %d6,%d3
0144    lsll                %d6,%d4
0145    movel               %d5,%d7           // ...a copy of D5
0146    lsll                %d6,%d5
0147    negl                %d6
0148    addil               #32,%d6           // ...D6 := 32 - shift count
0149    lsrl                %d6,%d7           // ...D7 := bits of old D5 that cross into D4
0150    orl                 %d7,%d4           // ...(D3,D4,D5) normalized
0151 //                                       ...with bias $7FFD
0152    bras                Chk_X
0153
0154 Y_Normal:
0155    addil               #0x00003FFE,%d3   // ...(D3,D4,D5) normalized
0156 //                                       ...with bias $7FFD
0157
0158 Chk_X:                                // save sign of X, derive sign(Q), unpack |X| into (D0,D1,D2)
0159    movew               -12(%a0),%d0      // ...sign/exponent word of X
0160    movew               %d0,SignX(%a6)
0161    movew               SignY(%a6),%d1
0162    eorl                %d0,%d1           // ...bit 15 := signX EOR signY
0163    andil               #0x00008000,%d1   // ...keep only that sign bit
0164    movew               %d1,SignQ(%a6)   // ...sign(Q) obtained
0165    andil               #0x00007FFF,%d0   // ...X := |X|; also clears the upper word of D0
0166    movel               -8(%a0),%d1
0167    movel               -4(%a0),%d2       // ...(D0,D1,D2) is |X|
0168    tstl                %d0
0169    bnes                X_Normal          // ...nonzero exponent field: only the bias needs adjusting
0170    movel               #0x00003FFE,%d0   // ...exponent field is zero: same treatment as denormalized Y
0171    tstl                %d1
0172    bnes                HiX_not0
0173
0174 HiX_0:                                // upper mantissa long is zero
0175    movel               %d2,%d1           // ...move the lower long up: a 32-bit left shift
0176    clrl                %d2
0177    subil               #32,%d0           // ...compensate the exponent for that shift
0178    clrl                %d6
0179    bfffo                %d1{#0:#32},%d6  // ...D6 := number of leading zeros in D1
0180    lsll                %d6,%d1
0181    subl                %d6,%d0           // ...(D0,D1,D2) is normalized
0182 //                                       ...with bias $7FFD
0183    bras                Init
0184
0185 HiX_not0:                             // upper mantissa long is nonzero: 64-bit left shift of (D1,D2) by D6
0186    clrl                %d6
0187    bfffo                %d1{#0:#32},%d6  // ...D6 := number of leading zeros in D1
0188    subl                %d6,%d0
0189    lsll                %d6,%d1
0190    movel               %d2,%d7           // ...a copy of D2
0191    lsll                %d6,%d2
0192    negl                %d6
0193    addil               #32,%d6           // ...D6 := 32 - shift count
0194    lsrl                %d6,%d7           // ...D7 := bits of old D2 that cross into D1
0195    orl                 %d7,%d1           // ...(D0,D1,D2) normalized
0196 //                                       ...with bias $7FFD
0197    bras                Init
0198
0199 X_Normal:
0200    addil               #0x00003FFE,%d0   // ...(D0,D1,D2) normalized
0201 //                                       ...with bias $7FFD
0202
0203 Init:
0204 //..Step 2: set up the loop state. On exit D0 = j = L, D3 = Q = 0, D6 = carry = 0, A1 = k = 0, (D1,D2) = mantissa of R
0205    movel               %d3,L_SCR1(%a6)   // ...save biased expo(Y)
0206    movel        %d0,L_SCR2(%a6) //save d0 = biased expo(X); needed again only if L < 0
0207    subl                %d3,%d0           // ...L := expo(X)-expo(Y)
0208 //   Move.L               D0,L            ...D0 is j
0209    clrl                %d6              // ...D6 := carry <- 0
0210    clrl                %d3              // ...D3 is Q
0211    moveal              #0,%a1           // ...A1 is k; j+k=L, Q=0 (k is only incremented in the code shown, never read)
0212
0213 //..(Carry,D1,D2) is R
0214    tstl                %d0
0215    bges                Mod_Loop
0216
0217 //..expo(X) < expo(Y). Thus X = mod(X,Y)
0218 //..(D1,D2) already holds the mantissa of |X|; only the exponent has to be put back
0219    movel        L_SCR2(%a6),%d0 //restore d0
0220    bra                Get_Mod
0221
0222 //..At this point  R = 2^(-L)X; Q = 0; k = 0; and  k+j = L
0223
0224
0225 Mod_Loop:                             // Step 3: one compare / conditional subtract / shift per quotient bit
0226    tstl                %d6              // ...test carry bit
0227    bgts                R_GT_Y           // ...carry set: R has a 65th bit, so R > Y without comparing
0228
0229 //..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
0230    cmpl                %d4,%d1           // ...compare hi(R) and hi(Y)
0231    bnes                R_NE_Y
0232    cmpl                %d5,%d2           // ...compare lo(R) and lo(Y)
0233    bnes                R_NE_Y
0234
0235 //..At this point, R = Y
0236    bra                Rem_is_0
0237
0238 R_NE_Y:
0239 //..use the borrow of the previous compare (bnes leaves the condition codes of the deciding cmpl untouched)
0240    bcss                R_LT_Y          // ...borrow is set iff R < Y
0241
0242 R_GT_Y:
0243 //..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
0244 //..and Y < (D1,D2) < 2Y. Either way, perform R - Y
0245    subl                %d5,%d2           // ...lo(R) - lo(Y)
0246    subxl               %d4,%d1           // ...hi(R) - hi(Y) - borrow from the low subtract
0247    clrl                %d6              // ...clear carry
0248    addql               #1,%d3           // ...Q := Q + 1
0249
0250 R_LT_Y:
0251 //..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
0252    tstl                %d0              // ...see if j = 0.
0253    beqs                PostLoop
0254
0255    addl                %d3,%d3           // ...Q := 2Q
0256    addl                %d2,%d2           // ...lo(R) = 2lo(R)
0257    roxll               #1,%d1           // ...hi(R) = 2hi(R) + carry
0258    scs                  %d6              // ...set Carry if 2(R) overflows (low byte of D6 := $FF, so tstl sees D6 > 0)
0259    addql               #1,%a1           // ...k := k+1
0260    subql               #1,%d0           // ...j := j - 1
0261 //..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
0262
0263    bras                Mod_Loop
0264
0265 PostLoop:
0266 //..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
0267
0268 //..normalize R. Its mantissa is aligned with Y's, so it starts from the saved exponent of Y.
0269    movel               L_SCR1(%a6),%d0           // ...new biased expo of R
0270    tstl                %d1
0271    bnes                HiR_not0
0272
0273 HiR_0:                                // upper mantissa long is zero
0274    movel               %d2,%d1           // ...move the lower long up: a 32-bit left shift
0275    clrl                %d2
0276    subil               #32,%d0           // ...compensate the exponent for that shift
0277    clrl                %d6
0278    bfffo                %d1{#0:#32},%d6  // ...D6 := number of leading zeros in D1
0279    lsll                %d6,%d1
0280    subl                %d6,%d0           // ...(D0,D1,D2) is normalized
0281 //                                       ...with bias $7FFD
0282    bras                Get_Mod
0283
0284 HiR_not0:                             // upper mantissa long is nonzero
0285    clrl                %d6
0286    bfffo                %d1{#0:#32},%d6  // ...D6 := number of leading zeros in D1; N is set when bit 31 of D1 is set
0287    bmis                Get_Mod         // ...already normalized
0288    subl                %d6,%d0
0289    lsll                %d6,%d1
0290    movel               %d2,%d7           // ...a copy of D2
0291    lsll                %d6,%d2
0292    negl                %d6
0293    addil               #32,%d6           // ...D6 := 32 - shift count
0294    lsrl                %d6,%d7           // ...D7 := bits of old D2 that cross into D1
0295    orl                 %d7,%d1           // ...(D0,D1,D2) normalized
0296
0297 //
0298 Get_Mod:                              // build memory images of R and |Y|, load R into FP0
0299    cmpil        #0x000041FE,%d0     // ...$41FE = $3FFE + $200
0300    bges     No_Scale                // ...expo(R) large enough: remove the extra $3FFE of bias now
0301 Do_Scale:                             // small expo(R): keep the extra bias and let Restore multiply by Scale
0302    movew        %d0,R(%a6)          // ...exponent word of R, sign bit clear
0303    clrw     R+2(%a6)
0304    movel        %d1,R_Hi(%a6)
0305    movel        %d2,R_Lo(%a6)
0306    movel        L_SCR1(%a6),%d6     // ...biased expo(Y), still with bias $7FFD
0307    movew        %d6,Y(%a6)
0308    clrw     Y+2(%a6)
0309    movel        %d4,Y_Hi(%a6)
0310    movel        %d5,Y_Lo(%a6)
0311    fmovex       R(%a6),%fp0     // ...no exception
0312    movel        #1,Sc_Flag(%a6)     // ...request the final multiply by Scale
0313    bras     ModOrRem
0314 No_Scale:
0315    movel        %d1,R_Hi(%a6)
0316    movel        %d2,R_Lo(%a6)
0317    subil        #0x3FFE,%d0         // ...D0 := expo(R) without the extra $3FFE
0318    movew        %d0,R(%a6)
0319    clrw     R+2(%a6)
0320    movel        L_SCR1(%a6),%d6
0321    subil        #0x3FFE,%d6         // ...same adjustment for expo(Y)
0322    movel        %d6,L_SCR1(%a6)     // ...keep L_SCR1 comparable with D0 for ModOrRem
0323    fmovex       R(%a6),%fp0
0324    movew        %d6,Y(%a6)          // ...note: Y+2 is not cleared on this path
0325    movel        %d4,Y_Hi(%a6)
0326    movel        %d5,Y_Lo(%a6)
0327    movel        #0,Sc_Flag(%a6)     // ...no final scaling needed
0328
0329 //
0330
0331
0332 ModOrRem:                             // Steps 4/5: for REM, decide whether one more subtraction of Y is needed
0333    movel               Mod_Flag(%a6),%d6
0334    beqs                Fix_Sign         // ...MOD requested: R is already the answer
0335
0336    movel               L_SCR1(%a6),%d6           // ...new biased expo(Y)
0337    subql               #1,%d6           // ...biased expo(Y/2)
0338    cmpl                %d6,%d0          // ...compare expo(R) with expo(Y/2)
0339    blts                Fix_Sign         // ...R < Y/2
0340    bgts                Last_Sub         // ...R > Y/2
0341 //..Exponents are equal: compare the mantissas of R and Y
0342    cmpl                %d4,%d1
0343    bnes                Not_EQ
0344    cmpl                %d5,%d2
0345    bnes                Not_EQ
0346    bra                Tie_Case         // ...R = Y/2 exactly
0347
0348 Not_EQ:
0349    bcss                Fix_Sign         // ...borrow from the deciding cmpl: R < Y/2
0350
0351 Last_Sub:
0352 //..R > Y/2: subtract |Y| once more; the sign of X is applied afterwards at Fix_Sign
0353    fsubx        Y(%a6),%fp0     // ...no exceptions
0354    addql               #1,%d3           // ...Q := Q + 1
0355
0356 //
0357
0358 Fix_Sign:                             // Step 6: give the result the sign recorded in SignX
0359 //..Get sign of X
0360    movew               SignX(%a6),%d6   // ...N := bit 15 of SignX
0361    bges     Get_Q                       // ...sign bit clear: leave FP0 as is
0362    fnegx        %fp0
0363
0364 //..Get Q
0365 //..Step 8: pack sign(Q) and the low 7 bits of Q into bits 23-16 of FPSR
0366 Get_Q:
0367    clrl     %d6
0368    movew               SignQ(%a6),%d6        // ...D6 is sign(Q)
0369    movel               #8,%d7
0370    lsrl                %d7,%d6           // ...move the sign from bit 15 down to bit 7
0371    andil               #0x0000007F,%d3   // ...7 bits of Q
0372    orl                 %d6,%d3           // ...sign and bits of Q
0373    swap                 %d3              // ...move that byte up to bits 23-16
0374    fmovel              %fpsr,%d6
0375    andil               #0xFF00FFFF,%d6   // ...clear bits 23-16 of the FPSR image
0376    orl                 %d3,%d6
0377    fmovel              %d6,%fpsr         // ...put Q in fpsr
0378
0379 //
0380 Restore:                              // common exit: pop registers, reload FPCR, scale the result if requested
0381    moveml              (%a7)+,%d2-%d7    // ...undo the moveml at Mod_Rem
0382    fmovel              USER_FPCR(%a6),%fpcr   // ...NOTE(review): presumably the caller's rounding mode/precision - confirm in fpsp.defs
0383    movel               Sc_Flag(%a6),%d0
0384    beqs                Finish           // ...Sc_Flag = 0: result already has its true exponent
0385    fmulx        Scale(%pc),%fp0 // ...may cause underflow
0386    bra          t_avoid_unsupp  //check for denorm as a
0387 //                  ;result of the scaling (branch, not call: this path has no rts of its own)
0388
0389 Finish:
0390     fmovex      %fp0,%fp0       //capture exceptions & round
0391     rts
0392
0393 Rem_is_0:                             // Step 9: reached from Mod_Loop when R = Y
0394 //..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
0395    addql               #1,%d3
0396    cmpil               #8,%d0           // ...D0 is j
0397    bges                Q_Big            // ...j >= 8: the low 7 bits of 2^j (Q+1) are all zero
0398
0399    lsll                %d0,%d3          // ...Q := 2^j (Q+1); Get_Q keeps only the low 7 bits
0400    bras                Set_R_0
0401
0402 Q_Big:
0403    clrl                %d3
0404
0405 Set_R_0:
0406    fmoves       #0x00000000,%fp0    // ...FP0 := +0 (single-precision immediate); Fix_Sign applies the sign of X
0407    movel        #0,Sc_Flag(%a6)     // ...no scaling at Restore
0408    bra                Fix_Sign
0409
0410 Tie_Case:                             // Step 5.3: R = Y/2 exactly; choose the even quotient
0411 //..Check parity of Q
0412    movel               %d3,%d6
0413    andil               #0x00000001,%d6   // ...isolate the low bit of Q
0414    tstl                %d6
0415    beq                Fix_Sign  // ...Q is even
0416
0417 //..Q is odd, Q := Q + 1, signX := -signX
0418    addql               #1,%d3
0419    movew               SignX(%a6),%d6
0420    eoril               #0x00008000,%d6   // ...flip bit 15, so Fix_Sign produces -signX*R, i.e. signX*(R - Y)
0421    movew               %d6,SignX(%a6)
0422    bra                Fix_Sign
0423
0424    //end