X-Git-Url: https://oss.titaniummirror.com/gitweb/?a=blobdiff_plain;f=libgcc%2Fconfig%2Flibbid%2Fbid128_round_integral.c;fp=libgcc%2Fconfig%2Flibbid%2Fbid128_round_integral.c;h=6efcd474380bddd48d2963c324c0e45ff1f84d78;hb=6fed43773c9b0ce596dca5686f37ac3fc0fa11c0;hp=0000000000000000000000000000000000000000;hpb=27b11d56b743098deb193d510b337ba22dc52e5c;p=msp430-gcc.git diff --git a/libgcc/config/libbid/bid128_round_integral.c b/libgcc/config/libbid/bid128_round_integral.c new file mode 100644 index 00000000..6efcd474 --- /dev/null +++ b/libgcc/config/libbid/bid128_round_integral.c @@ -0,0 +1,1951 @@ +/* Copyright (C) 2007, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#define BID_128RES + +#include "bid_internal.h" + +/***************************************************************************** + * BID128_round_integral_exact + ****************************************************************************/ + +BID128_FUNCTION_ARG1 (bid128_round_integral_exact, x) + + UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} + }; +UINT64 x_sign; +UINT64 x_exp; +int exp; // unbiased exponent + // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) +UINT64 tmp64; +BID_UI64DOUBLE tmp1; +unsigned int x_nr_bits; +int q, ind, shift; +UINT128 C1; +UINT256 fstar; +UINT256 P256; + + // check for NaN or Infinity +if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { + // x is special + if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN + // if x = NaN, then res = Q (x) + // check first for non-canonical NaN payload + if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || + (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && + (x.w[0] > 0x38c15b09ffffffffull))) { + x.w[1] = x.w[1] & 0xffffc00000000000ull; + x.w[0] = 0x0ull; + } + if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + // return quiet (x) + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] + res.w[0] = x.w[0]; + } else { // x is QNaN + // return x + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] + res.w[0] = x.w[0]; + } + BID_RETURN (res) + } else { // x is not a NaN, so it must be infinity + if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf + // return +inf + res.w[1] = 0x7800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // x is -inf + // return -inf + res.w[1] = 0xf800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + BID_RETURN (res); + } +} + // unpack x +x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative +C1.w[1] = x.w[1] & MASK_COEFF; +C1.w[0] = x.w[0]; + + // check for non-canonical values (treated as zero) +if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 + // non-canonical + x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C1.w[1] = 0; // significand high + C1.w[0] = 0; // significand low +} else { // G0_G1 != 11 + x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C1.w[1] > 0x0001ed09bead87c0ull || + (C1.w[1] == 0x0001ed09bead87c0ull + && C1.w[0] > 0x378d8e63ffffffffull)) { + // x is non-canonical if coefficient is larger than 10^34 -1 + C1.w[1] = 0; + C1.w[0] = 0; + } else { // canonical + ; + } +} + + // test for input equal to zero +if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { + // x is 0 + // return 0 preserving the sign bit and the preferred exponent + // of MAX(Q(x), 0) + if (x_exp <= (0x1820ull << 49)) { + res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; + } else { + res.w[1] = x_sign | x_exp; + } + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // x is not special and is not zero + +switch (rnd_mode) { +case ROUNDING_TO_NEAREST: +case ROUNDING_TIES_AWAY: + // if (exp <= -(p+1)) return 0.0 + if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + *pfpsf |= INEXACT_EXCEPTION; + BID_RETURN (res); + } + break; +case ROUNDING_DOWN: + // if (exp <= -p) return -1.0 or +0.0 + if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffa000000000000ull == -34 + if (x_sign) { + // if negative, return negative 1, because we know coefficient + // is non-zero (would have been caught above) + res.w[1] = 0xb040000000000000ull; + res.w[0] = 0x0000000000000001ull; + } else { + // if positive, return positive 0, because we know coefficient is + // non-zero (would have been caught above) + res.w[1] = 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + *pfpsf |= INEXACT_EXCEPTION; + BID_RETURN (res); + } + break; +case ROUNDING_UP: + // if (exp <= -p) return -0.0 or +1.0 + if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 + if (x_sign) { + // if negative, return negative 0, because we know the coefficient + // is non-zero (would have been caught above) + res.w[1] = 0xb040000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { + // if positive, return positive 1, because we know coefficient is + // non-zero (would have been caught above) + res.w[1] = 0x3040000000000000ull; + res.w[0] = 0x0000000000000001ull; + } + *pfpsf |= INEXACT_EXCEPTION; + BID_RETURN (res); + } + break; +case ROUNDING_TO_ZERO: + // if (exp <= -p) return -0.0 or +0.0 + if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + *pfpsf |= INEXACT_EXCEPTION; + BID_RETURN (res); + } + break; +} + + // q = nr. of decimal digits in x + // determine first the nr. of bits in x +if (C1.w[1] == 0) { + if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 + // split the 64-bit value in two 32-bit halves to avoid rounding errors + if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 + tmp1.d = (double) (C1.w[0] >> 32); // exact conversion + x_nr_bits = + 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // x < 2^32 + tmp1.d = (double) (C1.w[0]); // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + } else { // if x < 2^53 + tmp1.d = (double) C1.w[0]; // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } +} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) + tmp1.d = (double) C1.w[1]; // exact conversion + x_nr_bits = + 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); +} + +q = nr_digits[x_nr_bits - 1].digits; +if (q == 0) { + q = nr_digits[x_nr_bits - 1].digits1; + if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || + (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && + C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) + q++; +} +exp = (x_exp >> 49) - 6176; +if (exp >= 0) { // -exp <= 0 + // the argument is an integer already + res.w[1] = x.w[1]; + res.w[0] = x.w[0]; + BID_RETURN (res); +} + // exp < 0 +switch (rnd_mode) { +case ROUNDING_TO_NEAREST: + if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q + // need to shift right -exp digits from the coefficient; exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // chop off ind digits from the lower part of C1 + // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits + tmp64 = C1.w[0]; + if (ind <= 19) { + C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + } else { + C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + } + if (C1.w[0] < tmp64) + C1.w[1]++; + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = (C1 + 1/2 * 10^x) * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + // determine the value of res and fstar + + // determine inexactness of the rounding of C* + // if (0 < f* - 1/2 < 10^(-x)) then + // the result is exact + // else // if (f* - 1/2 > T*) then + // the result is inexact + // Note: we are going to use ten2mk128[] instead of ten2mk128trunc[] + + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + // redundant shift = shiftright128[ind - 1]; // shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* < 10^(-x) <=> midpoint + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even) + if ((res.w[0] & 0x0000000000000001ull) && // is result odd? + ((fstar.w[1] < (ten2mk128[ind - 1].w[1])) + || ((fstar.w[1] == ten2mk128[ind - 1].w[1]) + && (fstar.w[0] < ten2mk128[ind - 1].w[0])))) { + // subract 1 to make even + if (res.w[0]-- == 0) { + res.w[1]--; + } + } + if (fstar.w[1] > 0x8000000000000000ull || + (fstar.w[1] == 0x8000000000000000ull + && fstar.w[0] > 0x0ull)) { + // f* > 1/2 and the result may be exact + tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2 + if (tmp64 > ten2mk128[ind - 1].w[1] || + (tmp64 == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 + res.w[1] = (P256.w[3] >> shift); + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* < 10^(-x) <=> midpoint + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((res.w[0] & 0x0000000000000001ull) && // is result odd? + fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] < ten2mk128[ind - 1].w[0]))) { + // subract 1 to make even + if (res.w[0]-- == 0) { + res.w[1]--; + } + } + if (fstar.w[2] > onehalf128[ind - 1] || + (fstar.w[2] == onehalf128[ind - 1] + && (fstar.w[1] || fstar.w[0]))) { + // f2* > 1/2 and the result may be exact + // Calculate f2* - 1/2 + tmp64 = fstar.w[2] - onehalf128[ind - 1]; + if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = P256.w[3] >> shift; + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* < 10^(-x) <=> midpoint + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((res.w[0] & 0x0000000000000001ull) && // is result odd? + fstar.w[3] == 0 && fstar.w[2] == 0 && + (fstar.w[1] < ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] < ten2mk128[ind - 1].w[0]))) { + // subract 1 to make even + if (res.w[0]-- == 0) { + res.w[1]--; + } + } + if (fstar.w[3] > onehalf128[ind - 1] || + (fstar.w[3] == onehalf128[ind - 1] && + (fstar.w[2] || fstar.w[1] || fstar.w[0]))) { + // f2* > 1/2 and the result may be exact + // Calculate f2* - 1/2 + tmp64 = fstar.w[3] - onehalf128[ind - 1]; + if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } + } + res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; + BID_RETURN (res); + } else { // if ((q + exp) < 0) <=> q < -exp + // the result is +0 or -0 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + *pfpsf |= INEXACT_EXCEPTION; + BID_RETURN (res); + } + break; +case ROUNDING_TIES_AWAY: + if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q + // need to shift right -exp digits from the coefficient; exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // chop off ind digits from the lower part of C1 + // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits + tmp64 = C1.w[0]; + if (ind <= 19) { + C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + } else { + C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + } + if (C1.w[0] < tmp64) + C1.w[1]++; + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = (C1 + 1/2 * 10^x) * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g. + // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999 + // if (0 < f* < 10^(-x)) then the result is a midpoint + // if floor(C*) is even then C* = floor(C*) - logical right + // shift; C* has p decimal digits, correct by Prop. 1) + // else if floor(C*) is odd C* = floor(C*)-1 (logical right + // shift; C* has p decimal digits, correct by Pr. 1) + // else + // C* = floor(C*) (logical right shift; C has p decimal digits, + // correct by Property 1) + // n = C* * 10^(e+x) + + // determine also the inexactness of the rounding of C* + // if (0 < f* - 1/2 < 10^(-x)) then + // the result is exact + // else // if (f* - 1/2 > T*) then + // the result is inexact + // Note: we are going to use ten2mk128[] instead of ten2mk128trunc[] + // shift right C* by Ex-128 = shiftright128[ind] + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + // redundant shift = shiftright128[ind - 1]; // shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + if (fstar.w[1] > 0x8000000000000000ull || + (fstar.w[1] == 0x8000000000000000ull + && fstar.w[0] > 0x0ull)) { + // f* > 1/2 and the result may be exact + tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2 + if ((tmp64 > ten2mk128[ind - 1].w[1] || + (tmp64 == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0]))) { + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 + res.w[1] = (P256.w[3] >> shift); + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + if (fstar.w[2] > onehalf128[ind - 1] || + (fstar.w[2] == onehalf128[ind - 1] + && (fstar.w[1] || fstar.w[0]))) { + // f2* > 1/2 and the result may be exact + // Calculate f2* - 1/2 + tmp64 = fstar.w[2] - onehalf128[ind - 1]; + if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = P256.w[3] >> shift; + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + if (fstar.w[3] > onehalf128[ind - 1] || + (fstar.w[3] == onehalf128[ind - 1] && + (fstar.w[2] || fstar.w[1] || fstar.w[0]))) { + // f2* > 1/2 and the result may be exact + // Calculate f2* - 1/2 + tmp64 = fstar.w[3] - onehalf128[ind - 1]; + if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } + } + // if the result was a midpoint, it was already rounded away from zero + res.w[1] |= x_sign | 0x3040000000000000ull; + BID_RETURN (res); + } else { // if ((q + exp) < 0) <=> q < -exp + // the result is +0 or -0 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + *pfpsf |= INEXACT_EXCEPTION; + BID_RETURN (res); + } + break; +case ROUNDING_DOWN: + if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + // need to shift right -exp digits from the coefficient; exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // (number of digits to be chopped off) + // chop off ind digits from the lower part of C1 + // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate + // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP + // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE + // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE + // tmp64 = C1.w[0]; + // if (ind <= 19) { + // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + // } else { + // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + // } + // if (C1.w[0] < tmp64) C1.w[1]++; + // if carry-out from C1.w[0], increment C1.w[1] + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = (C1 + 1/2 * 10^x) * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + // redundant fstar.w[1] = P256.w[1]; + // redundant fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((P256.w[1] > ten2mk128[ind - 1].w[1]) + || (P256.w[1] == ten2mk128[ind - 1].w[1] + && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { + *pfpsf |= INEXACT_EXCEPTION; + // if positive, the truncated value is already the correct result + if (x_sign) { // if negative + if (++res.w[0] == 0) { + res.w[1]++; + } + } + } + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 0 <= shift <= 102 + res.w[1] = (P256.w[3] >> shift); + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + *pfpsf |= INEXACT_EXCEPTION; + // if positive, the truncated value is already the correct result + if (x_sign) { // if negative + if (++res.w[0] == 0) { + res.w[1]++; + } + } + } + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = P256.w[3] >> shift; + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[3] || fstar.w[2] + || fstar.w[1] > ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + *pfpsf |= INEXACT_EXCEPTION; + // if positive, the truncated value is already the correct result + if (x_sign) { // if negative + if (++res.w[0] == 0) { + res.w[1]++; + } + } + } + } + res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; + BID_RETURN (res); + } else { // if exp < 0 and q + exp <= 0 + if (x_sign) { // negative rounds down to -1.0 + res.w[1] = 0xb040000000000000ull; + res.w[0] = 0x0000000000000001ull; + } else { // positive rpunds down to +0.0 + res.w[1] = 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + *pfpsf |= INEXACT_EXCEPTION; + BID_RETURN (res); + } + break; +case ROUNDING_UP: + if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + // need to shift right -exp digits from the coefficient; exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // (number of digits to be chopped off) + // chop off ind digits from the lower part of C1 + // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate + // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP + // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE + // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE + // tmp64 = C1.w[0]; + // if (ind <= 19) { + // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + // } else { + // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + // } + // if (C1.w[0] < tmp64) C1.w[1]++; + // if carry-out from C1.w[0], increment C1.w[1] + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = C1 * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + // redundant fstar.w[1] = P256.w[1]; + // redundant fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((P256.w[1] > ten2mk128[ind - 1].w[1]) + || (P256.w[1] == ten2mk128[ind - 1].w[1] + && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { + *pfpsf |= INEXACT_EXCEPTION; + // if negative, the truncated value is already the correct result + if (!x_sign) { // if positive + if (++res.w[0] == 0) { + res.w[1]++; + } + } + } + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 + res.w[1] = (P256.w[3] >> shift); + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + *pfpsf |= INEXACT_EXCEPTION; + // if negative, the truncated value is already the correct result + if (!x_sign) { // if positive + if (++res.w[0] == 0) { + res.w[1]++; + } + } + } + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = P256.w[3] >> shift; + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[3] || fstar.w[2] + || fstar.w[1] > ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + *pfpsf |= INEXACT_EXCEPTION; + // if negative, the truncated value is already the correct result + if (!x_sign) { // if positive + if (++res.w[0] == 0) { + res.w[1]++; + } + } + } + } + res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; + BID_RETURN (res); + } else { // if exp < 0 and q + exp <= 0 + if (x_sign) { // negative rounds up to -0.0 + res.w[1] = 0xb040000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // positive rpunds up to +1.0 + res.w[1] = 0x3040000000000000ull; + res.w[0] = 0x0000000000000001ull; + } + *pfpsf |= INEXACT_EXCEPTION; + BID_RETURN (res); + } + break; +case ROUNDING_TO_ZERO: + if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + // need to shift right -exp digits from the coefficient; exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // (number of digits to be chopped off) + // chop off ind digits from the lower part of C1 + // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate + // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP + // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE + // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE + //tmp64 = C1.w[0]; + // if (ind <= 19) { + // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + // } else { + // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + // } + // if (C1.w[0] < tmp64) C1.w[1]++; + // if carry-out from C1.w[0], increment C1.w[1] + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = (C1 + 1/2 * 10^x) * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + // redundant fstar.w[1] = P256.w[1]; + // redundant fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((P256.w[1] > ten2mk128[ind - 1].w[1]) + || (P256.w[1] == ten2mk128[ind - 1].w[1] + && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { + *pfpsf |= INEXACT_EXCEPTION; + } + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 + res.w[1] = (P256.w[3] >> shift); + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + *pfpsf |= INEXACT_EXCEPTION; + } + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = P256.w[3] >> shift; + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[3] || fstar.w[2] + || fstar.w[1] > ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + *pfpsf |= INEXACT_EXCEPTION; + } + } + res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; + BID_RETURN (res); + } else { // if exp < 0 and q + exp <= 0 the result is +0 or -0 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + *pfpsf |= INEXACT_EXCEPTION; + BID_RETURN (res); + } + break; +} + +BID_RETURN (res); +} + +/***************************************************************************** + * BID128_round_integral_nearest_even + ****************************************************************************/ + +BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_even, x) + + UINT128 res; + UINT64 x_sign; + UINT64 x_exp; + int exp; // unbiased exponent + // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) + UINT64 tmp64; + BID_UI64DOUBLE tmp1; + unsigned int x_nr_bits; + int q, ind, shift; + UINT128 C1; + // UINT128 res is C* at first - represents up to 34 decimal digits ~ 113 bits + UINT256 fstar; + UINT256 P256; + + // check for NaN or Infinity +if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { + // x is special +if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN + // if x = NaN, then res = Q (x) + // check first for non-canonical NaN payload + if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || + (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && + (x.w[0] > 0x38c15b09ffffffffull))) { + x.w[1] = x.w[1] & 0xffffc00000000000ull; + x.w[0] = 0x0ull; + } + if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + // return quiet (x) + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] + res.w[0] = x.w[0]; + } else { // x is QNaN + // return x + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] + res.w[0] = x.w[0]; + } + BID_RETURN (res) +} else { // x is not a NaN, so it must be infinity + if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf + // return +inf + res.w[1] = 0x7800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // x is -inf + // return -inf + res.w[1] = 0xf800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + BID_RETURN (res); +} +} + // unpack x +x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative +C1.w[1] = x.w[1] & MASK_COEFF; +C1.w[0] = x.w[0]; + + // check for non-canonical values (treated as zero) +if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 + // non-canonical + x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C1.w[1] = 0; // significand high + C1.w[0] = 0; // significand low +} else { // G0_G1 != 11 + x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C1.w[1] > 0x0001ed09bead87c0ull || + (C1.w[1] == 0x0001ed09bead87c0ull + && C1.w[0] > 0x378d8e63ffffffffull)) { + // x is non-canonical if coefficient is larger than 10^34 -1 + C1.w[1] = 0; + C1.w[0] = 0; + } else { // canonical + ; + } +} + + // test for input equal to zero +if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { + // x is 0 + // return 0 preserving the sign bit and the preferred exponent + // of MAX(Q(x), 0) + if (x_exp <= (0x1820ull << 49)) { + res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; + } else { + res.w[1] = x_sign | x_exp; + } + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // x is not special and is not zero + + // if (exp <= -(p+1)) return 0 +if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // q = nr. of decimal digits in x + // determine first the nr. of bits in x +if (C1.w[1] == 0) { + if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 + // split the 64-bit value in two 32-bit halves to avoid rounding errors + if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 + tmp1.d = (double) (C1.w[0] >> 32); // exact conversion + x_nr_bits = + 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // x < 2^32 + tmp1.d = (double) (C1.w[0]); // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + } else { // if x < 2^53 + tmp1.d = (double) C1.w[0]; // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } +} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) + tmp1.d = (double) C1.w[1]; // exact conversion + x_nr_bits = + 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); +} + +q = nr_digits[x_nr_bits - 1].digits; +if (q == 0) { + q = nr_digits[x_nr_bits - 1].digits1; + if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi + || (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && + C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) + q++; +} +exp = (x_exp >> 49) - 6176; +if (exp >= 0) { // -exp <= 0 + // the argument is an integer already + res.w[1] = x.w[1]; + res.w[0] = x.w[0]; + BID_RETURN (res); +} else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q + // need to shift right -exp digits from the coefficient; the exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // chop off ind digits from the lower part of C1 + // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits + tmp64 = C1.w[0]; + if (ind <= 19) { + C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + } else { + C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + } + if (C1.w[0] < tmp64) + C1.w[1]++; + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = (C1 + 1/2 * 10^x) * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + // determine the value of res and fstar + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + // redundant shift = shiftright128[ind - 1]; // shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + // redundant fstar.w[1] = P256.w[1]; + // redundant fstar.w[0] = P256.w[0]; + // fraction f* < 10^(-x) <=> midpoint + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even) + if ((res.w[0] & 0x0000000000000001ull) && // is result odd? + ((P256.w[1] < (ten2mk128[ind - 1].w[1])) + || ((P256.w[1] == ten2mk128[ind - 1].w[1]) + && (P256.w[0] < ten2mk128[ind - 1].w[0])))) { + // subract 1 to make even + if (res.w[0]-- == 0) { + res.w[1]--; + } + } + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 + res.w[1] = (P256.w[3] >> shift); + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* < 10^(-x) <=> midpoint + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((res.w[0] & 0x0000000000000001ull) && // is result odd? + fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] < ten2mk128[ind - 1].w[0]))) { + // subract 1 to make even + if (res.w[0]-- == 0) { + res.w[1]--; + } + } + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = P256.w[3] >> shift; + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* < 10^(-x) <=> midpoint + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((res.w[0] & 0x0000000000000001ull) && // is result odd? + fstar.w[3] == 0 && fstar.w[2] == 0 + && (fstar.w[1] < ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] < ten2mk128[ind - 1].w[0]))) { + // subract 1 to make even + if (res.w[0]-- == 0) { + res.w[1]--; + } + } + } + res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; + BID_RETURN (res); +} else { // if ((q + exp) < 0) <=> q < -exp + // the result is +0 or -0 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} +} + +/***************************************************************************** + * BID128_round_integral_negative + ****************************************************************************/ + +BID128_FUNCTION_ARG1_NORND (bid128_round_integral_negative, x) + + UINT128 res; + UINT64 x_sign; + UINT64 x_exp; + int exp; // unbiased exponent + // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo + // (all are UINT64) + BID_UI64DOUBLE tmp1; + unsigned int x_nr_bits; + int q, ind, shift; + UINT128 C1; + // UINT128 res is C* at first - represents up to 34 decimal digits ~ + // 113 bits + UINT256 fstar; + UINT256 P256; + + // check for NaN or Infinity +if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { + // x is special +if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN + // if x = NaN, then res = Q (x) + // check first for non-canonical NaN payload + if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || + (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && + (x.w[0] > 0x38c15b09ffffffffull))) { + x.w[1] = x.w[1] & 0xffffc00000000000ull; + x.w[0] = 0x0ull; + } + if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + // return quiet (x) + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] + res.w[0] = x.w[0]; + } else { // x is QNaN + // return x + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] + res.w[0] = x.w[0]; + } + BID_RETURN (res) +} else { // x is not a NaN, so it must be infinity + if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf + // return +inf + res.w[1] = 0x7800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // x is -inf + // return -inf + res.w[1] = 0xf800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + BID_RETURN (res); +} +} + // unpack x +x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative +C1.w[1] = x.w[1] & MASK_COEFF; +C1.w[0] = x.w[0]; + + // check for non-canonical values (treated as zero) +if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 + // non-canonical + x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C1.w[1] = 0; // significand high + C1.w[0] = 0; // significand low +} else { // G0_G1 != 11 + x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C1.w[1] > 0x0001ed09bead87c0ull || + (C1.w[1] == 0x0001ed09bead87c0ull + && C1.w[0] > 0x378d8e63ffffffffull)) { + // x is non-canonical if coefficient is larger than 10^34 -1 + C1.w[1] = 0; + C1.w[0] = 0; + } else { // canonical + ; + } +} + + // test for input equal to zero +if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { + // x is 0 + // return 0 preserving the sign bit and the preferred exponent + // of MAX(Q(x), 0) + if (x_exp <= (0x1820ull << 49)) { + res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; + } else { + res.w[1] = x_sign | x_exp; + } + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // x is not special and is not zero + + // if (exp <= -p) return -1.0 or +0.0 +if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 + if (x_sign) { + // if negative, return negative 1, because we know the coefficient + // is non-zero (would have been caught above) + res.w[1] = 0xb040000000000000ull; + res.w[0] = 0x0000000000000001ull; + } else { + // if positive, return positive 0, because we know coefficient is + // non-zero (would have been caught above) + res.w[1] = 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + BID_RETURN (res); +} + // q = nr. of decimal digits in x + // determine first the nr. of bits in x +if (C1.w[1] == 0) { + if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 + // split the 64-bit value in two 32-bit halves to avoid rounding errors + if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 + tmp1.d = (double) (C1.w[0] >> 32); // exact conversion + x_nr_bits = + 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // x < 2^32 + tmp1.d = (double) (C1.w[0]); // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + } else { // if x < 2^53 + tmp1.d = (double) C1.w[0]; // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } +} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) + tmp1.d = (double) C1.w[1]; // exact conversion + x_nr_bits = + 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); +} + +q = nr_digits[x_nr_bits - 1].digits; +if (q == 0) { + q = nr_digits[x_nr_bits - 1].digits1; + if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || + (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && + C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) + q++; +} +exp = (x_exp >> 49) - 6176; +if (exp >= 0) { // -exp <= 0 + // the argument is an integer already + res.w[1] = x.w[1]; + res.w[0] = x.w[0]; + BID_RETURN (res); +} else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + // need to shift right -exp digits from the coefficient; the exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // (number of digits to be chopped off) + // chop off ind digits from the lower part of C1 + // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate + // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP + // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE + // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE + //tmp64 = C1.w[0]; + // if (ind <= 19) { + // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + // } else { + // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + // } + // if (C1.w[0] < tmp64) C1.w[1]++; + // if carry-out from C1.w[0], increment C1.w[1] + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = (C1 + 1/2 * 10^x) * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + // if positive, the truncated value is already the correct result + if (x_sign) { // if negative + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + // redundant fstar.w[1] = P256.w[1]; + // redundant fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((P256.w[1] > ten2mk128[ind - 1].w[1]) + || (P256.w[1] == ten2mk128[ind - 1].w[1] + && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { + if (++res.w[0] == 0) { + res.w[1]++; + } + } + } + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 0 <= shift <= 102 + res.w[1] = (P256.w[3] >> shift); + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + // if positive, the truncated value is already the correct result + if (x_sign) { // if negative + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + if (++res.w[0] == 0) { + res.w[1]++; + } + } + } + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = P256.w[3] >> shift; + // if positive, the truncated value is already the correct result + if (x_sign) { // if negative + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[3] || fstar.w[2] + || fstar.w[1] > ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + if (++res.w[0] == 0) { + res.w[1]++; + } + } + } + } + res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; + BID_RETURN (res); +} else { // if exp < 0 and q + exp <= 0 + if (x_sign) { // negative rounds down to -1.0 + res.w[1] = 0xb040000000000000ull; + res.w[0] = 0x0000000000000001ull; + } else { // positive rpunds down to +0.0 + res.w[1] = 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + BID_RETURN (res); +} +} + +/***************************************************************************** + * BID128_round_integral_positive + ****************************************************************************/ + +BID128_FUNCTION_ARG1_NORND (bid128_round_integral_positive, x) + + UINT128 res; + UINT64 x_sign; + UINT64 x_exp; + int exp; // unbiased exponent + // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo + // (all are UINT64) + BID_UI64DOUBLE tmp1; + unsigned int x_nr_bits; + int q, ind, shift; + UINT128 C1; + // UINT128 res is C* at first - represents up to 34 decimal digits ~ + // 113 bits + UINT256 fstar; + UINT256 P256; + + // check for NaN or Infinity +if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { + // x is special +if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN + // if x = NaN, then res = Q (x) + // check first for non-canonical NaN payload + if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || + (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && + (x.w[0] > 0x38c15b09ffffffffull))) { + x.w[1] = x.w[1] & 0xffffc00000000000ull; + x.w[0] = 0x0ull; + } + if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + // return quiet (x) + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] + res.w[0] = x.w[0]; + } else { // x is QNaN + // return x + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] + res.w[0] = x.w[0]; + } + BID_RETURN (res) +} else { // x is not a NaN, so it must be infinity + if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf + // return +inf + res.w[1] = 0x7800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // x is -inf + // return -inf + res.w[1] = 0xf800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + BID_RETURN (res); +} +} + // unpack x +x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative +C1.w[1] = x.w[1] & MASK_COEFF; +C1.w[0] = x.w[0]; + + // check for non-canonical values (treated as zero) +if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 + // non-canonical + x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C1.w[1] = 0; // significand high + C1.w[0] = 0; // significand low +} else { // G0_G1 != 11 + x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C1.w[1] > 0x0001ed09bead87c0ull || + (C1.w[1] == 0x0001ed09bead87c0ull + && C1.w[0] > 0x378d8e63ffffffffull)) { + // x is non-canonical if coefficient is larger than 10^34 -1 + C1.w[1] = 0; + C1.w[0] = 0; + } else { // canonical + ; + } +} + + // test for input equal to zero +if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { + // x is 0 + // return 0 preserving the sign bit and the preferred exponent + // of MAX(Q(x), 0) + if (x_exp <= (0x1820ull << 49)) { + res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; + } else { + res.w[1] = x_sign | x_exp; + } + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // x is not special and is not zero + + // if (exp <= -p) return -0.0 or +1.0 +if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 + if (x_sign) { + // if negative, return negative 0, because we know the coefficient + // is non-zero (would have been caught above) + res.w[1] = 0xb040000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { + // if positive, return positive 1, because we know coefficient is + // non-zero (would have been caught above) + res.w[1] = 0x3040000000000000ull; + res.w[0] = 0x0000000000000001ull; + } + BID_RETURN (res); +} + // q = nr. of decimal digits in x + // determine first the nr. of bits in x +if (C1.w[1] == 0) { + if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 + // split 64-bit value in two 32-bit halves to avoid rounding errors + if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 + tmp1.d = (double) (C1.w[0] >> 32); // exact conversion + x_nr_bits = + 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // x < 2^32 + tmp1.d = (double) (C1.w[0]); // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + } else { // if x < 2^53 + tmp1.d = (double) C1.w[0]; // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } +} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) + tmp1.d = (double) C1.w[1]; // exact conversion + x_nr_bits = + 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); +} + +q = nr_digits[x_nr_bits - 1].digits; +if (q == 0) { + q = nr_digits[x_nr_bits - 1].digits1; + if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || + (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && + C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) + q++; +} +exp = (x_exp >> 49) - 6176; +if (exp >= 0) { // -exp <= 0 + // the argument is an integer already + res.w[1] = x.w[1]; + res.w[0] = x.w[0]; + BID_RETURN (res); +} else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + // need to shift right -exp digits from the coefficient; exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // (number of digits to be chopped off) + // chop off ind digits from the lower part of C1 + // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate + // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP + // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE + // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE + // tmp64 = C1.w[0]; + // if (ind <= 19) { + // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + // } else { + // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + // } + // if (C1.w[0] < tmp64) C1.w[1]++; + // if carry-out from C1.w[0], increment C1.w[1] + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = C1 * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + // if negative, the truncated value is already the correct result + if (!x_sign) { // if positive + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + // redundant fstar.w[1] = P256.w[1]; + // redundant fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((P256.w[1] > ten2mk128[ind - 1].w[1]) + || (P256.w[1] == ten2mk128[ind - 1].w[1] + && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { + if (++res.w[0] == 0) { + res.w[1]++; + } + } + } + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 + res.w[1] = (P256.w[3] >> shift); + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + // if negative, the truncated value is already the correct result + if (!x_sign) { // if positive + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + if (++res.w[0] == 0) { + res.w[1]++; + } + } + } + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = P256.w[3] >> shift; + // if negative, the truncated value is already the correct result + if (!x_sign) { // if positive + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[3] || fstar.w[2] + || fstar.w[1] > ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + if (++res.w[0] == 0) { + res.w[1]++; + } + } + } + } + res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; + BID_RETURN (res); +} else { // if exp < 0 and q + exp <= 0 + if (x_sign) { // negative rounds up to -0.0 + res.w[1] = 0xb040000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // positive rpunds up to +1.0 + res.w[1] = 0x3040000000000000ull; + res.w[0] = 0x0000000000000001ull; + } + BID_RETURN (res); +} +} + +/***************************************************************************** + * BID128_round_integral_zero + ****************************************************************************/ + +BID128_FUNCTION_ARG1_NORND (bid128_round_integral_zero, x) + + UINT128 res; + UINT64 x_sign; + UINT64 x_exp; + int exp; // unbiased exponent + // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo + // (all are UINT64) + BID_UI64DOUBLE tmp1; + unsigned int x_nr_bits; + int q, ind, shift; + UINT128 C1; + // UINT128 res is C* at first - represents up to 34 decimal digits ~ + // 113 bits + UINT256 P256; + + // check for NaN or Infinity +if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { + // x is special +if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN + // if x = NaN, then res = Q (x) + // check first for non-canonical NaN payload + if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || + (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && + (x.w[0] > 0x38c15b09ffffffffull))) { + x.w[1] = x.w[1] & 0xffffc00000000000ull; + x.w[0] = 0x0ull; + } + if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + // return quiet (x) + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] + res.w[0] = x.w[0]; + } else { // x is QNaN + // return x + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] + res.w[0] = x.w[0]; + } + BID_RETURN (res) +} else { // x is not a NaN, so it must be infinity + if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf + // return +inf + res.w[1] = 0x7800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // x is -inf + // return -inf + res.w[1] = 0xf800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + BID_RETURN (res); +} +} + // unpack x +x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative +C1.w[1] = x.w[1] & MASK_COEFF; +C1.w[0] = x.w[0]; + + // check for non-canonical values (treated as zero) +if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 + // non-canonical + x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C1.w[1] = 0; // significand high + C1.w[0] = 0; // significand low +} else { // G0_G1 != 11 + x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C1.w[1] > 0x0001ed09bead87c0ull || + (C1.w[1] == 0x0001ed09bead87c0ull + && C1.w[0] > 0x378d8e63ffffffffull)) { + // x is non-canonical if coefficient is larger than 10^34 -1 + C1.w[1] = 0; + C1.w[0] = 0; + } else { // canonical + ; + } +} + + // test for input equal to zero +if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { + // x is 0 + // return 0 preserving the sign bit and the preferred exponent + // of MAX(Q(x), 0) + if (x_exp <= (0x1820ull << 49)) { + res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; + } else { + res.w[1] = x_sign | x_exp; + } + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // x is not special and is not zero + + // if (exp <= -p) return -0.0 or +0.0 +if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // q = nr. of decimal digits in x + // determine first the nr. of bits in x +if (C1.w[1] == 0) { + if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 + // split the 64-bit value in two 32-bit halves to avoid rounding errors + if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 + tmp1.d = (double) (C1.w[0] >> 32); // exact conversion + x_nr_bits = + 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // x < 2^32 + tmp1.d = (double) (C1.w[0]); // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + } else { // if x < 2^53 + tmp1.d = (double) C1.w[0]; // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } +} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) + tmp1.d = (double) C1.w[1]; // exact conversion + x_nr_bits = + 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); +} + +q = nr_digits[x_nr_bits - 1].digits; +if (q == 0) { + q = nr_digits[x_nr_bits - 1].digits1; + if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || + (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && + C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) + q++; +} +exp = (x_exp >> 49) - 6176; +if (exp >= 0) { // -exp <= 0 + // the argument is an integer already + res.w[1] = x.w[1]; + res.w[0] = x.w[0]; + BID_RETURN (res); +} else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + // need to shift right -exp digits from the coefficient; the exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // (number of digits to be chopped off) + // chop off ind digits from the lower part of C1 + // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate + // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP + // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE + // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE + //tmp64 = C1.w[0]; + // if (ind <= 19) { + // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + // } else { + // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + // } + // if (C1.w[0] < tmp64) C1.w[1]++; + // if carry-out from C1.w[0], increment C1.w[1] + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = (C1 + 1/2 * 10^x) * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 + res.w[1] = (P256.w[3] >> shift); + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = P256.w[3] >> shift; + } + res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; + BID_RETURN (res); +} else { // if exp < 0 and q + exp <= 0 the result is +0 or -0 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} +} + +/***************************************************************************** + * BID128_round_integral_nearest_away + ****************************************************************************/ + +BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_away, x) + + UINT128 res; + UINT64 x_sign; + UINT64 x_exp; + int exp; // unbiased exponent + // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo + // (all are UINT64) + UINT64 tmp64; + BID_UI64DOUBLE tmp1; + unsigned int x_nr_bits; + int q, ind, shift; + UINT128 C1; + // UINT128 res is C* at first - represents up to 34 decimal digits ~ + // 113 bits + // UINT256 fstar; + UINT256 P256; + + // check for NaN or Infinity +if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { + // x is special +if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN + // if x = NaN, then res = Q (x) + // check first for non-canonical NaN payload + if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || + (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && + (x.w[0] > 0x38c15b09ffffffffull))) { + x.w[1] = x.w[1] & 0xffffc00000000000ull; + x.w[0] = 0x0ull; + } + if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + // return quiet (x) + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] + res.w[0] = x.w[0]; + } else { // x is QNaN + // return x + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] + res.w[0] = x.w[0]; + } + BID_RETURN (res) +} else { // x is not a NaN, so it must be infinity + if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf + // return +inf + res.w[1] = 0x7800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // x is -inf + // return -inf + res.w[1] = 0xf800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + BID_RETURN (res); +} +} + // unpack x +x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative +C1.w[1] = x.w[1] & MASK_COEFF; +C1.w[0] = x.w[0]; + + // check for non-canonical values (treated as zero) +if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 + // non-canonical + x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C1.w[1] = 0; // significand high + C1.w[0] = 0; // significand low +} else { // G0_G1 != 11 + x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C1.w[1] > 0x0001ed09bead87c0ull || + (C1.w[1] == 0x0001ed09bead87c0ull + && C1.w[0] > 0x378d8e63ffffffffull)) { + // x is non-canonical if coefficient is larger than 10^34 -1 + C1.w[1] = 0; + C1.w[0] = 0; + } else { // canonical + ; + } +} + + // test for input equal to zero +if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { + // x is 0 + // return 0 preserving the sign bit and the preferred exponent + // of MAX(Q(x), 0) + if (x_exp <= (0x1820ull << 49)) { + res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; + } else { + res.w[1] = x_sign | x_exp; + } + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // x is not special and is not zero + + // if (exp <= -(p+1)) return 0.0 +if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // q = nr. of decimal digits in x + // determine first the nr. of bits in x +if (C1.w[1] == 0) { + if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 + // split the 64-bit value in two 32-bit halves to avoid rounding errors + if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 + tmp1.d = (double) (C1.w[0] >> 32); // exact conversion + x_nr_bits = + 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // x < 2^32 + tmp1.d = (double) (C1.w[0]); // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + } else { // if x < 2^53 + tmp1.d = (double) C1.w[0]; // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } +} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) + tmp1.d = (double) C1.w[1]; // exact conversion + x_nr_bits = + 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); +} + +q = nr_digits[x_nr_bits - 1].digits; +if (q == 0) { + q = nr_digits[x_nr_bits - 1].digits1; + if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || + (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && + C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) + q++; +} +exp = (x_exp >> 49) - 6176; +if (exp >= 0) { // -exp <= 0 + // the argument is an integer already + res.w[1] = x.w[1]; + res.w[0] = x.w[0]; + BID_RETURN (res); +} else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q + // need to shift right -exp digits from the coefficient; the exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // chop off ind digits from the lower part of C1 + // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits + tmp64 = C1.w[0]; + if (ind <= 19) { + C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + } else { + C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + } + if (C1.w[0] < tmp64) + C1.w[1]++; + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = (C1 + 1/2 * 10^x) * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g. + // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999 + // if (0 < f* < 10^(-x)) then the result is a midpoint + // if floor(C*) is even then C* = floor(C*) - logical right + // shift; C* has p decimal digits, correct by Prop. 1) + // else if floor(C*) is odd C* = floor(C*)-1 (logical right + // shift; C* has p decimal digits, correct by Pr. 1) + // else + // C* = floor(C*) (logical right shift; C has p decimal digits, + // correct by Property 1) + // n = C* * 10^(e+x) + + // shift right C* by Ex-128 = shiftright128[ind] + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + res.w[1] = (P256.w[3] >> shift); + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1]; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = (P256.w[3] >> (shift - 64)); // 2 <= shift - 64 <= 38 + } + // if the result was a midpoint, it was already rounded away from zero + res.w[1] |= x_sign | 0x3040000000000000ull; + BID_RETURN (res); +} else { // if ((q + exp) < 0) <=> q < -exp + // the result is +0 or -0 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} +}