Imported gcc-4.4.3

[msp430-gcc.git] / libgcc / config / libbid / bid128_round_integral.c
diff --git a/libgcc/config/libbid/bid128_round_integral.c b/libgcc/config/libbid/bid128_round_integral.c

new file mode 100644 (file)

index 0000000..6efcd47
--- /dev/null
+++ b/libgcc/config/libbid/bid128_round_integral.c
@@ -0,0 +1,1951 @@
+/* Copyright (C) 2007, 2009  Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#define BID_128RES
+
+#include "bid_internal.h"
+
+/*****************************************************************************
+ *  BID128_round_integral_exact
+ ****************************************************************************/
+
+BID128_FUNCTION_ARG1 (bid128_round_integral_exact, x)
+
+     UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
+     };
+UINT64 x_sign;
+UINT64 x_exp;
+int exp;                       // unbiased exponent
+  // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
+UINT64 tmp64;
+BID_UI64DOUBLE tmp1;
+unsigned int x_nr_bits;
+int q, ind, shift;
+UINT128 C1;
+UINT256 fstar;
+UINT256 P256;
+
+  // check for NaN or Infinity
+if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
+  // x is special
+  if ((x.w[1] & MASK_NAN) == MASK_NAN) {       // x is NAN
+    // if x = NaN, then res = Q (x)
+    // check first for non-canonical NaN payload
+    if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
+       (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
+        (x.w[0] > 0x38c15b09ffffffffull))) {
+      x.w[1] = x.w[1] & 0xffffc00000000000ull;
+      x.w[0] = 0x0ull;
+    }
+    if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {   // x is SNAN
+      // set invalid flag
+      *pfpsf |= INVALID_EXCEPTION;
+      // return quiet (x)
+      res.w[1] = x.w[1] & 0xfc003fffffffffffull;       // clear out also G[6]-G[16]
+      res.w[0] = x.w[0];
+    } else {   // x is QNaN
+      // return x
+      res.w[1] = x.w[1] & 0xfc003fffffffffffull;       // clear out G[6]-G[16]
+      res.w[0] = x.w[0];
+    }
+    BID_RETURN (res)
+  } else {     // x is not a NaN, so it must be infinity
+    if ((x.w[1] & MASK_SIGN) == 0x0ull) {      // x is +inf
+      // return +inf
+      res.w[1] = 0x7800000000000000ull;
+      res.w[0] = 0x0000000000000000ull;
+    } else {   // x is -inf 
+      // return -inf
+      res.w[1] = 0xf800000000000000ull;
+      res.w[0] = 0x0000000000000000ull;
+    }
+    BID_RETURN (res);
+  }
+}
+  // unpack x
+x_sign = x.w[1] & MASK_SIGN;   // 0 for positive, MASK_SIGN for negative
+C1.w[1] = x.w[1] & MASK_COEFF;
+C1.w[0] = x.w[0];
+
+  // check for non-canonical values (treated as zero)
+if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) {       // G0_G1=11
+  // non-canonical
+  x_exp = (x.w[1] << 2) & MASK_EXP;    // biased and shifted left 49 bits
+  C1.w[1] = 0; // significand high
+  C1.w[0] = 0; // significand low
+} else {       // G0_G1 != 11
+  x_exp = x.w[1] & MASK_EXP;   // biased and shifted left 49 bits
+  if (C1.w[1] > 0x0001ed09bead87c0ull ||
+      (C1.w[1] == 0x0001ed09bead87c0ull
+       && C1.w[0] > 0x378d8e63ffffffffull)) {
+    // x is non-canonical if coefficient is larger than 10^34 -1
+    C1.w[1] = 0;
+    C1.w[0] = 0;
+  } else {     // canonical
+    ;
+  }
+}
+
+  // test for input equal to zero
+if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
+  // x is 0
+  // return 0 preserving the sign bit and the preferred exponent
+  // of MAX(Q(x), 0)
+  if (x_exp <= (0x1820ull << 49)) {
+    res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
+  } else {
+    res.w[1] = x_sign | x_exp;
+  }
+  res.w[0] = 0x0000000000000000ull;
+  BID_RETURN (res);
+}
+  // x is not special and is not zero
+
+switch (rnd_mode) {
+case ROUNDING_TO_NEAREST:
+case ROUNDING_TIES_AWAY:
+  // if (exp <= -(p+1)) return 0.0
+  if (x_exp <= 0x2ffa000000000000ull) {        // 0x2ffa000000000000ull == -35
+    res.w[1] = x_sign | 0x3040000000000000ull;
+    res.w[0] = 0x0000000000000000ull;
+    *pfpsf |= INEXACT_EXCEPTION;
+    BID_RETURN (res);
+  }
+  break;
+case ROUNDING_DOWN:
+  // if (exp <= -p) return -1.0 or +0.0
+  if (x_exp <= 0x2ffc000000000000ull) {        // 0x2ffa000000000000ull == -34
+    if (x_sign) {
+      // if negative, return negative 1, because we know coefficient
+      // is non-zero (would have been caught above)
+      res.w[1] = 0xb040000000000000ull;
+      res.w[0] = 0x0000000000000001ull;
+    } else {
+      // if positive, return positive 0, because we know coefficient is
+      // non-zero (would have been caught above)
+      res.w[1] = 0x3040000000000000ull;
+      res.w[0] = 0x0000000000000000ull;
+    }
+    *pfpsf |= INEXACT_EXCEPTION;
+    BID_RETURN (res);
+  }
+  break;
+case ROUNDING_UP:
+  // if (exp <= -p) return -0.0 or +1.0
+  if (x_exp <= 0x2ffc000000000000ull) {        // 0x2ffc000000000000ull == -34
+    if (x_sign) {
+      // if negative, return negative 0, because we know the coefficient
+      // is non-zero (would have been caught above)
+      res.w[1] = 0xb040000000000000ull;
+      res.w[0] = 0x0000000000000000ull;
+    } else {
+      // if positive, return positive 1, because we know coefficient is
+      // non-zero (would have been caught above)
+      res.w[1] = 0x3040000000000000ull;
+      res.w[0] = 0x0000000000000001ull;
+    }
+    *pfpsf |= INEXACT_EXCEPTION;
+    BID_RETURN (res);
+  }
+  break;
+case ROUNDING_TO_ZERO:
+  // if (exp <= -p) return -0.0 or +0.0
+  if (x_exp <= 0x2ffc000000000000ull) {        // 0x2ffc000000000000ull == -34
+    res.w[1] = x_sign | 0x3040000000000000ull;
+    res.w[0] = 0x0000000000000000ull;
+    *pfpsf |= INEXACT_EXCEPTION;
+    BID_RETURN (res);
+  }
+  break;
+}
+
+  // q = nr. of decimal digits in x
+  //  determine first the nr. of bits in x
+if (C1.w[1] == 0) {
+  if (C1.w[0] >= 0x0020000000000000ull) {      // x >= 2^53
+    // split the 64-bit value in two 32-bit halves to avoid rounding errors
+    if (C1.w[0] >= 0x0000000100000000ull) {    // x >= 2^32
+      tmp1.d = (double) (C1.w[0] >> 32);       // exact conversion
+      x_nr_bits =
+       33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+    } else {   // x < 2^32
+      tmp1.d = (double) (C1.w[0]);     // exact conversion
+      x_nr_bits =
+       1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+    }
+  } else {     // if x < 2^53
+    tmp1.d = (double) C1.w[0]; // exact conversion
+    x_nr_bits =
+      1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+  }
+} else {       // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
+  tmp1.d = (double) C1.w[1];   // exact conversion
+  x_nr_bits =
+    65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+}
+
+q = nr_digits[x_nr_bits - 1].digits;
+if (q == 0) {
+  q = nr_digits[x_nr_bits - 1].digits1;
+  if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
+      (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
+       C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
+    q++;
+}
+exp = (x_exp >> 49) - 6176;
+if (exp >= 0) {        // -exp <= 0
+  // the argument is an integer already
+  res.w[1] = x.w[1];
+  res.w[0] = x.w[0];
+  BID_RETURN (res);
+}
+  // exp < 0
+switch (rnd_mode) {
+case ROUNDING_TO_NEAREST:
+  if ((q + exp) >= 0) {        // exp < 0 and 1 <= -exp <= q
+    // need to shift right -exp digits from the coefficient; exp will be 0
+    ind = -exp;        // 1 <= ind <= 34; ind is a synonym for 'x'
+    // chop off ind digits from the lower part of C1 
+    // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
+    tmp64 = C1.w[0];
+    if (ind <= 19) {
+      C1.w[0] = C1.w[0] + midpoint64[ind - 1];
+    } else {
+      C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
+      C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
+    }
+    if (C1.w[0] < tmp64)
+      C1.w[1]++;
+    // calculate C* and f*
+    // C* is actually floor(C*) in this case
+    // C* and f* need shifting and masking, as shown by
+    // shiftright128[] and maskhigh128[]
+    // 1 <= x <= 34
+    // kx = 10^(-x) = ten2mk128[ind - 1]
+    // C* = (C1 + 1/2 * 10^x) * 10^(-x)
+    // the approximation of 10^(-x) was rounded up to 118 bits
+    __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
+    // determine the value of res and fstar
+
+    // determine inexactness of the rounding of C*
+    // if (0 < f* - 1/2 < 10^(-x)) then
+    //   the result is exact
+    // else // if (f* - 1/2 > T*) then
+    //   the result is inexact
+    // Note: we are going to use ten2mk128[] instead of ten2mk128trunc[]
+
+    if (ind - 1 <= 2) {        // 0 <= ind - 1 <= 2 => shift = 0
+      // redundant shift = shiftright128[ind - 1]; // shift = 0
+      res.w[1] = P256.w[3];
+      res.w[0] = P256.w[2];
+      // redundant fstar.w[3] = 0;
+      // redundant fstar.w[2] = 0;
+      fstar.w[1] = P256.w[1];
+      fstar.w[0] = P256.w[0];
+      // fraction f* < 10^(-x) <=> midpoint
+      // f* is in the right position to be compared with
+      // 10^(-x) from ten2mk128[]
+      // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even)
+      if ((res.w[0] & 0x0000000000000001ull) &&        // is result odd?
+         ((fstar.w[1] < (ten2mk128[ind - 1].w[1]))
+          || ((fstar.w[1] == ten2mk128[ind - 1].w[1])
+              && (fstar.w[0] < ten2mk128[ind - 1].w[0])))) {
+       // subract 1 to make even
+       if (res.w[0]-- == 0) {
+         res.w[1]--;
+       }
+      }
+      if (fstar.w[1] > 0x8000000000000000ull ||
+         (fstar.w[1] == 0x8000000000000000ull
+          && fstar.w[0] > 0x0ull)) {
+       // f* > 1/2 and the result may be exact
+       tmp64 = fstar.w[1] - 0x8000000000000000ull;     // f* - 1/2
+       if (tmp64 > ten2mk128[ind - 1].w[1] ||
+           (tmp64 == ten2mk128[ind - 1].w[1] &&
+            fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
+         // set the inexact flag
+         *pfpsf |= INEXACT_EXCEPTION;
+       }       // else the result is exact 
+      } else { // the result is inexact; f2* <= 1/2  
+       // set the inexact flag 
+       *pfpsf |= INEXACT_EXCEPTION;
+      }
+    } else if (ind - 1 <= 21) {        // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
+      shift = shiftright128[ind - 1];  // 3 <= shift <= 63
+      res.w[1] = (P256.w[3] >> shift);
+      res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
+      // redundant fstar.w[3] = 0;
+      fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
+      fstar.w[1] = P256.w[1];
+      fstar.w[0] = P256.w[0];
+      // fraction f* < 10^(-x) <=> midpoint
+      // f* is in the right position to be compared with
+      // 10^(-x) from ten2mk128[]
+      if ((res.w[0] & 0x0000000000000001ull) &&        // is result odd?
+         fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] ||
+                             (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
+                              fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
+       // subract 1 to make even
+       if (res.w[0]-- == 0) {
+         res.w[1]--;
+       }
+      }
+      if (fstar.w[2] > onehalf128[ind - 1] ||
+         (fstar.w[2] == onehalf128[ind - 1]
+          && (fstar.w[1] || fstar.w[0]))) {
+       // f2* > 1/2 and the result may be exact
+       // Calculate f2* - 1/2
+       tmp64 = fstar.w[2] - onehalf128[ind - 1];
+       if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
+           (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
+            fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
+         // set the inexact flag
+         *pfpsf |= INEXACT_EXCEPTION;
+       }       // else the result is exact
+      } else { // the result is inexact; f2* <= 1/2
+       // set the inexact flag
+       *pfpsf |= INEXACT_EXCEPTION;
+      }
+    } else {   // 22 <= ind - 1 <= 33
+      shift = shiftright128[ind - 1] - 64;     // 2 <= shift <= 38
+      res.w[1] = 0;
+      res.w[0] = P256.w[3] >> shift;
+      fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
+      fstar.w[2] = P256.w[2];
+      fstar.w[1] = P256.w[1];
+      fstar.w[0] = P256.w[0];
+      // fraction f* < 10^(-x) <=> midpoint
+      // f* is in the right position to be compared with
+      // 10^(-x) from ten2mk128[]
+      if ((res.w[0] & 0x0000000000000001ull) &&        // is result odd?
+         fstar.w[3] == 0 && fstar.w[2] == 0 &&
+         (fstar.w[1] < ten2mk128[ind - 1].w[1] ||
+          (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
+           fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
+       // subract 1 to make even
+       if (res.w[0]-- == 0) {
+         res.w[1]--;
+       }
+      }
+      if (fstar.w[3] > onehalf128[ind - 1] ||
+         (fstar.w[3] == onehalf128[ind - 1] &&
+          (fstar.w[2] || fstar.w[1] || fstar.w[0]))) {
+       // f2* > 1/2 and the result may be exact
+       // Calculate f2* - 1/2
+       tmp64 = fstar.w[3] - onehalf128[ind - 1];
+       if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1]
+           || (fstar.w[1] == ten2mk128[ind - 1].w[1]
+               && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
+         // set the inexact flag
+         *pfpsf |= INEXACT_EXCEPTION;
+       }       // else the result is exact
+      } else { // the result is inexact; f2* <= 1/2
+       // set the inexact flag
+       *pfpsf |= INEXACT_EXCEPTION;
+      }
+    }
+    res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
+    BID_RETURN (res);
+  } else {     // if ((q + exp) < 0) <=> q < -exp
+    // the result is +0 or -0
+    res.w[1] = x_sign | 0x3040000000000000ull;
+    res.w[0] = 0x0000000000000000ull;
+    *pfpsf |= INEXACT_EXCEPTION;
+    BID_RETURN (res);
+  }
+  break;
+case ROUNDING_TIES_AWAY:
+  if ((q + exp) >= 0) {        // exp < 0 and 1 <= -exp <= q
+    // need to shift right -exp digits from the coefficient; exp will be 0
+    ind = -exp;        // 1 <= ind <= 34; ind is a synonym for 'x'
+    // chop off ind digits from the lower part of C1 
+    // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
+    tmp64 = C1.w[0];
+    if (ind <= 19) {
+      C1.w[0] = C1.w[0] + midpoint64[ind - 1];
+    } else {
+      C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
+      C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
+    }
+    if (C1.w[0] < tmp64)
+      C1.w[1]++;
+    // calculate C* and f*
+    // C* is actually floor(C*) in this case
+    // C* and f* need shifting and masking, as shown by
+    // shiftright128[] and maskhigh128[]
+    // 1 <= x <= 34
+    // kx = 10^(-x) = ten2mk128[ind - 1]
+    // C* = (C1 + 1/2 * 10^x) * 10^(-x)
+    // the approximation of 10^(-x) was rounded up to 118 bits
+    __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
+    // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g.
+    // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
+    // if (0 < f* < 10^(-x)) then the result is a midpoint
+    //   if floor(C*) is even then C* = floor(C*) - logical right
+    //       shift; C* has p decimal digits, correct by Prop. 1)
+    //   else if floor(C*) is odd C* = floor(C*)-1 (logical right
+    //       shift; C* has p decimal digits, correct by Pr. 1)
+    // else
+    //   C* = floor(C*) (logical right shift; C has p decimal digits,
+    //       correct by Property 1)
+    // n = C* * 10^(e+x)
+
+    // determine also the inexactness of the rounding of C*
+    // if (0 < f* - 1/2 < 10^(-x)) then
+    //   the result is exact
+    // else // if (f* - 1/2 > T*) then
+    //   the result is inexact
+    // Note: we are going to use ten2mk128[] instead of ten2mk128trunc[]
+    // shift right C* by Ex-128 = shiftright128[ind]
+    if (ind - 1 <= 2) {        // 0 <= ind - 1 <= 2 => shift = 0
+      // redundant shift = shiftright128[ind - 1]; // shift = 0
+      res.w[1] = P256.w[3];
+      res.w[0] = P256.w[2];
+      // redundant fstar.w[3] = 0;
+      // redundant fstar.w[2] = 0;
+      fstar.w[1] = P256.w[1];
+      fstar.w[0] = P256.w[0];
+      if (fstar.w[1] > 0x8000000000000000ull ||
+         (fstar.w[1] == 0x8000000000000000ull
+          && fstar.w[0] > 0x0ull)) {
+       // f* > 1/2 and the result may be exact
+       tmp64 = fstar.w[1] - 0x8000000000000000ull;     // f* - 1/2
+       if ((tmp64 > ten2mk128[ind - 1].w[1] ||
+            (tmp64 == ten2mk128[ind - 1].w[1] &&
+             fstar.w[0] >= ten2mk128[ind - 1].w[0]))) {
+         // set the inexact flag
+         *pfpsf |= INEXACT_EXCEPTION;
+       }       // else the result is exact
+      } else { // the result is inexact; f2* <= 1/2
+       // set the inexact flag
+       *pfpsf |= INEXACT_EXCEPTION;
+      }
+    } else if (ind - 1 <= 21) {        // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
+      shift = shiftright128[ind - 1];  // 3 <= shift <= 63
+      res.w[1] = (P256.w[3] >> shift);
+      res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
+      // redundant fstar.w[3] = 0;
+      fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
+      fstar.w[1] = P256.w[1];
+      fstar.w[0] = P256.w[0];
+      if (fstar.w[2] > onehalf128[ind - 1] ||
+         (fstar.w[2] == onehalf128[ind - 1]
+          && (fstar.w[1] || fstar.w[0]))) {
+       // f2* > 1/2 and the result may be exact
+       // Calculate f2* - 1/2
+       tmp64 = fstar.w[2] - onehalf128[ind - 1];
+       if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
+           (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
+            fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
+         // set the inexact flag
+         *pfpsf |= INEXACT_EXCEPTION;
+       }       // else the result is exact
+      } else { // the result is inexact; f2* <= 1/2
+       // set the inexact flag
+       *pfpsf |= INEXACT_EXCEPTION;
+      }
+    } else {   // 22 <= ind - 1 <= 33
+      shift = shiftright128[ind - 1] - 64;     // 2 <= shift <= 38
+      res.w[1] = 0;
+      res.w[0] = P256.w[3] >> shift;
+      fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
+      fstar.w[2] = P256.w[2];
+      fstar.w[1] = P256.w[1];
+      fstar.w[0] = P256.w[0];
+      if (fstar.w[3] > onehalf128[ind - 1] ||
+         (fstar.w[3] == onehalf128[ind - 1] &&
+          (fstar.w[2] || fstar.w[1] || fstar.w[0]))) {
+       // f2* > 1/2 and the result may be exact
+       // Calculate f2* - 1/2
+       tmp64 = fstar.w[3] - onehalf128[ind - 1];
+       if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1]
+           || (fstar.w[1] == ten2mk128[ind - 1].w[1]
+               && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
+         // set the inexact flag
+         *pfpsf |= INEXACT_EXCEPTION;
+       }       // else the result is exact
+      } else { // the result is inexact; f2* <= 1/2
+       // set the inexact flag
+       *pfpsf |= INEXACT_EXCEPTION;
+      }
+    }
+    // if the result was a midpoint, it was already rounded away from zero
+    res.w[1] |= x_sign | 0x3040000000000000ull;
+    BID_RETURN (res);
+  } else {     // if ((q + exp) < 0) <=> q < -exp
+    // the result is +0 or -0
+    res.w[1] = x_sign | 0x3040000000000000ull;
+    res.w[0] = 0x0000000000000000ull;
+    *pfpsf |= INEXACT_EXCEPTION;
+    BID_RETURN (res);
+  }
+  break;
+case ROUNDING_DOWN:
+  if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
+    // need to shift right -exp digits from the coefficient; exp will be 0
+    ind = -exp;        // 1 <= ind <= 34; ind is a synonym for 'x' 
+    // (number of digits to be chopped off)
+    // chop off ind digits from the lower part of C1 
+    // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
+    // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
+    // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
+    // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
+    // tmp64 = C1.w[0];
+    // if (ind <= 19) {
+    //   C1.w[0] = C1.w[0] + midpoint64[ind - 1];
+    // } else {
+    //   C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
+    //   C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
+    // }
+    // if (C1.w[0] < tmp64) C1.w[1]++;
+    // if carry-out from C1.w[0], increment C1.w[1]
+    // calculate C* and f*
+    // C* is actually floor(C*) in this case
+    // C* and f* need shifting and masking, as shown by
+    // shiftright128[] and maskhigh128[]
+    // 1 <= x <= 34
+    // kx = 10^(-x) = ten2mk128[ind - 1]
+    // C* = (C1 + 1/2 * 10^x) * 10^(-x)
+    // the approximation of 10^(-x) was rounded up to 118 bits
+    __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
+    if (ind - 1 <= 2) {        // 0 <= ind - 1 <= 2 => shift = 0
+      res.w[1] = P256.w[3];
+      res.w[0] = P256.w[2];
+      // redundant fstar.w[3] = 0;
+      // redundant fstar.w[2] = 0;
+      // redundant fstar.w[1] = P256.w[1];
+      // redundant fstar.w[0] = P256.w[0];
+      // fraction f* > 10^(-x) <=> inexact
+      // f* is in the right position to be compared with
+      // 10^(-x) from ten2mk128[]
+      if ((P256.w[1] > ten2mk128[ind - 1].w[1])
+         || (P256.w[1] == ten2mk128[ind - 1].w[1]
+             && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
+       *pfpsf |= INEXACT_EXCEPTION;
+       // if positive, the truncated value is already the correct result
+       if (x_sign) {   // if negative
+         if (++res.w[0] == 0) {
+           res.w[1]++;
+         }
+       }
+      }
+    } else if (ind - 1 <= 21) {        // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
+      shift = shiftright128[ind - 1];  // 0 <= shift <= 102
+      res.w[1] = (P256.w[3] >> shift);
+      res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
+      // redundant fstar.w[3] = 0;
+      fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
+      fstar.w[1] = P256.w[1];
+      fstar.w[0] = P256.w[0];
+      // fraction f* > 10^(-x) <=> inexact
+      // f* is in the right position to be compared with
+      // 10^(-x) from ten2mk128[]
+      if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
+         (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
+          fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
+       *pfpsf |= INEXACT_EXCEPTION;
+       // if positive, the truncated value is already the correct result
+       if (x_sign) {   // if negative
+         if (++res.w[0] == 0) {
+           res.w[1]++;
+         }
+       }
+      }
+    } else {   // 22 <= ind - 1 <= 33
+      shift = shiftright128[ind - 1] - 64;     // 2 <= shift <= 38
+      res.w[1] = 0;
+      res.w[0] = P256.w[3] >> shift;
+      fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
+      fstar.w[2] = P256.w[2];
+      fstar.w[1] = P256.w[1];
+      fstar.w[0] = P256.w[0];
+      // fraction f* > 10^(-x) <=> inexact
+      // f* is in the right position to be compared with
+      // 10^(-x) from ten2mk128[]
+      if (fstar.w[3] || fstar.w[2]
+         || fstar.w[1] > ten2mk128[ind - 1].w[1]
+         || (fstar.w[1] == ten2mk128[ind - 1].w[1]
+             && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
+       *pfpsf |= INEXACT_EXCEPTION;
+       // if positive, the truncated value is already the correct result
+       if (x_sign) {   // if negative
+         if (++res.w[0] == 0) {
+           res.w[1]++;
+         }
+       }
+      }
+    }
+    res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
+    BID_RETURN (res);
+  } else {     // if exp < 0 and q + exp <= 0
+    if (x_sign) {      // negative rounds down to -1.0
+      res.w[1] = 0xb040000000000000ull;
+      res.w[0] = 0x0000000000000001ull;
+    } else {   // positive rpunds down to +0.0
+      res.w[1] = 0x3040000000000000ull;
+      res.w[0] = 0x0000000000000000ull;
+    }
+    *pfpsf |= INEXACT_EXCEPTION;
+    BID_RETURN (res);
+  }
+  break;
+case ROUNDING_UP:
+  if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
+    // need to shift right -exp digits from the coefficient; exp will be 0
+    ind = -exp;        // 1 <= ind <= 34; ind is a synonym for 'x' 
+    // (number of digits to be chopped off)
+    // chop off ind digits from the lower part of C1 
+    // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
+    // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
+    // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
+    // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
+    // tmp64 = C1.w[0];
+    // if (ind <= 19) {
+    //   C1.w[0] = C1.w[0] + midpoint64[ind - 1];
+    // } else {
+    //   C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
+    //   C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
+    // }
+    // if (C1.w[0] < tmp64) C1.w[1]++;  
+    // if carry-out from C1.w[0], increment C1.w[1]
+    // calculate C* and f*
+    // C* is actually floor(C*) in this case
+    // C* and f* need shifting and masking, as shown by
+    // shiftright128[] and maskhigh128[]
+    // 1 <= x <= 34
+    // kx = 10^(-x) = ten2mk128[ind - 1]
+    // C* = C1 * 10^(-x)
+    // the approximation of 10^(-x) was rounded up to 118 bits
+    __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
+    if (ind - 1 <= 2) {        // 0 <= ind - 1 <= 2 => shift = 0
+      res.w[1] = P256.w[3];
+      res.w[0] = P256.w[2];
+      // redundant fstar.w[3] = 0;
+      // redundant fstar.w[2] = 0;
+      // redundant fstar.w[1] = P256.w[1]; 
+      // redundant fstar.w[0] = P256.w[0];
+      // fraction f* > 10^(-x) <=> inexact
+      // f* is in the right position to be compared with 
+      // 10^(-x) from ten2mk128[]
+      if ((P256.w[1] > ten2mk128[ind - 1].w[1])
+         || (P256.w[1] == ten2mk128[ind - 1].w[1]
+             && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
+       *pfpsf |= INEXACT_EXCEPTION;
+       // if negative, the truncated value is already the correct result
+       if (!x_sign) {  // if positive
+         if (++res.w[0] == 0) {
+           res.w[1]++;
+         }
+       }
+      }
+    } else if (ind - 1 <= 21) {        // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
+      shift = shiftright128[ind - 1];  // 3 <= shift <= 63
+      res.w[1] = (P256.w[3] >> shift);
+      res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
+      // redundant fstar.w[3] = 0;
+      fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
+      fstar.w[1] = P256.w[1];
+      fstar.w[0] = P256.w[0];
+      // fraction f* > 10^(-x) <=> inexact
+      // f* is in the right position to be compared with 
+      // 10^(-x) from ten2mk128[]
+      if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
+         (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
+          fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
+       *pfpsf |= INEXACT_EXCEPTION;
+       // if negative, the truncated value is already the correct result
+       if (!x_sign) {  // if positive
+         if (++res.w[0] == 0) {
+           res.w[1]++;
+         }
+       }
+      }
+    } else {   // 22 <= ind - 1 <= 33
+      shift = shiftright128[ind - 1] - 64;     // 2 <= shift <= 38
+      res.w[1] = 0;
+      res.w[0] = P256.w[3] >> shift;
+      fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
+      fstar.w[2] = P256.w[2];
+      fstar.w[1] = P256.w[1];
+      fstar.w[0] = P256.w[0];
+      // fraction f* > 10^(-x) <=> inexact
+      // f* is in the right position to be compared with 
+      // 10^(-x) from ten2mk128[]
+      if (fstar.w[3] || fstar.w[2]
+         || fstar.w[1] > ten2mk128[ind - 1].w[1]
+         || (fstar.w[1] == ten2mk128[ind - 1].w[1]
+             && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
+       *pfpsf |= INEXACT_EXCEPTION;
+       // if negative, the truncated value is already the correct result
+       if (!x_sign) {  // if positive
+         if (++res.w[0] == 0) {
+           res.w[1]++;
+         }
+       }
+      }
+    }
+    res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
+    BID_RETURN (res);
+  } else {     // if exp < 0 and q + exp <= 0
+    if (x_sign) {      // negative rounds up to -0.0
+      res.w[1] = 0xb040000000000000ull;
+      res.w[0] = 0x0000000000000000ull;
+    } else {   // positive rpunds up to +1.0
+      res.w[1] = 0x3040000000000000ull;
+      res.w[0] = 0x0000000000000001ull;
+    }
+    *pfpsf |= INEXACT_EXCEPTION;
+    BID_RETURN (res);
+  }
+  break;
+case ROUNDING_TO_ZERO:
+  if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
+    // need to shift right -exp digits from the coefficient; exp will be 0
+    ind = -exp;        // 1 <= ind <= 34; ind is a synonym for 'x'
+    // (number of digits to be chopped off)
+    // chop off ind digits from the lower part of C1 
+    // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
+    // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
+    // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
+    // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
+    //tmp64 = C1.w[0];
+    // if (ind <= 19) {
+    //   C1.w[0] = C1.w[0] + midpoint64[ind - 1];
+    // } else {
+    //   C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
+    //   C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
+    // }
+    // if (C1.w[0] < tmp64) C1.w[1]++;  
+    // if carry-out from C1.w[0], increment C1.w[1]
+    // calculate C* and f*
+    // C* is actually floor(C*) in this case
+    // C* and f* need shifting and masking, as shown by
+    // shiftright128[] and maskhigh128[]
+    // 1 <= x <= 34
+    // kx = 10^(-x) = ten2mk128[ind - 1]
+    // C* = (C1 + 1/2 * 10^x) * 10^(-x)
+    // the approximation of 10^(-x) was rounded up to 118 bits
+    __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
+    if (ind - 1 <= 2) {        // 0 <= ind - 1 <= 2 => shift = 0
+      res.w[1] = P256.w[3];
+      res.w[0] = P256.w[2];
+      // redundant fstar.w[3] = 0;
+      // redundant fstar.w[2] = 0;
+      // redundant fstar.w[1] = P256.w[1]; 
+      // redundant fstar.w[0] = P256.w[0];
+      // fraction f* > 10^(-x) <=> inexact
+      // f* is in the right position to be compared with 
+      // 10^(-x) from ten2mk128[]
+      if ((P256.w[1] > ten2mk128[ind - 1].w[1])
+         || (P256.w[1] == ten2mk128[ind - 1].w[1]
+             && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
+       *pfpsf |= INEXACT_EXCEPTION;
+      }
+    } else if (ind - 1 <= 21) {        // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
+      shift = shiftright128[ind - 1];  // 3 <= shift <= 63
+      res.w[1] = (P256.w[3] >> shift);
+      res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
+      // redundant fstar.w[3] = 0;
+      fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
+      fstar.w[1] = P256.w[1];
+      fstar.w[0] = P256.w[0];
+      // fraction f* > 10^(-x) <=> inexact
+      // f* is in the right position to be compared with 
+      // 10^(-x) from ten2mk128[]
+      if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
+         (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
+          fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
+       *pfpsf |= INEXACT_EXCEPTION;
+      }
+    } else {   // 22 <= ind - 1 <= 33
+      shift = shiftright128[ind - 1] - 64;     // 2 <= shift <= 38
+      res.w[1] = 0;
+      res.w[0] = P256.w[3] >> shift;
+      fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
+      fstar.w[2] = P256.w[2];
+      fstar.w[1] = P256.w[1];
+      fstar.w[0] = P256.w[0];
+      // fraction f* > 10^(-x) <=> inexact
+      // f* is in the right position to be compared with 
+      // 10^(-x) from ten2mk128[]
+      if (fstar.w[3] || fstar.w[2]
+         || fstar.w[1] > ten2mk128[ind - 1].w[1]
+         || (fstar.w[1] == ten2mk128[ind - 1].w[1]
+             && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
+       *pfpsf |= INEXACT_EXCEPTION;
+      }
+    }
+    res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
+    BID_RETURN (res);
+  } else {     // if exp < 0 and q + exp <= 0 the result is +0 or -0
+    res.w[1] = x_sign | 0x3040000000000000ull;
+    res.w[0] = 0x0000000000000000ull;
+    *pfpsf |= INEXACT_EXCEPTION;
+    BID_RETURN (res);
+  }
+  break;
+}
+
+BID_RETURN (res);
+}
+
+/*****************************************************************************
+ *  BID128_round_integral_nearest_even
+ ****************************************************************************/
+
+BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_even, x)
+
+     UINT128 res;
+     UINT64 x_sign;
+     UINT64 x_exp;
+     int exp;                  // unbiased exponent
+  // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
+     UINT64 tmp64;
+     BID_UI64DOUBLE tmp1;
+     unsigned int x_nr_bits;
+     int q, ind, shift;
+     UINT128 C1;
+  // UINT128 res is C* at first - represents up to 34 decimal digits ~ 113 bits
+     UINT256 fstar;
+     UINT256 P256;
+
+  // check for NaN or Infinity
+if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
+    // x is special
+if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
+  // if x = NaN, then res = Q (x)
+  // check first for non-canonical NaN payload
+  if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
+      (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
+       (x.w[0] > 0x38c15b09ffffffffull))) {
+    x.w[1] = x.w[1] & 0xffffc00000000000ull;
+    x.w[0] = 0x0ull;
+  }
+  if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {     // x is SNAN
+    // set invalid flag
+    *pfpsf |= INVALID_EXCEPTION;
+    // return quiet (x)
+    res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
+    res.w[0] = x.w[0];
+  } else {     // x is QNaN
+    // return x
+    res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
+    res.w[0] = x.w[0];
+  }
+  BID_RETURN (res)
+} else {       // x is not a NaN, so it must be infinity
+  if ((x.w[1] & MASK_SIGN) == 0x0ull) {        // x is +inf
+    // return +inf
+    res.w[1] = 0x7800000000000000ull;
+    res.w[0] = 0x0000000000000000ull;
+  } else {     // x is -inf 
+    // return -inf
+    res.w[1] = 0xf800000000000000ull;
+    res.w[0] = 0x0000000000000000ull;
+  }
+  BID_RETURN (res);
+}
+}
+  // unpack x
+x_sign = x.w[1] & MASK_SIGN;   // 0 for positive, MASK_SIGN for negative
+C1.w[1] = x.w[1] & MASK_COEFF;
+C1.w[0] = x.w[0];
+
+  // check for non-canonical values (treated as zero)
+if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) {       // G0_G1=11
+  // non-canonical
+  x_exp = (x.w[1] << 2) & MASK_EXP;    // biased and shifted left 49 bits
+  C1.w[1] = 0; // significand high
+  C1.w[0] = 0; // significand low
+} else {       // G0_G1 != 11
+  x_exp = x.w[1] & MASK_EXP;   // biased and shifted left 49 bits
+  if (C1.w[1] > 0x0001ed09bead87c0ull ||
+      (C1.w[1] == 0x0001ed09bead87c0ull
+       && C1.w[0] > 0x378d8e63ffffffffull)) {
+    // x is non-canonical if coefficient is larger than 10^34 -1
+    C1.w[1] = 0;
+    C1.w[0] = 0;
+  } else {     // canonical
+    ;
+  }
+}
+
+  // test for input equal to zero
+if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
+  // x is 0
+  // return 0 preserving the sign bit and the preferred exponent
+  // of MAX(Q(x), 0)
+  if (x_exp <= (0x1820ull << 49)) {
+    res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
+  } else {
+    res.w[1] = x_sign | x_exp;
+  }
+  res.w[0] = 0x0000000000000000ull;
+  BID_RETURN (res);
+}
+  // x is not special and is not zero
+
+  // if (exp <= -(p+1)) return 0
+if (x_exp <= 0x2ffa000000000000ull) {  // 0x2ffa000000000000ull == -35
+  res.w[1] = x_sign | 0x3040000000000000ull;
+  res.w[0] = 0x0000000000000000ull;
+  BID_RETURN (res);
+}
+  // q = nr. of decimal digits in x
+  //  determine first the nr. of bits in x
+if (C1.w[1] == 0) {
+  if (C1.w[0] >= 0x0020000000000000ull) {      // x >= 2^53
+    // split the 64-bit value in two 32-bit halves to avoid rounding errors
+    if (C1.w[0] >= 0x0000000100000000ull) {    // x >= 2^32
+      tmp1.d = (double) (C1.w[0] >> 32);       // exact conversion
+      x_nr_bits =
+       33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+    } else {   // x < 2^32
+      tmp1.d = (double) (C1.w[0]);     // exact conversion
+      x_nr_bits =
+       1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+    }
+  } else {     // if x < 2^53
+    tmp1.d = (double) C1.w[0]; // exact conversion
+    x_nr_bits =
+      1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+  }
+} else {       // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
+  tmp1.d = (double) C1.w[1];   // exact conversion
+  x_nr_bits =
+    65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+}
+
+q = nr_digits[x_nr_bits - 1].digits;
+if (q == 0) {
+  q = nr_digits[x_nr_bits - 1].digits1;
+  if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi
+      || (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
+         C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
+    q++;
+}
+exp = (x_exp >> 49) - 6176;
+if (exp >= 0) {        // -exp <= 0
+  // the argument is an integer already
+  res.w[1] = x.w[1];
+  res.w[0] = x.w[0];
+  BID_RETURN (res);
+} else if ((q + exp) >= 0) {   // exp < 0 and 1 <= -exp <= q
+  // need to shift right -exp digits from the coefficient; the exp will be 0
+  ind = -exp;  // 1 <= ind <= 34; ind is a synonym for 'x'
+  // chop off ind digits from the lower part of C1 
+  // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
+  tmp64 = C1.w[0];
+  if (ind <= 19) {
+    C1.w[0] = C1.w[0] + midpoint64[ind - 1];
+  } else {
+    C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
+    C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
+  }
+  if (C1.w[0] < tmp64)
+    C1.w[1]++;
+  // calculate C* and f*
+  // C* is actually floor(C*) in this case
+  // C* and f* need shifting and masking, as shown by
+  // shiftright128[] and maskhigh128[]
+  // 1 <= x <= 34
+  // kx = 10^(-x) = ten2mk128[ind - 1]
+  // C* = (C1 + 1/2 * 10^x) * 10^(-x)
+  // the approximation of 10^(-x) was rounded up to 118 bits
+  __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
+  // determine the value of res and fstar
+  if (ind - 1 <= 2) {  // 0 <= ind - 1 <= 2 => shift = 0
+    // redundant shift = shiftright128[ind - 1]; // shift = 0
+    res.w[1] = P256.w[3];
+    res.w[0] = P256.w[2];
+    // redundant fstar.w[3] = 0;
+    // redundant fstar.w[2] = 0;
+    // redundant fstar.w[1] = P256.w[1];
+    // redundant fstar.w[0] = P256.w[0];
+    // fraction f* < 10^(-x) <=> midpoint
+    // f* is in the right position to be compared with
+    // 10^(-x) from ten2mk128[]
+    // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even)
+    if ((res.w[0] & 0x0000000000000001ull) &&  // is result odd?
+       ((P256.w[1] < (ten2mk128[ind - 1].w[1]))
+        || ((P256.w[1] == ten2mk128[ind - 1].w[1])
+            && (P256.w[0] < ten2mk128[ind - 1].w[0])))) {
+      // subract 1 to make even
+      if (res.w[0]-- == 0) {
+       res.w[1]--;
+      }
+    }
+  } else if (ind - 1 <= 21) {  // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
+    shift = shiftright128[ind - 1];    // 3 <= shift <= 63
+    res.w[1] = (P256.w[3] >> shift);
+    res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
+    // redundant fstar.w[3] = 0;
+    fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
+    fstar.w[1] = P256.w[1];
+    fstar.w[0] = P256.w[0];
+    // fraction f* < 10^(-x) <=> midpoint
+    // f* is in the right position to be compared with
+    // 10^(-x) from ten2mk128[]
+    if ((res.w[0] & 0x0000000000000001ull) &&  // is result odd?
+       fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] ||
+                           (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
+                            fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
+      // subract 1 to make even
+      if (res.w[0]-- == 0) {
+       res.w[1]--;
+      }
+    }
+  } else {     // 22 <= ind - 1 <= 33
+    shift = shiftright128[ind - 1] - 64;       // 2 <= shift <= 38
+    res.w[1] = 0;
+    res.w[0] = P256.w[3] >> shift;
+    fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
+    fstar.w[2] = P256.w[2];
+    fstar.w[1] = P256.w[1];
+    fstar.w[0] = P256.w[0];
+    // fraction f* < 10^(-x) <=> midpoint
+    // f* is in the right position to be compared with
+    // 10^(-x) from ten2mk128[]
+    if ((res.w[0] & 0x0000000000000001ull) &&  // is result odd?
+       fstar.w[3] == 0 && fstar.w[2] == 0
+       && (fstar.w[1] < ten2mk128[ind - 1].w[1]
+           || (fstar.w[1] == ten2mk128[ind - 1].w[1]
+               && fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
+      // subract 1 to make even
+      if (res.w[0]-- == 0) {
+       res.w[1]--;
+      }
+    }
+  }
+  res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
+  BID_RETURN (res);
+} else {       // if ((q + exp) < 0) <=> q < -exp
+  // the result is +0 or -0
+  res.w[1] = x_sign | 0x3040000000000000ull;
+  res.w[0] = 0x0000000000000000ull;
+  BID_RETURN (res);
+}
+}
+
+/*****************************************************************************
+ *  BID128_round_integral_negative
+ ****************************************************************************/
+
+BID128_FUNCTION_ARG1_NORND (bid128_round_integral_negative, x)
+
+     UINT128 res;
+     UINT64 x_sign;
+     UINT64 x_exp;
+     int exp;                  // unbiased exponent
+  // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo 
+  // (all are UINT64)
+     BID_UI64DOUBLE tmp1;
+     unsigned int x_nr_bits;
+     int q, ind, shift;
+     UINT128 C1;
+  // UINT128 res is C* at first - represents up to 34 decimal digits ~ 
+  // 113 bits
+     UINT256 fstar;
+     UINT256 P256;
+
+  // check for NaN or Infinity
+if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
+    // x is special
+if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
+  // if x = NaN, then res = Q (x)
+  // check first for non-canonical NaN payload
+  if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
+      (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
+       (x.w[0] > 0x38c15b09ffffffffull))) {
+    x.w[1] = x.w[1] & 0xffffc00000000000ull;
+    x.w[0] = 0x0ull;
+  }
+  if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {     // x is SNAN
+    // set invalid flag
+    *pfpsf |= INVALID_EXCEPTION;
+    // return quiet (x)
+    res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
+    res.w[0] = x.w[0];
+  } else {     // x is QNaN
+    // return x
+    res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
+    res.w[0] = x.w[0];
+  }
+  BID_RETURN (res)
+} else {       // x is not a NaN, so it must be infinity
+  if ((x.w[1] & MASK_SIGN) == 0x0ull) {        // x is +inf
+    // return +inf
+    res.w[1] = 0x7800000000000000ull;
+    res.w[0] = 0x0000000000000000ull;
+  } else {     // x is -inf 
+    // return -inf
+    res.w[1] = 0xf800000000000000ull;
+    res.w[0] = 0x0000000000000000ull;
+  }
+  BID_RETURN (res);
+}
+}
+  // unpack x
+x_sign = x.w[1] & MASK_SIGN;   // 0 for positive, MASK_SIGN for negative
+C1.w[1] = x.w[1] & MASK_COEFF;
+C1.w[0] = x.w[0];
+
+  // check for non-canonical values (treated as zero)
+if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) {       // G0_G1=11
+  // non-canonical
+  x_exp = (x.w[1] << 2) & MASK_EXP;    // biased and shifted left 49 bits
+  C1.w[1] = 0; // significand high
+  C1.w[0] = 0; // significand low
+} else {       // G0_G1 != 11
+  x_exp = x.w[1] & MASK_EXP;   // biased and shifted left 49 bits
+  if (C1.w[1] > 0x0001ed09bead87c0ull ||
+      (C1.w[1] == 0x0001ed09bead87c0ull
+       && C1.w[0] > 0x378d8e63ffffffffull)) {
+    // x is non-canonical if coefficient is larger than 10^34 -1
+    C1.w[1] = 0;
+    C1.w[0] = 0;
+  } else {     // canonical
+    ;
+  }
+}
+
+  // test for input equal to zero
+if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
+  // x is 0
+  // return 0 preserving the sign bit and the preferred exponent
+  // of MAX(Q(x), 0)
+  if (x_exp <= (0x1820ull << 49)) {
+    res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
+  } else {
+    res.w[1] = x_sign | x_exp;
+  }
+  res.w[0] = 0x0000000000000000ull;
+  BID_RETURN (res);
+}
+  // x is not special and is not zero
+
+  // if (exp <= -p) return -1.0 or +0.0
+if (x_exp <= 0x2ffc000000000000ull) {  // 0x2ffc000000000000ull == -34
+  if (x_sign) {
+    // if negative, return negative 1, because we know the coefficient
+    // is non-zero (would have been caught above)
+    res.w[1] = 0xb040000000000000ull;
+    res.w[0] = 0x0000000000000001ull;
+  } else {
+    // if positive, return positive 0, because we know coefficient is
+    // non-zero (would have been caught above)
+    res.w[1] = 0x3040000000000000ull;
+    res.w[0] = 0x0000000000000000ull;
+  }
+  BID_RETURN (res);
+}
+  // q = nr. of decimal digits in x
+  // determine first the nr. of bits in x
+if (C1.w[1] == 0) {
+  if (C1.w[0] >= 0x0020000000000000ull) {      // x >= 2^53
+    // split the 64-bit value in two 32-bit halves to avoid rounding errors
+    if (C1.w[0] >= 0x0000000100000000ull) {    // x >= 2^32
+      tmp1.d = (double) (C1.w[0] >> 32);       // exact conversion
+      x_nr_bits =
+       33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+    } else {   // x < 2^32
+      tmp1.d = (double) (C1.w[0]);     // exact conversion
+      x_nr_bits =
+       1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+    }
+  } else {     // if x < 2^53
+    tmp1.d = (double) C1.w[0]; // exact conversion
+    x_nr_bits =
+      1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+  }
+} else {       // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
+  tmp1.d = (double) C1.w[1];   // exact conversion
+  x_nr_bits =
+    65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+}
+
+q = nr_digits[x_nr_bits - 1].digits;
+if (q == 0) {
+  q = nr_digits[x_nr_bits - 1].digits1;
+  if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
+      (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
+       C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
+    q++;
+}
+exp = (x_exp >> 49) - 6176;
+if (exp >= 0) {        // -exp <= 0
+  // the argument is an integer already
+  res.w[1] = x.w[1];
+  res.w[0] = x.w[0];
+  BID_RETURN (res);
+} else if ((q + exp) > 0) {    // exp < 0 and 1 <= -exp < q
+  // need to shift right -exp digits from the coefficient; the exp will be 0
+  ind = -exp;  // 1 <= ind <= 34; ind is a synonym for 'x' 
+  // (number of digits to be chopped off)
+  // chop off ind digits from the lower part of C1 
+  // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
+  // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
+  // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
+  // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
+  //tmp64 = C1.w[0];
+  // if (ind <= 19) {
+  //   C1.w[0] = C1.w[0] + midpoint64[ind - 1];
+  // } else {
+  //   C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
+  //   C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
+  // }
+  // if (C1.w[0] < tmp64) C1.w[1]++;
+  // if carry-out from C1.w[0], increment C1.w[1]
+  // calculate C* and f*
+  // C* is actually floor(C*) in this case
+  // C* and f* need shifting and masking, as shown by
+  // shiftright128[] and maskhigh128[]
+  // 1 <= x <= 34
+  // kx = 10^(-x) = ten2mk128[ind - 1]
+  // C* = (C1 + 1/2 * 10^x) * 10^(-x)
+  // the approximation of 10^(-x) was rounded up to 118 bits
+  __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
+  if (ind - 1 <= 2) {  // 0 <= ind - 1 <= 2 => shift = 0
+    res.w[1] = P256.w[3];
+    res.w[0] = P256.w[2];
+    // if positive, the truncated value is already the correct result
+    if (x_sign) {      // if negative
+      // redundant fstar.w[3] = 0;
+      // redundant fstar.w[2] = 0;
+      // redundant fstar.w[1] = P256.w[1];
+      // redundant fstar.w[0] = P256.w[0];
+      // fraction f* > 10^(-x) <=> inexact
+      // f* is in the right position to be compared with
+      // 10^(-x) from ten2mk128[]
+      if ((P256.w[1] > ten2mk128[ind - 1].w[1])
+         || (P256.w[1] == ten2mk128[ind - 1].w[1]
+             && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
+       if (++res.w[0] == 0) {
+         res.w[1]++;
+       }
+      }
+    }
+  } else if (ind - 1 <= 21) {  // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
+    shift = shiftright128[ind - 1];    // 0 <= shift <= 102
+    res.w[1] = (P256.w[3] >> shift);
+    res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
+    // if positive, the truncated value is already the correct result
+    if (x_sign) {      // if negative
+      // redundant fstar.w[3] = 0;
+      fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
+      fstar.w[1] = P256.w[1];
+      fstar.w[0] = P256.w[0];
+      // fraction f* > 10^(-x) <=> inexact
+      // f* is in the right position to be compared with
+      // 10^(-x) from ten2mk128[]
+      if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
+         (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
+          fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
+       if (++res.w[0] == 0) {
+         res.w[1]++;
+       }
+      }
+    }
+  } else {     // 22 <= ind - 1 <= 33
+    shift = shiftright128[ind - 1] - 64;       // 2 <= shift <= 38
+    res.w[1] = 0;
+    res.w[0] = P256.w[3] >> shift;
+    // if positive, the truncated value is already the correct result
+    if (x_sign) {      // if negative
+      fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
+      fstar.w[2] = P256.w[2];
+      fstar.w[1] = P256.w[1];
+      fstar.w[0] = P256.w[0];
+      // fraction f* > 10^(-x) <=> inexact
+      // f* is in the right position to be compared with
+      // 10^(-x) from ten2mk128[]
+      if (fstar.w[3] || fstar.w[2]
+         || fstar.w[1] > ten2mk128[ind - 1].w[1]
+         || (fstar.w[1] == ten2mk128[ind - 1].w[1]
+             && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
+       if (++res.w[0] == 0) {
+         res.w[1]++;
+       }
+      }
+    }
+  }
+  res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
+  BID_RETURN (res);
+} else {       // if exp < 0 and q + exp <= 0
+  if (x_sign) {        // negative rounds down to -1.0
+    res.w[1] = 0xb040000000000000ull;
+    res.w[0] = 0x0000000000000001ull;
+  } else {     // positive rpunds down to +0.0
+    res.w[1] = 0x3040000000000000ull;
+    res.w[0] = 0x0000000000000000ull;
+  }
+  BID_RETURN (res);
+}
+}
+
+/*****************************************************************************
+ *  BID128_round_integral_positive
+ ****************************************************************************/
+
+BID128_FUNCTION_ARG1_NORND (bid128_round_integral_positive, x)
+
+     UINT128 res;
+     UINT64 x_sign;
+     UINT64 x_exp;
+     int exp;                  // unbiased exponent
+  // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo 
+  // (all are UINT64)
+     BID_UI64DOUBLE tmp1;
+     unsigned int x_nr_bits;
+     int q, ind, shift;
+     UINT128 C1;
+  // UINT128 res is C* at first - represents up to 34 decimal digits ~ 
+  // 113 bits
+     UINT256 fstar;
+     UINT256 P256;
+
+  // check for NaN or Infinity
+if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
+    // x is special
+if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
+  // if x = NaN, then res = Q (x)
+  // check first for non-canonical NaN payload
+  if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
+      (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
+       (x.w[0] > 0x38c15b09ffffffffull))) {
+    x.w[1] = x.w[1] & 0xffffc00000000000ull;
+    x.w[0] = 0x0ull;
+  }
+  if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {     // x is SNAN
+    // set invalid flag
+    *pfpsf |= INVALID_EXCEPTION;
+    // return quiet (x)
+    res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
+    res.w[0] = x.w[0];
+  } else {     // x is QNaN
+    // return x
+    res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
+    res.w[0] = x.w[0];
+  }
+  BID_RETURN (res)
+} else {       // x is not a NaN, so it must be infinity
+  if ((x.w[1] & MASK_SIGN) == 0x0ull) {        // x is +inf
+    // return +inf
+    res.w[1] = 0x7800000000000000ull;
+    res.w[0] = 0x0000000000000000ull;
+  } else {     // x is -inf 
+    // return -inf
+    res.w[1] = 0xf800000000000000ull;
+    res.w[0] = 0x0000000000000000ull;
+  }
+  BID_RETURN (res);
+}
+}
+  // unpack x
+x_sign = x.w[1] & MASK_SIGN;   // 0 for positive, MASK_SIGN for negative
+C1.w[1] = x.w[1] & MASK_COEFF;
+C1.w[0] = x.w[0];
+
+  // check for non-canonical values (treated as zero)
+if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) {       // G0_G1=11
+  // non-canonical
+  x_exp = (x.w[1] << 2) & MASK_EXP;    // biased and shifted left 49 bits
+  C1.w[1] = 0; // significand high
+  C1.w[0] = 0; // significand low
+} else {       // G0_G1 != 11
+  x_exp = x.w[1] & MASK_EXP;   // biased and shifted left 49 bits
+  if (C1.w[1] > 0x0001ed09bead87c0ull ||
+      (C1.w[1] == 0x0001ed09bead87c0ull
+       && C1.w[0] > 0x378d8e63ffffffffull)) {
+    // x is non-canonical if coefficient is larger than 10^34 -1
+    C1.w[1] = 0;
+    C1.w[0] = 0;
+  } else {     // canonical
+    ;
+  }
+}
+
+  // test for input equal to zero
+if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
+  // x is 0
+  // return 0 preserving the sign bit and the preferred exponent 
+  // of MAX(Q(x), 0)
+  if (x_exp <= (0x1820ull << 49)) {
+    res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
+  } else {
+    res.w[1] = x_sign | x_exp;
+  }
+  res.w[0] = 0x0000000000000000ull;
+  BID_RETURN (res);
+}
+  // x is not special and is not zero
+
+  // if (exp <= -p) return -0.0 or +1.0
+if (x_exp <= 0x2ffc000000000000ull) {  // 0x2ffc000000000000ull == -34
+  if (x_sign) {
+    // if negative, return negative 0, because we know the coefficient 
+    // is non-zero (would have been caught above)
+    res.w[1] = 0xb040000000000000ull;
+    res.w[0] = 0x0000000000000000ull;
+  } else {
+    // if positive, return positive 1, because we know coefficient is 
+    // non-zero (would have been caught above)
+    res.w[1] = 0x3040000000000000ull;
+    res.w[0] = 0x0000000000000001ull;
+  }
+  BID_RETURN (res);
+}
+  // q = nr. of decimal digits in x
+  // determine first the nr. of bits in x
+if (C1.w[1] == 0) {
+  if (C1.w[0] >= 0x0020000000000000ull) {      // x >= 2^53
+    // split 64-bit value in two 32-bit halves to avoid rounding errors
+    if (C1.w[0] >= 0x0000000100000000ull) {    // x >= 2^32
+      tmp1.d = (double) (C1.w[0] >> 32);       // exact conversion
+      x_nr_bits =
+       33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+    } else {   // x < 2^32
+      tmp1.d = (double) (C1.w[0]);     // exact conversion
+      x_nr_bits =
+       1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+    }
+  } else {     // if x < 2^53
+    tmp1.d = (double) C1.w[0]; // exact conversion
+    x_nr_bits =
+      1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+  }
+} else {       // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
+  tmp1.d = (double) C1.w[1];   // exact conversion
+  x_nr_bits =
+    65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+}
+
+q = nr_digits[x_nr_bits - 1].digits;
+if (q == 0) {
+  q = nr_digits[x_nr_bits - 1].digits1;
+  if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
+      (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
+       C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
+    q++;
+}
+exp = (x_exp >> 49) - 6176;
+if (exp >= 0) {        // -exp <= 0
+  // the argument is an integer already
+  res.w[1] = x.w[1];
+  res.w[0] = x.w[0];
+  BID_RETURN (res);
+} else if ((q + exp) > 0) {    // exp < 0 and 1 <= -exp < q
+  // need to shift right -exp digits from the coefficient; exp will be 0
+  ind = -exp;  // 1 <= ind <= 34; ind is a synonym for 'x' 
+  // (number of digits to be chopped off)
+  // chop off ind digits from the lower part of C1 
+  // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
+  // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
+  // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
+  // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
+  // tmp64 = C1.w[0];
+  // if (ind <= 19) {
+  //   C1.w[0] = C1.w[0] + midpoint64[ind - 1];
+  // } else {
+  //   C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
+  //   C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
+  // }
+  // if (C1.w[0] < tmp64) C1.w[1]++;  
+  // if carry-out from C1.w[0], increment C1.w[1]
+  // calculate C* and f*
+  // C* is actually floor(C*) in this case
+  // C* and f* need shifting and masking, as shown by
+  // shiftright128[] and maskhigh128[]
+  // 1 <= x <= 34
+  // kx = 10^(-x) = ten2mk128[ind - 1]
+  // C* = C1 * 10^(-x)
+  // the approximation of 10^(-x) was rounded up to 118 bits
+  __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
+  if (ind - 1 <= 2) {  // 0 <= ind - 1 <= 2 => shift = 0
+    res.w[1] = P256.w[3];
+    res.w[0] = P256.w[2];
+    // if negative, the truncated value is already the correct result
+    if (!x_sign) {     // if positive
+      // redundant fstar.w[3] = 0;
+      // redundant fstar.w[2] = 0;
+      // redundant fstar.w[1] = P256.w[1]; 
+      // redundant fstar.w[0] = P256.w[0];
+      // fraction f* > 10^(-x) <=> inexact
+      // f* is in the right position to be compared with 
+      // 10^(-x) from ten2mk128[]
+      if ((P256.w[1] > ten2mk128[ind - 1].w[1])
+         || (P256.w[1] == ten2mk128[ind - 1].w[1]
+             && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
+       if (++res.w[0] == 0) {
+         res.w[1]++;
+       }
+      }
+    }
+  } else if (ind - 1 <= 21) {  // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
+    shift = shiftright128[ind - 1];    // 3 <= shift <= 63
+    res.w[1] = (P256.w[3] >> shift);
+    res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
+    // if negative, the truncated value is already the correct result
+    if (!x_sign) {     // if positive
+      // redundant fstar.w[3] = 0;
+      fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
+      fstar.w[1] = P256.w[1];
+      fstar.w[0] = P256.w[0];
+      // fraction f* > 10^(-x) <=> inexact
+      // f* is in the right position to be compared with 
+      // 10^(-x) from ten2mk128[]
+      if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
+         (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
+          fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
+       if (++res.w[0] == 0) {
+         res.w[1]++;
+       }
+      }
+    }
+  } else {     // 22 <= ind - 1 <= 33
+    shift = shiftright128[ind - 1] - 64;       // 2 <= shift <= 38
+    res.w[1] = 0;
+    res.w[0] = P256.w[3] >> shift;
+    // if negative, the truncated value is already the correct result
+    if (!x_sign) {     // if positive
+      fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
+      fstar.w[2] = P256.w[2];
+      fstar.w[1] = P256.w[1];
+      fstar.w[0] = P256.w[0];
+      // fraction f* > 10^(-x) <=> inexact
+      // f* is in the right position to be compared with 
+      // 10^(-x) from ten2mk128[]
+      if (fstar.w[3] || fstar.w[2]
+         || fstar.w[1] > ten2mk128[ind - 1].w[1]
+         || (fstar.w[1] == ten2mk128[ind - 1].w[1]
+             && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
+       if (++res.w[0] == 0) {
+         res.w[1]++;
+       }
+      }
+    }
+  }
+  res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
+  BID_RETURN (res);
+} else {       // if exp < 0 and q + exp <= 0
+  if (x_sign) {        // negative rounds up to -0.0
+    res.w[1] = 0xb040000000000000ull;
+    res.w[0] = 0x0000000000000000ull;
+  } else {     // positive rpunds up to +1.0
+    res.w[1] = 0x3040000000000000ull;
+    res.w[0] = 0x0000000000000001ull;
+  }
+  BID_RETURN (res);
+}
+}
+
+/*****************************************************************************
+ *  BID128_round_integral_zero
+ ****************************************************************************/
+
+BID128_FUNCTION_ARG1_NORND (bid128_round_integral_zero, x)
+
+     UINT128 res;
+     UINT64 x_sign;
+     UINT64 x_exp;
+     int exp;                  // unbiased exponent
+  // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
+  // (all are UINT64)
+     BID_UI64DOUBLE tmp1;
+     unsigned int x_nr_bits;
+     int q, ind, shift;
+     UINT128 C1;
+  // UINT128 res is C* at first - represents up to 34 decimal digits ~
+  // 113 bits
+     UINT256 P256;
+
+  // check for NaN or Infinity
+if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
+    // x is special
+if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
+  // if x = NaN, then res = Q (x)
+  // check first for non-canonical NaN payload
+  if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
+      (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
+       (x.w[0] > 0x38c15b09ffffffffull))) {
+    x.w[1] = x.w[1] & 0xffffc00000000000ull;
+    x.w[0] = 0x0ull;
+  }
+  if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {     // x is SNAN
+    // set invalid flag
+    *pfpsf |= INVALID_EXCEPTION;
+    // return quiet (x)
+    res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
+    res.w[0] = x.w[0];
+  } else {     // x is QNaN
+    // return x
+    res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
+    res.w[0] = x.w[0];
+  }
+  BID_RETURN (res)
+} else {       // x is not a NaN, so it must be infinity
+  if ((x.w[1] & MASK_SIGN) == 0x0ull) {        // x is +inf
+    // return +inf
+    res.w[1] = 0x7800000000000000ull;
+    res.w[0] = 0x0000000000000000ull;
+  } else {     // x is -inf 
+    // return -inf
+    res.w[1] = 0xf800000000000000ull;
+    res.w[0] = 0x0000000000000000ull;
+  }
+  BID_RETURN (res);
+}
+}
+  // unpack x
+x_sign = x.w[1] & MASK_SIGN;   // 0 for positive, MASK_SIGN for negative
+C1.w[1] = x.w[1] & MASK_COEFF;
+C1.w[0] = x.w[0];
+
+  // check for non-canonical values (treated as zero)
+if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) {       // G0_G1=11
+  // non-canonical
+  x_exp = (x.w[1] << 2) & MASK_EXP;    // biased and shifted left 49 bits
+  C1.w[1] = 0; // significand high
+  C1.w[0] = 0; // significand low
+} else {       // G0_G1 != 11
+  x_exp = x.w[1] & MASK_EXP;   // biased and shifted left 49 bits
+  if (C1.w[1] > 0x0001ed09bead87c0ull ||
+      (C1.w[1] == 0x0001ed09bead87c0ull
+       && C1.w[0] > 0x378d8e63ffffffffull)) {
+    // x is non-canonical if coefficient is larger than 10^34 -1
+    C1.w[1] = 0;
+    C1.w[0] = 0;
+  } else {     // canonical
+    ;
+  }
+}
+
+  // test for input equal to zero
+if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
+  // x is 0
+  // return 0 preserving the sign bit and the preferred exponent
+  // of MAX(Q(x), 0)
+  if (x_exp <= (0x1820ull << 49)) {
+    res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
+  } else {
+    res.w[1] = x_sign | x_exp;
+  }
+  res.w[0] = 0x0000000000000000ull;
+  BID_RETURN (res);
+}
+  // x is not special and is not zero
+
+  // if (exp <= -p) return -0.0 or +0.0
+if (x_exp <= 0x2ffc000000000000ull) {  // 0x2ffc000000000000ull == -34
+  res.w[1] = x_sign | 0x3040000000000000ull;
+  res.w[0] = 0x0000000000000000ull;
+  BID_RETURN (res);
+}
+  // q = nr. of decimal digits in x
+  // determine first the nr. of bits in x
+if (C1.w[1] == 0) {
+  if (C1.w[0] >= 0x0020000000000000ull) {      // x >= 2^53
+    // split the 64-bit value in two 32-bit halves to avoid rounding errors
+    if (C1.w[0] >= 0x0000000100000000ull) {    // x >= 2^32
+      tmp1.d = (double) (C1.w[0] >> 32);       // exact conversion
+      x_nr_bits =
+       33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+    } else {   // x < 2^32
+      tmp1.d = (double) (C1.w[0]);     // exact conversion
+      x_nr_bits =
+       1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+    }
+  } else {     // if x < 2^53
+    tmp1.d = (double) C1.w[0]; // exact conversion
+    x_nr_bits =
+      1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+  }
+} else {       // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
+  tmp1.d = (double) C1.w[1];   // exact conversion
+  x_nr_bits =
+    65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+}
+
+q = nr_digits[x_nr_bits - 1].digits;
+if (q == 0) {
+  q = nr_digits[x_nr_bits - 1].digits1;
+  if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
+      (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
+       C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
+    q++;
+}
+exp = (x_exp >> 49) - 6176;
+if (exp >= 0) {        // -exp <= 0
+  // the argument is an integer already
+  res.w[1] = x.w[1];
+  res.w[0] = x.w[0];
+  BID_RETURN (res);
+} else if ((q + exp) > 0) {    // exp < 0 and 1 <= -exp < q
+  // need to shift right -exp digits from the coefficient; the exp will be 0
+  ind = -exp;  // 1 <= ind <= 34; ind is a synonym for 'x'
+  // (number of digits to be chopped off)
+  // chop off ind digits from the lower part of C1 
+  // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
+  // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
+  // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
+  // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
+  //tmp64 = C1.w[0];
+  // if (ind <= 19) {
+  //   C1.w[0] = C1.w[0] + midpoint64[ind - 1];
+  // } else {
+  //   C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
+  //   C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
+  // }
+  // if (C1.w[0] < tmp64) C1.w[1]++;  
+  // if carry-out from C1.w[0], increment C1.w[1]
+  // calculate C* and f*
+  // C* is actually floor(C*) in this case
+  // C* and f* need shifting and masking, as shown by
+  // shiftright128[] and maskhigh128[]
+  // 1 <= x <= 34
+  // kx = 10^(-x) = ten2mk128[ind - 1]
+  // C* = (C1 + 1/2 * 10^x) * 10^(-x)
+  // the approximation of 10^(-x) was rounded up to 118 bits
+  __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
+  if (ind - 1 <= 2) {  // 0 <= ind - 1 <= 2 => shift = 0
+    res.w[1] = P256.w[3];
+    res.w[0] = P256.w[2];
+  } else if (ind - 1 <= 21) {  // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
+    shift = shiftright128[ind - 1];    // 3 <= shift <= 63
+    res.w[1] = (P256.w[3] >> shift);
+    res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
+  } else {     // 22 <= ind - 1 <= 33
+    shift = shiftright128[ind - 1] - 64;       // 2 <= shift <= 38
+    res.w[1] = 0;
+    res.w[0] = P256.w[3] >> shift;
+  }
+  res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
+  BID_RETURN (res);
+} else {       // if exp < 0 and q + exp <= 0 the result is +0 or -0
+  res.w[1] = x_sign | 0x3040000000000000ull;
+  res.w[0] = 0x0000000000000000ull;
+  BID_RETURN (res);
+}
+}
+
+/*****************************************************************************
+ *  BID128_round_integral_nearest_away
+ ****************************************************************************/
+
+BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_away, x)
+
+     UINT128 res;
+     UINT64 x_sign;
+     UINT64 x_exp;
+     int exp;                  // unbiased exponent
+  // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo 
+  // (all are UINT64)
+     UINT64 tmp64;
+     BID_UI64DOUBLE tmp1;
+     unsigned int x_nr_bits;
+     int q, ind, shift;
+     UINT128 C1;
+  // UINT128 res is C* at first - represents up to 34 decimal digits ~ 
+  // 113 bits
+  // UINT256 fstar;
+     UINT256 P256;
+
+  // check for NaN or Infinity
+if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
+    // x is special
+if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
+  // if x = NaN, then res = Q (x)
+  // check first for non-canonical NaN payload
+  if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
+      (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
+       (x.w[0] > 0x38c15b09ffffffffull))) {
+    x.w[1] = x.w[1] & 0xffffc00000000000ull;
+    x.w[0] = 0x0ull;
+  }
+  if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {     // x is SNAN
+    // set invalid flag
+    *pfpsf |= INVALID_EXCEPTION;
+    // return quiet (x)
+    res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
+    res.w[0] = x.w[0];
+  } else {     // x is QNaN
+    // return x
+    res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
+    res.w[0] = x.w[0];
+  }
+  BID_RETURN (res)
+} else {       // x is not a NaN, so it must be infinity
+  if ((x.w[1] & MASK_SIGN) == 0x0ull) {        // x is +inf
+    // return +inf
+    res.w[1] = 0x7800000000000000ull;
+    res.w[0] = 0x0000000000000000ull;
+  } else {     // x is -inf 
+    // return -inf
+    res.w[1] = 0xf800000000000000ull;
+    res.w[0] = 0x0000000000000000ull;
+  }
+  BID_RETURN (res);
+}
+}
+  // unpack x
+x_sign = x.w[1] & MASK_SIGN;   // 0 for positive, MASK_SIGN for negative
+C1.w[1] = x.w[1] & MASK_COEFF;
+C1.w[0] = x.w[0];
+
+  // check for non-canonical values (treated as zero)
+if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) {       // G0_G1=11
+  // non-canonical
+  x_exp = (x.w[1] << 2) & MASK_EXP;    // biased and shifted left 49 bits
+  C1.w[1] = 0; // significand high
+  C1.w[0] = 0; // significand low
+} else {       // G0_G1 != 11
+  x_exp = x.w[1] & MASK_EXP;   // biased and shifted left 49 bits
+  if (C1.w[1] > 0x0001ed09bead87c0ull ||
+      (C1.w[1] == 0x0001ed09bead87c0ull
+       && C1.w[0] > 0x378d8e63ffffffffull)) {
+    // x is non-canonical if coefficient is larger than 10^34 -1
+    C1.w[1] = 0;
+    C1.w[0] = 0;
+  } else {     // canonical
+    ;
+  }
+}
+
+  // test for input equal to zero
+if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
+  // x is 0
+  // return 0 preserving the sign bit and the preferred exponent
+  // of MAX(Q(x), 0)
+  if (x_exp <= (0x1820ull << 49)) {
+    res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
+  } else {
+    res.w[1] = x_sign | x_exp;
+  }
+  res.w[0] = 0x0000000000000000ull;
+  BID_RETURN (res);
+}
+  // x is not special and is not zero
+
+  // if (exp <= -(p+1)) return 0.0
+if (x_exp <= 0x2ffa000000000000ull) {  // 0x2ffa000000000000ull == -35
+  res.w[1] = x_sign | 0x3040000000000000ull;
+  res.w[0] = 0x0000000000000000ull;
+  BID_RETURN (res);
+}
+  // q = nr. of decimal digits in x
+  //  determine first the nr. of bits in x
+if (C1.w[1] == 0) {
+  if (C1.w[0] >= 0x0020000000000000ull) {      // x >= 2^53
+    // split the 64-bit value in two 32-bit halves to avoid rounding errors
+    if (C1.w[0] >= 0x0000000100000000ull) {    // x >= 2^32
+      tmp1.d = (double) (C1.w[0] >> 32);       // exact conversion
+      x_nr_bits =
+       33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+    } else {   // x < 2^32
+      tmp1.d = (double) (C1.w[0]);     // exact conversion
+      x_nr_bits =
+       1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+    }
+  } else {     // if x < 2^53
+    tmp1.d = (double) C1.w[0]; // exact conversion
+    x_nr_bits =
+      1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+  }
+} else {       // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
+  tmp1.d = (double) C1.w[1];   // exact conversion
+  x_nr_bits =
+    65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
+}
+
+q = nr_digits[x_nr_bits - 1].digits;
+if (q == 0) {
+  q = nr_digits[x_nr_bits - 1].digits1;
+  if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
+      (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
+       C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
+    q++;
+}
+exp = (x_exp >> 49) - 6176;
+if (exp >= 0) {        // -exp <= 0
+  // the argument is an integer already
+  res.w[1] = x.w[1];
+  res.w[0] = x.w[0];
+  BID_RETURN (res);
+} else if ((q + exp) >= 0) {   // exp < 0 and 1 <= -exp <= q
+  // need to shift right -exp digits from the coefficient; the exp will be 0
+  ind = -exp;  // 1 <= ind <= 34; ind is a synonym for 'x'
+  // chop off ind digits from the lower part of C1 
+  // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
+  tmp64 = C1.w[0];
+  if (ind <= 19) {
+    C1.w[0] = C1.w[0] + midpoint64[ind - 1];
+  } else {
+    C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
+    C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
+  }
+  if (C1.w[0] < tmp64)
+    C1.w[1]++;
+  // calculate C* and f*
+  // C* is actually floor(C*) in this case
+  // C* and f* need shifting and masking, as shown by
+  // shiftright128[] and maskhigh128[]
+  // 1 <= x <= 34
+  // kx = 10^(-x) = ten2mk128[ind - 1]
+  // C* = (C1 + 1/2 * 10^x) * 10^(-x)
+  // the approximation of 10^(-x) was rounded up to 118 bits
+  __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
+  // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g.
+  // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
+  // if (0 < f* < 10^(-x)) then the result is a midpoint
+  //   if floor(C*) is even then C* = floor(C*) - logical right
+  //       shift; C* has p decimal digits, correct by Prop. 1)
+  //   else if floor(C*) is odd C* = floor(C*)-1 (logical right
+  //       shift; C* has p decimal digits, correct by Pr. 1)
+  // else
+  //   C* = floor(C*) (logical right shift; C has p decimal digits,
+  //       correct by Property 1)
+  // n = C* * 10^(e+x)
+
+  // shift right C* by Ex-128 = shiftright128[ind]
+  if (ind - 1 <= 2) {  // 0 <= ind - 1 <= 2 => shift = 0
+    res.w[1] = P256.w[3];
+    res.w[0] = P256.w[2];
+  } else if (ind - 1 <= 21) {  // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
+    shift = shiftright128[ind - 1];    // 3 <= shift <= 63
+    res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
+    res.w[1] = (P256.w[3] >> shift);
+  } else {     // 22 <= ind - 1 <= 33
+    shift = shiftright128[ind - 1];    // 2 <= shift <= 38
+    res.w[1] = 0;
+    res.w[0] = (P256.w[3] >> (shift - 64));    // 2 <= shift - 64 <= 38
+  }
+  // if the result was a midpoint, it was already rounded away from zero
+  res.w[1] |= x_sign | 0x3040000000000000ull;
+  BID_RETURN (res);
+} else {       // if ((q + exp) < 0) <=> q < -exp
+  // the result is +0 or -0
+  res.w[1] = x_sign | 0x3040000000000000ull;
+  res.w[0] = 0x0000000000000000ull;
+  BID_RETURN (res);
+}
+}