Names specified here
Name Description Notes Source Availability
copysign() Extract and apply sign M (·) <tgmath.h> C99 C11
copysign() Extract and apply sign (·) <math.h> C99 C11
copysignf() Extract and apply sign (·) <math.h> C99 C11
copysignl() Extract and apply sign (·) <math.h> C99 C11
FP_ILOGB0 Result of logarithm of zero M <math.h> C99 C11
FP_ILOGBNAN Result of logarithm of NaN M <math.h> C99 C11
frexp() Decompose floating-point number (·) <math.h> C89 C90 C95 C99 C11
frexp() Decompose floating-point number M (·) <tgmath.h> C99 C11
frexpf() Decompose floating-point number (·) <math.h> C99 C11
frexpl() Decompose floating-point number (·) <math.h> C99 C11
ilogb() Compute native-base logarithm (·) <math.h> C99 C11
ilogb() Compute native-base logarithm M (·) <tgmath.h> C99 C11
ilogbf() Compute native-base logarithm (·) <math.h> C99 C11
ilogbl() Compute native-base logarithm (·) <math.h> C99 C11
ldexp() Multiply by power of 2 M (·) <tgmath.h> C99 C11
ldexp() Multiply by power of 2 (·) <math.h> C89 C90 C95 C99 C11
ldexpf() Multiply by power of 2 (·) <math.h> C99 C11
ldexpl() Multiply by power of 2 (·) <math.h> C99 C11
modf() Extract integral and fractional parts L (·) Predefined C99 C11
modf() Extract integral and fractional parts (·) <math.h> C89 C90 C95 C99 C11
modff() Extract integral and fractional parts (·) <math.h> C99 C11
modfl() Extract integral and fractional parts (·) <math.h> C99 C11
signbit() Test for negative M (·) <math.h> C99 C11

<math.h> provides some functions for decomposing floating-point numbers into parts, and for building them up from parts. Some functions can be used to decompose into integers (which are then trivial to serialize for transmission), and then recombine them later.

#include <math.h>
int ilogbf(float x);
int ilogb(double x);
int ilogbl(long double x);
#include <tgmath.h>
int ilogb(real-floating-type x);

The ilogb functions extract the exponent of x as an integer, and return:

FP_ILOGB0 when x is zero
INT_MAX when x is infinite
FP_ILOGBNAN when x is NaN
the exponent otherwise
#include <math.h>
float frexpf(float v, int *xp);
double frexp(double v, int *xp);
long double frexpl(long double v, int *xp);
#include <tgmath.h>
real-floating-type frexp(real-floating-type v, int *xp);

The frexp functions split the input v into two parts: an exponent x written to *exp; and a mantissa m which is returned, and has the range [½, 1]. The two outputs can be used to reform the input, such that v=m.2x. If v is zero, then *xp and the return value will also be zero.

#include <math.h>
float ldexpf(float m, int x);
double ldexp(double m, int x);
long double ldexpl(long double m, int x);
#include <tgmath.h>
real-floating-type ldexp(real-floating-type m, int x);

The ldexp functions compute and return m.2x.

#include <math.h>
float modff(float val, float *ip);
double modf(double val, double *ip);
long double modfl(long double val, long double *ip);
#include <tgmath.h>
real-floating-type modf(real-floating-type val, real-floating-type *ip);

The modf functions split the number val into an integer part, stored in *ip, and a fractional part, which is returned. Both parts have the same sign as val.

#include <math.h>
float copysignf(float x, float y);
double copysign(double x, double y);
long double copysignl(long double x, long double y);
#include <tgmath.h>
real-floating-type copysign(real-floating-type x, real-floating-type y);

The copysign functions return a value which has the magnitude of x and the sign of y. If x is NaN, the result is also NaN, with the sign of y.

You can use frexp and ldexp to split a floating-point value into a mantissa and exponent for interoperable representation as integers. Here are two functions ieee754_bin64enc and ieee754_bin64dec to convert between double and uint_fast64_t holding an IEEE754 binary64 value:

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <inttypes.h>

/* IEEE754 binary64 has 52 mantissa bits, 1 sign bit, and 11 bits for
   the exponent. */

#define MANTISSA_BIT 52
#define EXPONENT_BIT 11
#define SIGN_BIT 1

#define MANTISSA_SHIFT 0
#define MANTISSA_MASK ((UINTMAX_C(1) << MANTISSA_BIT) - 1u)
#define MANTISSA_FIELD (MANTISSA_MASK << MANTISSA_SHIFT)

#define EXPONENT_SHIFT (MANTISSA_SHIFT + MANTISSA_BIT)
#define EXPONENT_MASK ((UINTMAX_C(1) << EXPONENT_BIT) - 1u)
#define EXPONENT_FIELD (EXPONENT_MASK << EXPONENT_SHIFT)

#define SIGN_SHIFT (EXPONENT_SHIFT + EXPONENT_BIT)
#define SIGN_MASK ((UINTMAX_C(1) << SIGN_BIT) - 1u)
#define SIGN_FIELD (SIGN_MASK << SIGN_SHIFT)

uint_fast64_t ieee754_bin64enc(double input)
{
  double d_mant;
  unsigned u_exp;
  unsigned long long u_mant;
  bool neg;
  int s_exp;
  uint_fast64_t bytes;

  switch (fpclassify(input)) {
  case FP_ZERO:
    neg = signbit(input);
    u_mant = 0;
    u_exp = 0;
    break;

  case FP_INFINITE:
    neg = signbit(input);
    u_mant = 0;
    u_exp = 0x7ffu;
    break;

  case FP_NAN:
    neg = false;
    u_mant = 1;
    u_exp = 0x7ffu;
    break;

  case FP_SUBNORMAL:
  case FP_NORMAL:
    /* Handle normal and subnormal together.  The number might be one
       class for double, but another for binary64. */

    /* Decompose the input into a significand (mantissa + 1) and an
       exponent. */
    d_mant = frexp(input, &s_exp);

    /* Extract the sign bit from the mantissa. */
    neg = signbit(input);
    d_mant = fabs(d_mant);

    /* Offset the exponent so it can be represented as an unsigned
       value. */
    s_exp += 1022;

    /* Now we find out whether the number we represent is normal,
       subnormal, or overflows binary64. */
    if (s_exp >= 0x7ff) {
      /* The number is too big for binary64, so use the maximum
         value. */
      u_mant = MANTISSA_MASK;
      u_exp = 0x7feu;
    } else if (s_exp <= 0) {
      /* The number is subnormal in binary64. */

      /* Shift the mantissa so that its exponent would be 0. */
      u_mant = ldexp(d_mant, MANTISSA_BIT);

      u_mant >>= -s_exp;
      u_exp = 0;
    } else {
      /* The number is normal in binary64. */

      /* Use the suggested exponent. */
      u_exp = s_exp;

      /* Make the mantissa value into a positive integer. */
      u_mant = ldexp(d_mant, MANTISSA_BIT + 1);
    }

    break;
  }

  /* Transmit the bottom MANTISSA_BITs of u_mant.  The extra top bit
     will always be one because of normalization. */

  bytes = ((uint_fast64_t) u_mant & MANTISSA_MASK) << MANTISSA_SHIFT;
  bytes |= ((uint_fast64_t) u_exp & EXPONENT_MASK) << EXPONENT_SHIFT;
  bytes |= ((uint_fast64_t) neg & SIGN_MASK) << SIGN_SHIFT;

  return bytes;
}


double ieee754_bin64dec(uint_fast64_t bytes)
{
  double output;
  int s_exp;
  unsigned u_exp;
  unsigned long long u_mant;
  bool neg;

  /* Extract the bit fields. */
  u_exp = (bytes >> EXPONENT_SHIFT) & EXPONENT_MASK;
  u_mant = (bytes >> MANTISSA_SHIFT) & MANTISSA_MASK;
  neg = (bytes >> SIGN_SHIFT) & SIGN_MASK;

  if (u_exp == 0x7ffu) {
    if (u_mant == 0) {
#ifdef INFINITY
      return neg ? -INFINITY : +INFINITY;
#else
      return neg ? -DBL_MAX : +DBL_MAX;
#endif
    }
    return NAN;
  }

  if (u_exp == 0) {
    if (u_mant == 0)
      return neg ? -0.0 : +0.0;

    /* Subnormal value */

    /* Multiply the mantissa by a power of two. */
    output = ldexp(u_mant, -(MANTISSA_BIT + 1022));

    if (neg)
      output = -output;

    return output;
  }

  /* Recover the top bit of the mantissa. */
  u_mant |= MANTISSA_MASK + 1;

  /* Convert offset exponent back into a native signed value. */
  s_exp = (int) u_exp - 1022;

  /* Multiply the mantissa by a power of two. */
  output = ldexp(u_mant, s_exp - (MANTISSA_BIT + 1));

  if (neg)
    output = -output;

  return output;
}

int main(int argc, const char *argv[])
{
  double input = atof(argv[1]);
  union {
    double dbl;
    unsigned char bytes[sizeof(double)];
  } swap;
  uint_fast64_t bytes = ieee754_bin64enc(input);
  double output = ieee754_bin64dec(bytes);

  printf("input = %g\n", input);
  printf("format: %04x %04x %04x %04x\n",
         (unsigned) (bytes >> 48) & 0xffff,
         (unsigned) (bytes >> 32) & 0xffff,
         (unsigned) (bytes >> 16) & 0xffff,
         (unsigned) (bytes >> 0) & 0xffff);

  if (sizeof(double) == 8) {
    swap.dbl = input;
    printf("union: %02x%02x %02x%02x %02x%02x %02x%02x\n",
           swap.bytes[7],
           swap.bytes[6],
           swap.bytes[5],
           swap.bytes[4],
           swap.bytes[3],
           swap.bytes[2],
           swap.bytes[1],
           swap.bytes[0]);
  }

  printf("output = %g\n", output);

  printf("diff = %g\n", (output - input));

  return EXIT_SUCCESS;
}

ldexp with a negative power will restore the mantissa to a floating-point value, then another ldexp will incorporate the exponent (hence, they are combined in the code above).


CHaR
Sitemap Supported
Site format updated 2024-06-05T22:37:07.391+0000
Data updated 1970-01-01T00:00:00.000+0000
Page updated 2022-06-17T21:43:05.000+0000