Name | Description | Notes | Source | Availability | |||||||
---|---|---|---|---|---|---|---|---|---|---|---|
copysign() |
Extract and apply sign | M | (·) | <tgmath.h> |
C99 | C11 | |||||
copysign() |
Extract and apply sign | (·) | <math.h> |
C99 | C11 | ||||||
copysignf() |
Extract and apply sign | (·) | <math.h> |
C99 | C11 | ||||||
copysignl() |
Extract and apply sign | (·) | <math.h> |
C99 | C11 | ||||||
FP_ILOGB0 |
Result of logarithm of zero | M | <math.h> |
C99 | C11 | ||||||
FP_ILOGBNAN |
Result of logarithm of NaN | M | <math.h> |
C99 | C11 | ||||||
frexp() |
Decompose floating-point number | (·) | <math.h> |
C89 | C90 | C95 | C99 | C11 | |||
frexp() |
Decompose floating-point number | M | (·) | <tgmath.h> |
C99 | C11 | |||||
frexpf() |
Decompose floating-point number | (·) | <math.h> |
C99 | C11 | ||||||
frexpl() |
Decompose floating-point number | (·) | <math.h> |
C99 | C11 | ||||||
ilogb() |
Compute native-base logarithm | (·) | <math.h> |
C99 | C11 | ||||||
ilogb() |
Compute native-base logarithm | M | (·) | <tgmath.h> |
C99 | C11 | |||||
ilogbf() |
Compute native-base logarithm | (·) | <math.h> |
C99 | C11 | ||||||
ilogbl() |
Compute native-base logarithm | (·) | <math.h> |
C99 | C11 | ||||||
ldexp() |
Multiply by power of 2 | M | (·) | <tgmath.h> |
C99 | C11 | |||||
ldexp() |
Multiply by power of 2 | (·) | <math.h> |
C89 | C90 | C95 | C99 | C11 | |||
ldexpf() |
Multiply by power of 2 | (·) | <math.h> |
C99 | C11 | ||||||
ldexpl() |
Multiply by power of 2 | (·) | <math.h> |
C99 | C11 | ||||||
modf() |
Extract integral and fractional parts | L | (·) | Predefined | C99 | C11 | |||||
modf() |
Extract integral and fractional parts | (·) | <math.h> |
C89 | C90 | C95 | C99 | C11 | |||
modff() |
Extract integral and fractional parts | (·) | <math.h> |
C99 | C11 | ||||||
modfl() |
Extract integral and fractional parts | (·) | <math.h> |
C99 | C11 | ||||||
signbit() |
Test for negative | M | (·) | <math.h> |
C99 | C11 |
<math.h>
provides some
functions for decomposing floating-point numbers into parts, and for
building them up from parts. Some of these functions can be used to
decompose a value into integers (which are then trivial to serialize
for transmission), and to recombine those integers into a value later.
#include <math.h>
int ilogbf(float x);
int ilogb(double x);
int ilogbl(long double x);
#include <tgmath.h>
int ilogb(real-floating-type x);
The
ilogb
functions extract the exponent of x
as an integer, and return:
FP_ILOGB0 |
when x is zero |
INT_MAX |
when x is infinite
|
FP_ILOGBNAN |
when x is NaN
|
the exponent | otherwise |
#include <math.h>
float frexpf(float v, int *xp);
double frexp(double v, int *xp);
long double frexpl(long double v, int *xp);
#include <tgmath.h>
real-floating-type frexp(real-floating-type v, int *xp);
The
frexp
functions split the input v
into two parts: an exponent x written
to *xp
; and a mantissa m
which is returned, and whose magnitude lies in the range [½, 1). The two outputs
can be used to reform the input, such that
$v = m \cdot 2^{x}$. If
v
is zero, then *xp
and the return value will also be zero.
#include <math.h>
float ldexpf(float m, int x);
double ldexp(double m, int x);
long double ldexpl(long double m, int x);
#include <tgmath.h>
real-floating-type ldexp(real-floating-type m, int x);
The
ldexp
functions compute and return
$m \cdot 2^{x}$.
#include <math.h>
float modff(float val, float *ip);
double modf(double val, double *ip);
long double modfl(long double val, long double *ip);
#include <tgmath.h>
real-floating-type modf(real-floating-type val, real-floating-type *ip);
The
modf
functions split the number val
into an integer part, stored in *ip
, and a fractional part, which is returned.
Both parts have the same sign as val
.
#include <math.h>
float copysignf(float x, float y);
double copysign(double x, double y);
long double copysignl(long double x, long double y);
#include <tgmath.h>
real-floating-type copysign(real-floating-type x, real-floating-type y);
The
copysign
functions return a value which has the
magnitude of x
and the sign of
y
. If x
is
NaN, the
result is also NaN, with the sign of y
.
You can use
frexp
and
ldexp
to split a floating-point value into a
mantissa and exponent for interoperable representation as
integers. Here are two functions ieee754_bin64enc
and ieee754_bin64dec
to convert between double
and
uint_fast64_t
holding an
IEEE754 binary64 value:
#include <float.h>
#include <inttypes.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
/* IEEE754 binary64 has 52 mantissa bits, 1 sign bit, and 11 bits for
   the exponent. */
#define MANTISSA_BIT 52
#define EXPONENT_BIT 11
#define SIGN_BIT 1

/* Each field is described by its shift (position of its lowest bit),
   its mask (field-width ones at bit 0), and its field (the mask moved
   into position). */
#define MANTISSA_SHIFT 0
#define MANTISSA_MASK ((UINTMAX_C(1) << MANTISSA_BIT) - 1u)
#define MANTISSA_FIELD (MANTISSA_MASK << MANTISSA_SHIFT)

#define EXPONENT_SHIFT (MANTISSA_SHIFT + MANTISSA_BIT)
#define EXPONENT_MASK ((UINTMAX_C(1) << EXPONENT_BIT) - 1u)
#define EXPONENT_FIELD (EXPONENT_MASK << EXPONENT_SHIFT)

#define SIGN_SHIFT (EXPONENT_SHIFT + EXPONENT_BIT)
#define SIGN_MASK ((UINTMAX_C(1) << SIGN_BIT) - 1u)
#define SIGN_FIELD (SIGN_MASK << SIGN_SHIFT)

/*
 * Encode a native double as an IEEE754 binary64 bit pattern.
 *
 * input: the value to encode.
 *
 * Returns the sign, exponent and mantissa fields packed at SIGN_SHIFT,
 * EXPONENT_SHIFT and MANTISSA_SHIFT.  Only <math.h> classification and
 * decomposition functions are used; the native representation of
 * double is never inspected.
 *
 * Zero and infinity keep their sign.  A NaN is sent as a positive
 * quiet NaN (top mantissa bit set).  A finite value whose exponent is
 * too large for binary64 is clamped to the largest finite binary64
 * value; one too small is truncated towards zero.
 */
uint_fast64_t ieee754_bin64enc(double input)
{
  double d_mant;
  unsigned u_exp;
  unsigned long long u_mant;
  bool neg;
  int s_exp;
  uint_fast64_t bytes;

  switch (fpclassify(input)) {
  case FP_ZERO:
    neg = signbit(input);
    u_mant = 0;
    u_exp = 0;
    break;

  case FP_INFINITE:
    neg = signbit(input);
    u_mant = 0;
    u_exp = 0x7ffu;
    break;

  case FP_NAN:
  default:
    /* fpclassify may return implementation-defined classes beyond
       the standard five; send those as NaN too, so that every path
       assigns neg, u_mant and u_exp before they are read below. */
    neg = false;
    /* Setting the most significant mantissa bit makes this a quiet
       NaN under IEEE754-2008, rather than a signalling one. */
    u_mant = 1ull << (MANTISSA_BIT - 1);
    u_exp = 0x7ffu;
    break;

  case FP_SUBNORMAL:
  case FP_NORMAL:
    /* Handle normal and subnormal together.  The number might be
       one class for double, but another for binary64. */

    /* Decompose the input into a significand (mantissa + 1) and an
       exponent. */
    d_mant = frexp(input, &s_exp);

    /* Extract the sign bit from the mantissa. */
    neg = signbit(input);
    d_mant = fabs(d_mant);

    /* Offset the exponent so it can be represented as an unsigned
       value. */
    s_exp += 1022;

    /* Now we find out whether the number we represent is normal,
       subnormal, or overflows binary64. */
    if (s_exp >= 0x7ff) {
      /* The number is too big for binary64, so use the maximum
         value. */
      u_mant = MANTISSA_MASK;
      u_exp = 0x7feu;
    } else if (s_exp <= 0) {
      /* The number is subnormal in binary64. */
      int shift = -s_exp;

      if (shift >= MANTISSA_BIT) {
        /* ldexp(d_mant, MANTISSA_BIT) is below 2^MANTISSA_BIT, so
           every bit would be shifted out.  Store zero directly: a
           shift count at or above the operand width is undefined. */
        u_mant = 0;
      } else {
        /* Shift the mantissa so that its exponent would be 0. */
        u_mant = ldexp(d_mant, MANTISSA_BIT);
        u_mant >>= shift;
      }
      u_exp = 0;
    } else {
      /* The number is normal in binary64. */

      /* Use the suggested exponent. */
      u_exp = s_exp;

      /* Make the mantissa value into a positive integer. */
      u_mant = ldexp(d_mant, MANTISSA_BIT + 1);
    }
    break;
  }

  /* Transmit the bottom MANTISSA_BIT bits of u_mant.  For normal
     values the extra top bit will always be one because of
     normalization, so it is implied rather than sent. */
  bytes = ((uint_fast64_t) u_mant & MANTISSA_MASK) << MANTISSA_SHIFT;
  bytes |= ((uint_fast64_t) u_exp & EXPONENT_MASK) << EXPONENT_SHIFT;
  bytes |= ((uint_fast64_t) neg & SIGN_MASK) << SIGN_SHIFT;
  return bytes;
}

/*
 * Decode an IEEE754 binary64 bit pattern into a native double.
 *
 * bytes: sign, exponent and mantissa fields laid out as produced by
 *        ieee754_bin64enc.  Bits outside the three fields are ignored.
 *
 * Returns the decoded value.  An all-ones exponent yields NAN when the
 * mantissa is non-zero, and otherwise a signed infinity (or signed
 * DBL_MAX when <math.h> does not define INFINITY).
 */
double ieee754_bin64dec(uint_fast64_t bytes)
{
  double output;
  int s_exp;
  unsigned u_exp;
  unsigned long long u_mant;
  bool neg;

  /* Extract the bit fields. */
  u_exp = (bytes >> EXPONENT_SHIFT) & EXPONENT_MASK;
  u_mant = (bytes >> MANTISSA_SHIFT) & MANTISSA_MASK;
  neg = (bytes >> SIGN_SHIFT) & SIGN_MASK;

  if (u_exp == 0x7ffu) {
    if (u_mant == 0) {
#ifdef INFINITY
      return neg ? -INFINITY : +INFINITY;
#else
      return neg ? -DBL_MAX : +DBL_MAX;
#endif
    }
    return NAN;
  }

  if (u_exp == 0) {
    if (u_mant == 0)
      return neg ? -0.0 : +0.0;

    /* Subnormal value: there is no implied top bit. */

    /* Multiply the mantissa by a power of two. */
    output = ldexp(u_mant, -(MANTISSA_BIT + 1022));
    if (neg)
      output = -output;
    return output;
  }

  /* Recover the top bit of the mantissa. */
  u_mant |= MANTISSA_MASK + 1;

  /* Convert offset exponent back into a native signed value. */
  s_exp = (int) u_exp - 1022;

  /* Multiply the mantissa by a power of two.  The integer mantissa is
     scaled down by MANTISSA_BIT + 1 bits and up by the exponent in a
     single ldexp call. */
  output = ldexp(u_mant, s_exp - (MANTISSA_BIT + 1));
  if (neg)
    output = -output;
  return output;
}

/*
 * Demonstration driver: parse argv[1] as a double, encode it, decode
 * it again, and print the input, the encoded bit pattern, the native
 * bytes of the double (only when sizeof(double) is 8), the decoded
 * output, and the difference between output and input.
 *
 * Returns EXIT_FAILURE with a usage message when no argument is
 * given, and EXIT_SUCCESS otherwise.
 */
int main(int argc, const char *argv[])
{
  /* argv[1] is a null pointer when no argument was supplied, and
     must not be handed to the parser. */
  if (argc < 2) {
    fprintf(stderr, "usage: %s number\n",
            (argc > 0 && argv[0] != NULL) ? argv[0] : "ieee754");
    return EXIT_FAILURE;
  }

  /* strtod accepts the same syntax as atof, but has defined
     behaviour when the text is out of range. */
  double input = strtod(argv[1], NULL);

  union {
    double dbl;
    unsigned char bytes[sizeof(double)];
  } swap;

  uint_fast64_t bytes = ieee754_bin64enc(input);
  double output = ieee754_bin64dec(bytes);

  printf("input = %g\n", input);
  printf("format: %04x %04x %04x %04x\n",
         (unsigned) (bytes >> 48) & 0xffff,
         (unsigned) (bytes >> 32) & 0xffff,
         (unsigned) (bytes >> 16) & 0xffff,
         (unsigned) (bytes >> 0) & 0xffff);

  if (sizeof(double) == 8) {
    /* Dump the native object representation, highest-indexed byte
       first.  NOTE(review): this lines up with the "format" line only
       on hosts that store double as little-endian binary64. */
    swap.dbl = input;
    printf("union: %02x%02x %02x%02x %02x%02x %02x%02x\n",
           (unsigned) swap.bytes[7], (unsigned) swap.bytes[6],
           (unsigned) swap.bytes[5], (unsigned) swap.bytes[4],
           (unsigned) swap.bytes[3], (unsigned) swap.bytes[2],
           (unsigned) swap.bytes[1], (unsigned) swap.bytes[0]);
  }

  printf("output = %g\n", output);
  printf("diff = %g\n", (output - input));
  return EXIT_SUCCESS;
}
ldexp
with a negative power will restore the
mantissa to a floating-point value, then another
ldexp
will incorporate the exponent (hence,
they are combined in the code above).