尽管ab通常通过计算exp(blog(a)),在现实生活中,对数等函数的高质量实现是用额外的精度pow()
计算的,以抵消误差放大效应exp, 例如本答案所示。这表明我们应该支持通过内置的通用指数运算符或函数(如pow()
. 在许多情况下,这将给出更准确的结果,即使对于不是最先进的一般取幂的实现,也不太可能给出更差的结果。有关这些方面的快速实验,请参阅下面的 ISO-C99 代码。在我的平台上,它打印:
max ulp error using expf(): 3.72998984e+005
max ulp error using powf(): 2.16050157e+004
即便如此,通过一般取幂计算倒数幂同样会受到计算1b,因此在特定函数可用于计算立方根的情况下(即,b=3),建议使用这些,如本答案中所述。一些编程环境提供了rootn()
计算n-th 根,例如。
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
#define N (10000000)
float func (float a, float b)
{
return expf ((logf (a) - logf (b)) / b);
}
float func_alt (float a, float b)
{
return powf (a / b, 1.0f / b);
}
double func_ref (double a, double b)
{
return pow (a / b, 1.0 / b);
}
uint32_t float_as_uint32 (float a)
{
uint32_t r;
memcpy (&r, &a, sizeof r);
return r;
}
float uint32_as_float (uint32_t a)
{
float r;
memcpy (&r, &a, sizeof r);
return r;
}
uint64_t double_as_uint64 (double a)
{
uint64_t r;
memcpy (&r, &a, sizeof r);
return r;
}
double floatUlpErr (float res, double ref)
{
uint64_t i, j, err, refi;
int expoRef;
/* ulp error cannot be computed if either operand is NaN, infinity, zero */
if (isnan (res) || isnan (ref) || isinf (res) || isinf (ref) ||
(res == 0.0f) || (ref == 0.0f)) {
return 0.0;
}
/* Convert the float result to an "extended float". This is like a float
with 56 instead of 24 effective mantissa bits.
*/
i = ((uint64_t)float_as_uint32(res)) << 32;
/* Convert the double reference to an "extended float". If the reference is
>= 2^129, we need to clamp to the maximum "extended float". If reference
is < 2^-126, we need to denormalize because of the float types's limited
exponent range.
*/
refi = double_as_uint64(ref);
expoRef = (int)(((refi >> 52) & 0x7ff) - 1023);
if (expoRef >= 129) {
j = 0x7fffffffffffffffULL;
} else if (expoRef < -126) {
j = ((refi << 11) | 0x8000000000000000ULL) >> 8;
j = j >> (-(expoRef + 126));
} else {
j = ((refi << 11) & 0x7fffffffffffffffULL) >> 8;
j = j | ((uint64_t)(expoRef + 127) << 55);
}
j = j | (refi & 0x8000000000000000ULL);
err = (i < j) ? (j - i) : (i - j);
return err / 4294967296.0;
}
// Fixes via: Greg Rose, KISS: A Bit Too Simple. http://eprint.iacr.org/2011/007
static unsigned int z=362436069,w=521288629,jsr=362436069,jcong=123456789;
#define znew (z=36969*(z&0xffff)+(z>>16))
#define wnew (w=18000*(w&0xffff)+(w>>16))
#define MWC ((znew<<16)+wnew)
#define SHR3 (jsr^=(jsr<<13),jsr^=(jsr>>17),jsr^=(jsr<<5)) /* 2^32-1 */
#define CONG (jcong=69069*jcong+13579) /* 2^32 */
#define KISS ((MWC^CONG)+SHR3)
int main (void)
{
float a, b, res1, res2;
double ref, ulp1, ulp2, maxulp1 = 0, maxulp2 = 0;
for (int i = 0; i < N; i++) {
do {
a = uint32_as_float (KISS & 0x7fffffff); // ensure positive
} while (!isnormal (a));
do {
b = uint32_as_float (KISS & 0x7fffffff); // ensure positive
} while (!isnormal (a));
res1 = func (a, b);
res2 = func_alt (a, b);
ref = func_ref ((double)a, (double)b);
ulp1 = floatUlpErr (res1, ref);
ulp2 = floatUlpErr (res2, ref);
maxulp1 = fmax (ulp1, maxulp1);
maxulp2 = fmax (ulp2, maxulp2);
}
printf ("max ulp error using expf(): %15.8e\n", maxulp1);
printf ("max ulp error using powf(): %15.8e\n", maxulp2);
return EXIT_SUCCESS;
}
```