Weird power series coefficients in reference implementations of atan()

72 views Asked by At

glibc (more precise, libm) and many other C libraries contain the following software implementation of the atan() function (see here):

/* s_atanf.c -- float version of s_atan.c.
 * Conversion to float by Ian Lance Taylor, Cygnus Support, [email protected].
 */

/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

#if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: s_atanf.c,v 1.4 1995/05/10 20:46:47 jtc Exp $";
#endif

#include <float.h>
#include <math.h>
#include <math_private.h>
#include <math-underflow.h>
#include <libm-alias-float.h>

static const float atanhi[] = {
  4.6364760399e-01, /* atan(0.5)hi 0x3eed6338 */
  7.8539812565e-01, /* atan(1.0)hi 0x3f490fda */
  9.8279368877e-01, /* atan(1.5)hi 0x3f7b985e */
  1.5707962513e+00, /* atan(inf)hi 0x3fc90fda */
};

static const float atanlo[] = {
  5.0121582440e-09, /* atan(0.5)lo 0x31ac3769 */
  3.7748947079e-08, /* atan(1.0)lo 0x33222168 */
  3.4473217170e-08, /* atan(1.5)lo 0x33140fb4 */
  7.5497894159e-08, /* atan(inf)lo 0x33a22168 */
};

static const float aT[] = {
  3.3333334327e-01, /* 0x3eaaaaaa */
 -2.0000000298e-01, /* 0xbe4ccccd */
  1.4285714924e-01, /* 0x3e124925 */
 -1.1111110449e-01, /* 0xbde38e38 */
  9.0908870101e-02, /* 0x3dba2e6e */
 -7.6918758452e-02, /* 0xbd9d8795 */
  6.6610731184e-02, /* 0x3d886b35 */
 -5.8335702866e-02, /* 0xbd6ef16b */
  4.9768779427e-02, /* 0x3d4bda59 */
 -3.6531571299e-02, /* 0xbd15a221 */
  1.6285819933e-02, /* 0x3c8569d7 */
};

static const float
one   = 1.0,
huge   = 1.0e30;

float __atanf(float x)
{
    float w,s1,s2,z;
    int32_t ix,hx,id;

    GET_FLOAT_WORD(hx,x);
    ix = hx&0x7fffffff;
    if(ix>=0x4c000000) {    /* if |x| >= 2^25 */
        if(ix>0x7f800000)
        return x+x;     /* NaN */
        if(hx>0) return  atanhi[3]+atanlo[3];
        else     return -atanhi[3]-atanlo[3];
    } if (ix < 0x3ee00000) {    /* |x| < 0.4375 */
        if (ix < 0x31000000) {  /* |x| < 2^-29 */
        math_check_force_underflow (x);
        if(huge+x>one) return x;    /* raise inexact */
        }
        id = -1;
    } else {
    x = fabsf(x);
    if (ix < 0x3f980000) {      /* |x| < 1.1875 */
        if (ix < 0x3f300000) {  /* 7/16 <=|x|<11/16 */
        id = 0; x = ((float)2.0*x-one)/((float)2.0+x);
        } else {            /* 11/16<=|x|< 19/16 */
        id = 1; x  = (x-one)/(x+one);
        }
    } else {
        if (ix < 0x401c0000) {  /* |x| < 2.4375 */
        id = 2; x  = (x-(float)1.5)/(one+(float)1.5*x);
        } else {            /* 2.4375 <= |x| < 2^66 */
        id = 3; x  = -(float)1.0/x;
        }
    }}
    /* end of argument reduction */
    z = x*x;
    w = z*z;
    /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
    s1 = z*(aT[0]+w*(aT[2]+w*(aT[4]+w*(aT[6]+w*(aT[8]+w*aT[10])))));
    s2 = w*(aT[1]+w*(aT[3]+w*(aT[5]+w*(aT[7]+w*aT[9]))));
    if (id<0) return x - x*(s1+s2);
    else {
        z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x);
        return (hx<0)? -z:z;
    }
}
libm_alias_float (__atan, atan)

I am understanding how that function works, except for the values in the aT[] array. The actual work happens after the argument reduction in the following lines:

z = x*x;
w = z*z;
/* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
s1 = z*(aT[0]+w*(aT[2]+w*(aT[4]+w*(aT[6]+w*(aT[8]+w*aT[10])))));
s2 = w*(aT[1]+w*(aT[3]+w*(aT[5]+w*(aT[7]+w*aT[9]))));
if (id<0) return x - x*(s1+s2);
/* ... */

If we carry out the multiplications and the subtraction in the last line, we get the power series of atan(): x - 1/3 * x^3 + 1/5 * x^5 - 1/7 * x^7 .... However, the coefficients in the aT[] array are different from the power series' coefficients:

The first one is as expected in the sense that it is the float nearest to 1/3. The next ones more and more deviate from the expected values, each one more than its predecessor, until the last one, which is expected to be 1/23 (4.347826e-2 if truncated to float precision), but actually is 1.6285819933e-02.

Why do the coefficients deviate so much from the expected values? Is this a mathematical trick to mitigate the fact that the power series has to be aborted at some point (in this case, at x^23)?

0

There are 0 answers