Add in sincos(), an efficient method of computing the sine and cosine of an angle together

This commit is contained in:
Elliot Saba 2013-07-15 18:28:01 -07:00
parent 29af332f36
commit 0cf89fad5d
4 changed files with 388 additions and 44 deletions

View file

@ -1,37 +1,37 @@
$(CUR_SRCS) = \ $(CUR_SRCS) = \
e_acos.c e_acosf.c e_acosh.c e_acoshf.c e_asin.c e_asinf.c \ e_acos.c e_acosf.c e_acosh.c e_acoshf.c e_asin.c e_asinf.c \
e_atan2.c e_atan2f.c e_atanh.c e_atanhf.c e_cosh.c e_coshf.c e_exp.c \ e_atan2.c e_atan2f.c e_atanh.c e_atanhf.c e_cosh.c e_coshf.c e_exp.c \
e_expf.c e_fmod.c e_fmodf.c e_gamma.c e_gamma_r.c e_gammaf.c \ e_expf.c e_fmod.c e_fmodf.c e_gamma.c e_gamma_r.c e_gammaf.c \
e_gammaf_r.c e_hypot.c e_hypotf.c e_j0.c e_j0f.c e_j1.c e_j1f.c \ e_gammaf_r.c e_hypot.c e_hypotf.c e_j0.c e_j0f.c e_j1.c e_j1f.c \
e_jn.c e_jnf.c e_lgamma.c e_lgamma_r.c e_lgammaf.c e_lgammaf_r.c \ e_jn.c e_jnf.c e_lgamma.c e_lgamma_r.c e_lgammaf.c e_lgammaf_r.c \
e_log.c e_log10.c e_log10f.c e_log2.c e_log2f.c e_logf.c \ e_log.c e_log10.c e_log10f.c e_log2.c e_log2f.c e_logf.c \
e_pow.c e_powf.c e_remainder.c e_remainderf.c e_scalb.c e_scalbf.c \ e_pow.c e_powf.c e_remainder.c e_remainderf.c e_scalb.c e_scalbf.c \
e_rem_pio2.c e_rem_pio2f.c \ e_rem_pio2.c e_rem_pio2f.c \
e_sinh.c e_sinhf.c e_sqrt.c e_sqrtf.c \ e_sinh.c e_sinhf.c e_sqrt.c e_sqrtf.c \
k_cos.c k_exp.c k_expf.c k_rem_pio2.c k_sin.c k_tan.c \ k_cos.c k_exp.c k_expf.c k_rem_pio2.c k_sin.c k_tan.c \
k_cosf.c k_sinf.c k_tanf.c \ k_cosf.c k_sinf.c k_tanf.c \
s_asinh.c s_asinhf.c s_atan.c s_atanf.c s_carg.c s_cargf.c s_cargl.c \ s_asinh.c s_asinhf.c s_atan.c s_atanf.c s_carg.c s_cargf.c s_cargl.c \
s_cbrt.c s_cbrtf.c s_ceil.c s_ceilf.c \ s_cbrt.c s_cbrtf.c s_ceil.c s_ceilf.c \
s_copysign.c s_copysignf.c s_cos.c s_cosf.c \ s_copysign.c s_copysignf.c s_cos.c s_cosf.c \
s_csqrt.c s_csqrtf.c s_erf.c s_erff.c \ s_csqrt.c s_csqrtf.c s_erf.c s_erff.c \
s_exp2.c s_exp2f.c s_expm1.c s_expm1f.c s_fabs.c s_fabsf.c s_fdim.c \ s_exp2.c s_exp2f.c s_expm1.c s_expm1f.c s_fabs.c s_fabsf.c s_fdim.c \
s_finite.c s_finitef.c \ s_finite.c s_finitef.c \
s_floor.c s_floorf.c s_fma.c s_fmaf.c \ s_floor.c s_floorf.c s_fma.c s_fmaf.c \
s_fmax.c s_fmaxf.c s_fmaxl.c s_fmin.c \ s_fmax.c s_fmaxf.c s_fmaxl.c s_fmin.c \
s_fminf.c s_fminl.c s_fpclassify.c \ s_fminf.c s_fminl.c s_fpclassify.c \
s_frexp.c s_frexpf.c s_ilogb.c s_ilogbf.c \ s_frexp.c s_frexpf.c s_ilogb.c s_ilogbf.c \
s_ilogbl.c s_isinf.c s_isfinite.c s_isnormal.c s_isnan.c \ s_ilogbl.c s_isinf.c s_isfinite.c s_isnormal.c s_isnan.c \
s_llrint.c s_llrintf.c s_llround.c s_llroundf.c s_llroundl.c \ s_llrint.c s_llrintf.c s_llround.c s_llroundf.c s_llroundl.c \
s_log1p.c s_log1pf.c s_logb.c s_logbf.c s_lrint.c s_lrintf.c \ s_log1p.c s_log1pf.c s_logb.c s_logbf.c s_lrint.c s_lrintf.c \
s_lround.c s_lroundf.c s_lroundl.c s_modf.c s_modff.c \ s_lround.c s_lroundf.c s_lroundl.c s_modf.c s_modff.c \
s_nearbyint.c s_nextafter.c s_nextafterf.c \ s_nearbyint.c s_nextafter.c s_nextafterf.c \
s_nexttowardf.c s_remquo.c s_remquof.c \ s_nexttowardf.c s_remquo.c s_remquof.c \
s_rint.c s_rintf.c s_round.c s_roundf.c s_roundl.c \ s_rint.c s_rintf.c s_round.c s_roundf.c s_roundl.c \
s_scalbln.c s_scalbn.c s_scalbnf.c s_signbit.c \ s_scalbln.c s_scalbn.c s_scalbnf.c s_signbit.c \
s_signgam.c s_significand.c s_significandf.c s_sin.c s_sinf.c \ s_signgam.c s_significand.c s_significandf.c s_sin.c s_sincos.c \
s_tan.c s_tanf.c s_tanh.c s_tanhf.c s_tgammaf.c s_trunc.c s_truncf.c \ s_sinf.c s_sincosf.c s_tan.c s_tanf.c s_tanh.c s_tanhf.c s_tgammaf.c \
s_cpow.c s_cpowf.c s_cpowl.c \ s_trunc.c s_truncf.c s_cpow.c s_cpowf.c s_cpowl.c \
w_cabs.c w_cabsf.c w_drem.c w_dremf.c w_cabs.c w_cabsf.c w_drem.c w_dremf.c
ifneq ($(OS), WINNT) ifneq ($(OS), WINNT)
$(CUR_SRCS) += s_nan.c $(CUR_SRCS) += s_nan.c
@ -42,18 +42,18 @@ $(CUR_SRCS) += s_copysignl.c s_fabsl.c s_llrintl.c s_lrintl.c s_modfl.c
# If long double != double use these; otherwise, we alias the double versions. # If long double != double use these; otherwise, we alias the double versions.
$(CUR_SRCS) += e_acosl.c e_asinl.c e_atan2l.c e_fmodl.c \ $(CUR_SRCS) += e_acosl.c e_asinl.c e_atan2l.c e_fmodl.c \
e_hypotl.c e_remainderl.c e_sqrtl.c \ e_hypotl.c e_remainderl.c e_sqrtl.c \
s_atanl.c s_ceill.c s_cosl.c s_cprojl.c \ s_atanl.c s_ceill.c s_cosl.c s_cprojl.c \
s_csqrtl.c s_floorl.c s_fmal.c \ s_csqrtl.c s_floorl.c s_fmal.c \
s_frexpl.c s_logbl.c s_nexttoward.c \ s_frexpl.c s_logbl.c s_nexttoward.c \
s_remquol.c \ s_remquol.c \
s_sinl.c s_tanl.c s_truncl.c w_cabsl.c \ s_sinl.c s_sincosl.c s_tanl.c s_truncl.c w_cabsl.c \
s_nextafterl.c s_rintl.c s_scalbnl.c s_nextafterl.c s_rintl.c s_scalbnl.c
# s_cbrtl.c # s_cbrtl.c
# C99 complex functions # C99 complex functions
$(CUR_SRCS) += s_ccosh.c s_ccoshf.c s_cexp.c s_cexpf.c \ $(CUR_SRCS) += s_ccosh.c s_ccoshf.c s_cexp.c s_cexpf.c \
s_cimag.c s_cimagf.c s_cimagl.c \ s_cimag.c s_cimagf.c s_cimagl.c \
s_conj.c s_conjf.c s_conjl.c \ s_conj.c s_conjf.c s_conjl.c \
s_cproj.c s_cprojf.c s_creal.c s_crealf.c s_creall.c \ s_cproj.c s_cprojf.c s_creal.c s_crealf.c s_creall.c \
s_csinh.c s_csinhf.c s_ctanh.c s_ctanhf.c s_csinh.c s_csinhf.c s_ctanh.c s_ctanhf.c

151
src/s_sincos.c Normal file
View file

@ -0,0 +1,151 @@
/* @(#)s_sincos.c 5.1 13/07/15 */
/*
* ====================================================
* Copyright (C) 2013 Elliot Saba. All rights reserved.
*
* Developed at the University of Washington.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "cdefs-compat.h"
/* sincos(x, s, c)
* Several applications need sine and cosine of the same
* angle x. This function computes both at the same time,
* and stores the results in *sin and *cos.
*
* kernel function:
* __kernel_sin ... sine function on [-pi/4,pi/4]
* __kernel_cos ... cose function on [-pi/4,pi/4]
* __ieee754_rem_pio2 ... argument reduction routine
*
* Method.
* Borrow liberally from s_sin.c and s_cos.c, merging
* efforts where applicable and returning their values in
* appropriate variables, thereby slightly reducing the
* amount of work relative to just calling sin/cos(x)
* separately
*
* Special cases:
* Let trig be any of sin, cos, or tan.
* sincos(+-INF, s, c) is NaN, with signals;
* sincos(NaN, s, c) is that NaN;
*/
#include <float.h>
#include "openlibm.h"
//#define INLINE_REM_PIO2
#include "math_private.h"
//#include "e_rem_pio2.c"
/* Constants used in polynomial approximation of sin/cos */
static const double
one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
S6 = 1.58969099521155010221e-10, /* 0x3DE5D93A, 0x5ACFD57C */
C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
void
__kernel_sincos( double x, double y, int iy, double * k_s, double * k_c )
{
/* Inline calculation of sin/cos, as we can save
some work, and we will always need to calculate
both values, no matter the result of switch */
double z, w, r, v, hz;
z = x*x;
w = z*z;
/* cos-specific computation; equivalent to calling
__kernel_cos(x,y) and storing in k_c*/
r = z*(C1+z*(C2+z*C3)) + w*w*(C4+z*(C5+z*C6));
hz = 0.5*z;
v = one-hz;
*k_c = v + (((one-v)-hz) + (z*r-x*y));
/* sin-specific computation; equivalent to calling
__kernel_sin(x,y,1) and storing in k_s*/
r = S2+z*(S3+z*S4) + z*w*(S5+z*S6);
v = z*x;
if(iy == 0)
*k_s = x+v*(S1+z*r);
else
*k_s = x-((z*(half*y-v*r)-y)-v*S1);
}
void
sincos(double x, double * s, double * c)
{
double y[2];
int32_t ix;
/* Store high word of x in ix */
GET_HIGH_WORD(ix,x);
/* |x| ~< pi/4 */
ix &= 0x7fffffff;
if(ix <= 0x3fe921fb) {
/* Check for small x for sin and cos */
if(ix<0x3e46a09e) {
/* Check for exact zero */
if( (int)x==0 ) {
*s = x;
*c = 1.0;
return;
}
}
/* Call kernel function with 0 extra */
__kernel_sincos(x,0.0,0, s, c);
} else if( ix >= 0x7ff00000 ) {
/* sincos(Inf or NaN) is NaN */
*s = x-x;
*c = x-x;
}
/*argument reduction needed*/
else {
double k_c, k_s;
printf( "bleh?\n");
/* Calculate remainer, then sub out to kernel */
int32_t n = __ieee754_rem_pio2(x,y);
__kernel_sincos( y[0], y[1], 1, &k_s, &k_c );
/* Figure out permutation of sin/cos outputs to true outputs */
switch(n&3) {
case 0:
*c = k_c;
*s = k_s;
break;
case 1:
*c = -k_s;
*s = k_c;
break;
case 2:
*c = -k_c;
*s = -k_s;
break;
default:
*c = k_s;
*s = -k_c;
break;
}
}
}
#if (LDBL_MANT_DIG == 53)
__weak_reference(sincos, sincosl);
#endif

162
src/s_sincosf.c Normal file
View file

@ -0,0 +1,162 @@
/* s_sincosf.c -- float version of s_sincos.c
*
* Copyright (C) 2013 Elliot Saba
* Developed at the University of Washington
*
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "cdefs-compat.h"
#include <float.h>
#include "openlibm.h"
//#define INLINE_KERNEL_COSDF
//#define INLINE_KERNEL_SINDF
//#define INLINE_REM_PIO2F
#include "math_private.h"
//#include "e_rem_pio2f.c"
//#include "k_cosf.c"
//#include "k_sinf.c"
/* Constants used in shortcircuits in sincosf */
static const double
sc1pio2 = 1*M_PI_2, /* 0x3FF921FB, 0x54442D18 */
sc2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */
sc3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */
sc4pio2 = 4*M_PI_2, /* 0x401921FB, 0x54442D18 */
/* Constants used in polynomial approximation of sin/cos */
one = 1.0,
S1 = -0x15555554cbac77.0p-55, /* -0.166666666416265235595 */
S2 = 0x111110896efbb2.0p-59, /* 0.0083333293858894631756 */
S3 = -0x1a00f9e2cae774.0p-65, /* -0.000198393348360966317347 */
S4 = 0x16cd878c3b46a7.0p-71, /* 0.0000027183114939898219064 */
C0 = -0x1ffffffd0c5e81.0p-54, /* -0.499999997251031003120 */
C1 = 0x155553e1053a42.0p-57, /* 0.0416666233237390631894 */
C2 = -0x16c087e80f1e27.0p-62, /* -0.00138867637746099294692 */
C3 = 0x199342e0ee5069.0p-68; /* 0.0000243904487962774090654 */
void
__kernel_sincosdf( double x, float * s, float * c )
{
double r, w, z, v;
z = x*x;
w = z*z;
/* cos-specific computation; equivalent to calling
__kernel_cos(x,y) and storing in k_c*/
r = C2+z*C3;
double k_c = ((one+z*C0) + w*C1) + (w*z)*r;
/* sin-specific computation; equivalent to calling
__kernel_sin(x,y,1) and storing in k_s*/
r = S3+z*S4;
v = z*x;
double k_s = (x + v*(S1+z*S2)) + v*w*r;
*c = k_c;
*s = k_s;
}
void
sincosf(float x, float * s, float * c) {
// Worst approximation of sin and cos NA
*s = x;
*c = x;
double y;
float k_c, k_s;
int32_t n, hx, ix;
GET_FLOAT_WORD(hx,x);
ix = hx & 0x7fffffff;
if(ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
if(ix<0x39800000) { /* |x| < 2**-12 */
/* Check if x is exactly zero */
if(((int)x)==0) {
*s = x;
*c = 1.0f;
return;
}
}
__kernel_sincosdf(x, s, c);
return;
}
/* |x| ~<= 5*pi/4 */
if (ix<=0x407b53d1) {
/* |x| ~<= 3pi/4 */
if(ix<=0x4016cbe3) {
if(hx>0) {
__kernel_sincosdf( sc1pio2 - x, c, s );
}
else {
__kernel_sincosdf( sc1pio2 + x, c, &k_s );
*s = -k_s;
}
} else {
if(hx>0) {
__kernel_sincosdf( sc2pio2 - x, s, &k_c );
*c = -k_c;
} else {
__kernel_sincosdf( -sc2pio2 - x, s, &k_c );
*c = -k_c;
}
}
return;
}
/* |x| ~<= 9*pi/4 */
if(ix<=0x40e231d5) {
/* |x| ~> 7*pi/4 */
if(ix<=0x40afeddf) {
if(hx>0) {
__kernel_sincosdf( x - sc3pio2, c, &k_s );
*s = -k_s;
} else {
__kernel_sincosdf( x + sc3pio2, &k_c, s );
*c = -k_c;
}
}
else {
if( hx > 0 ) {
__kernel_sincosdf( x - sc4pio2, s, c );
} else {
__kernel_sincosdf( x + sc4pio2, s, c );
}
}
return;
}
/* cos(Inf or NaN) is NaN */
else if(ix>=0x7f800000) {
*c = *s = x-x;
} else {
/* general argument reduction needed */
n = __ieee754_rem_pio2f(x,&y);
switch(n&3) {
case 0:
__kernel_sincosdf( y, s, c );
break;
case 1:
__kernel_sincosdf( -y, c, s );
break;
case 2:
__kernel_sincosdf( -y, s, &k_c);
*c = -k_c;
break;
default:
__kernel_sincosdf( -y, &k_c, &k_s );
*c = -k_c;
*s = -k_s;
break;
}
}
}

31
src/s_sincosl.c Normal file
View file

@ -0,0 +1,31 @@
/* s_sincosl.c -- long double version of s_sincos.c
*
* Copyright (C) 2013 Elliot Saba
* Developed at the University of Washington
*
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "cdefs-compat.h"
#include <float.h>
#include "openlibm.h"
#include "math_private.h"
#if LDBL_MANT_DIG == 64
#include "../ld80/e_rem_pio2l.h"
#elif LDBL_MANT_DIG == 113
#include "../ld128/e_rem_pio2l.h"
#else
#error "Unsupported long double format"
#endif
void
sincosl( long double x, long double * s, long double * c )
{
*s = cosl( x );
*c = sinl( x );
}