Add in sincos(), an efficient method of computing the sine and cosine of an angle together

2025-07-04 11:36:53 +02:00 · 2013-07-15 18:28:01 -07:00 · 2013-07-15 18:28:01 -07:00 · 0cf89fad5d
commit 0cf89fad5d
parent 29af332f36
4 changed files with 388 additions and 44 deletions
--- a/src/Make.files
+++ b/src/Make.files
@ -1,37 +1,37 @@
 $(CUR_SRCS) = \
-        e_acos.c e_acosf.c e_acosh.c e_acoshf.c e_asin.c e_asinf.c \
-        e_atan2.c e_atan2f.c e_atanh.c e_atanhf.c e_cosh.c e_coshf.c e_exp.c \
-        e_expf.c e_fmod.c e_fmodf.c e_gamma.c e_gamma_r.c e_gammaf.c \
-        e_gammaf_r.c e_hypot.c e_hypotf.c e_j0.c e_j0f.c e_j1.c e_j1f.c \
-        e_jn.c e_jnf.c e_lgamma.c e_lgamma_r.c e_lgammaf.c e_lgammaf_r.c \
-        e_log.c e_log10.c e_log10f.c e_log2.c e_log2f.c e_logf.c \
-        e_pow.c e_powf.c e_remainder.c e_remainderf.c e_scalb.c e_scalbf.c \
-        e_rem_pio2.c e_rem_pio2f.c \
-        e_sinh.c e_sinhf.c e_sqrt.c e_sqrtf.c \
-        k_cos.c k_exp.c k_expf.c k_rem_pio2.c k_sin.c k_tan.c \
-        k_cosf.c k_sinf.c k_tanf.c \
-        s_asinh.c s_asinhf.c s_atan.c s_atanf.c s_carg.c s_cargf.c s_cargl.c \
-        s_cbrt.c s_cbrtf.c s_ceil.c s_ceilf.c \
-        s_copysign.c s_copysignf.c s_cos.c s_cosf.c \
-        s_csqrt.c s_csqrtf.c s_erf.c s_erff.c \
-        s_exp2.c s_exp2f.c s_expm1.c s_expm1f.c s_fabs.c s_fabsf.c s_fdim.c \
-        s_finite.c s_finitef.c \
-        s_floor.c s_floorf.c s_fma.c s_fmaf.c \
-        s_fmax.c s_fmaxf.c s_fmaxl.c s_fmin.c \
-        s_fminf.c s_fminl.c s_fpclassify.c \
-        s_frexp.c s_frexpf.c s_ilogb.c s_ilogbf.c \
-        s_ilogbl.c s_isinf.c s_isfinite.c s_isnormal.c s_isnan.c \
-        s_llrint.c s_llrintf.c s_llround.c s_llroundf.c s_llroundl.c \
-        s_log1p.c s_log1pf.c s_logb.c s_logbf.c s_lrint.c s_lrintf.c \
-        s_lround.c s_lroundf.c s_lroundl.c s_modf.c s_modff.c \
-        s_nearbyint.c s_nextafter.c s_nextafterf.c \
-        s_nexttowardf.c s_remquo.c s_remquof.c \
-        s_rint.c s_rintf.c s_round.c s_roundf.c s_roundl.c \
-        s_scalbln.c s_scalbn.c s_scalbnf.c s_signbit.c \
-        s_signgam.c s_significand.c s_significandf.c s_sin.c s_sinf.c \
-        s_tan.c s_tanf.c s_tanh.c s_tanhf.c s_tgammaf.c s_trunc.c s_truncf.c \
-	s_cpow.c  s_cpowf.c s_cpowl.c \
-        w_cabs.c w_cabsf.c w_drem.c w_dremf.c
+	e_acos.c e_acosf.c e_acosh.c e_acoshf.c e_asin.c e_asinf.c \
+	e_atan2.c e_atan2f.c e_atanh.c e_atanhf.c e_cosh.c e_coshf.c e_exp.c \
+	e_expf.c e_fmod.c e_fmodf.c e_gamma.c e_gamma_r.c e_gammaf.c \
+	e_gammaf_r.c e_hypot.c e_hypotf.c e_j0.c e_j0f.c e_j1.c e_j1f.c \
+	e_jn.c e_jnf.c e_lgamma.c e_lgamma_r.c e_lgammaf.c e_lgammaf_r.c \
+	e_log.c e_log10.c e_log10f.c e_log2.c e_log2f.c e_logf.c \
+	e_pow.c e_powf.c e_remainder.c e_remainderf.c e_scalb.c e_scalbf.c \
+	e_rem_pio2.c e_rem_pio2f.c \
+	e_sinh.c e_sinhf.c e_sqrt.c e_sqrtf.c \
+	k_cos.c k_exp.c k_expf.c k_rem_pio2.c k_sin.c k_tan.c \
+	k_cosf.c k_sinf.c k_tanf.c \
+	s_asinh.c s_asinhf.c s_atan.c s_atanf.c s_carg.c s_cargf.c s_cargl.c \
+	s_cbrt.c s_cbrtf.c s_ceil.c s_ceilf.c \
+	s_copysign.c s_copysignf.c s_cos.c s_cosf.c \
+	s_csqrt.c s_csqrtf.c s_erf.c s_erff.c \
+	s_exp2.c s_exp2f.c s_expm1.c s_expm1f.c s_fabs.c s_fabsf.c s_fdim.c \
+	s_finite.c s_finitef.c \
+	s_floor.c s_floorf.c s_fma.c s_fmaf.c \
+	s_fmax.c s_fmaxf.c s_fmaxl.c s_fmin.c \
+	s_fminf.c s_fminl.c s_fpclassify.c \
+	s_frexp.c s_frexpf.c s_ilogb.c s_ilogbf.c \
+	s_ilogbl.c s_isinf.c s_isfinite.c s_isnormal.c s_isnan.c \
+	s_llrint.c s_llrintf.c s_llround.c s_llroundf.c s_llroundl.c \
+	s_log1p.c s_log1pf.c s_logb.c s_logbf.c s_lrint.c s_lrintf.c \
+	s_lround.c s_lroundf.c s_lroundl.c s_modf.c s_modff.c \
+	s_nearbyint.c s_nextafter.c s_nextafterf.c \
+	s_nexttowardf.c s_remquo.c s_remquof.c \
+	s_rint.c s_rintf.c s_round.c s_roundf.c s_roundl.c \
+	s_scalbln.c s_scalbn.c s_scalbnf.c s_signbit.c \
+	s_signgam.c s_significand.c s_significandf.c s_sin.c s_sincos.c \
+	s_sinf.c s_sincosf.c s_tan.c s_tanf.c s_tanh.c s_tanhf.c s_tgammaf.c \
+	s_trunc.c s_truncf.c s_cpow.c  s_cpowf.c s_cpowl.c \
+	w_cabs.c w_cabsf.c w_drem.c w_dremf.c

 ifneq ($(OS), WINNT)
 $(CUR_SRCS) += s_nan.c
@ -42,18 +42,18 @@ $(CUR_SRCS) +=	s_copysignl.c s_fabsl.c s_llrintl.c s_lrintl.c s_modfl.c

 # If long double != double use these; otherwise, we alias the double versions.
 $(CUR_SRCS) +=	e_acosl.c e_asinl.c e_atan2l.c e_fmodl.c \
-        e_hypotl.c e_remainderl.c e_sqrtl.c \
-        s_atanl.c s_ceill.c s_cosl.c s_cprojl.c \
-        s_csqrtl.c s_floorl.c s_fmal.c \
-        s_frexpl.c s_logbl.c s_nexttoward.c \
-        s_remquol.c \
-        s_sinl.c s_tanl.c s_truncl.c w_cabsl.c \
-        s_nextafterl.c s_rintl.c s_scalbnl.c
+	e_hypotl.c e_remainderl.c e_sqrtl.c \
+	s_atanl.c s_ceill.c s_cosl.c s_cprojl.c \
+	s_csqrtl.c s_floorl.c s_fmal.c \
+	s_frexpl.c s_logbl.c s_nexttoward.c \
+	s_remquol.c \
+	s_sinl.c s_sincosl.c s_tanl.c s_truncl.c w_cabsl.c \
+	s_nextafterl.c s_rintl.c s_scalbnl.c
 #	s_cbrtl.c

 # C99 complex functions
 $(CUR_SRCS) +=	s_ccosh.c s_ccoshf.c s_cexp.c s_cexpf.c \
-        s_cimag.c s_cimagf.c s_cimagl.c \
-        s_conj.c s_conjf.c s_conjl.c \
-        s_cproj.c s_cprojf.c s_creal.c s_crealf.c s_creall.c \
-        s_csinh.c s_csinhf.c s_ctanh.c s_ctanhf.c
+	s_cimag.c s_cimagf.c s_cimagl.c \
+	s_conj.c s_conjf.c s_conjl.c \
+	s_cproj.c s_cprojf.c s_creal.c s_crealf.c s_creall.c \
+	s_csinh.c s_csinhf.c s_ctanh.c s_ctanhf.c
--- a/src/s_sincos.c
+++ b/src/s_sincos.c
@ -0,0 +1,151 @@
+/* @(#)s_sincos.c 5.1 13/07/15 */
+/*
+ * ====================================================
+ * Copyright (C) 2013 Elliot Saba. All rights reserved.
+ *
+ * Developed at the University of Washington.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+ #include "cdefs-compat.h"
+
+/* sincos(x, s, c)
+ * Several applications need sine and cosine of the same
+ * angle x. This function computes both at the same time,
+ * and stores the results in *sin and *cos.
+ *
+ * kernel function:
+ *	__kernel_sin		... sine function on [-pi/4,pi/4]
+ *	__kernel_cos		... cose function on [-pi/4,pi/4]
+ *	__ieee754_rem_pio2	... argument reduction routine
+ *
+ * Method.
+ *      Borrow liberally from s_sin.c and s_cos.c, merging
+ *  efforts where applicable and returning their values in
+ * appropriate variables, thereby slightly reducing the
+ * amount of work relative to just calling sin/cos(x) 
+ * separately
+ *
+ * Special cases:
+ *      Let trig be any of sin, cos, or tan.
+ *      sincos(+-INF, s, c)  is NaN, with signals;
+ *      sincos(NaN, s, c)    is that NaN;
+ */
+
+#include <float.h>
+
+#include "openlibm.h"
+//#define INLINE_REM_PIO2
+#include "math_private.h"
+//#include "e_rem_pio2.c"
+
+/* Constants used in polynomial approximation of sin/cos */
+static const double
+one =  1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
+half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
+S1  = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
+S2  =  8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
+S3  = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
+S4  =  2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
+S5  = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
+S6  =  1.58969099521155010221e-10, /* 0x3DE5D93A, 0x5ACFD57C */
+C1  =  4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
+C2  = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
+C3  =  2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
+C4  = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
+C5  =  2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
+C6  = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
+
+void
+__kernel_sincos( double x, double y, int iy, double * k_s, double * k_c )
+{
+    /* Inline calculation of sin/cos, as we can save
+    some work, and we will always need to calculate
+    both values, no matter the result of switch */
+    double z, w, r, v, hz;
+    z   = x*x;
+    w   = z*z;
+
+    /* cos-specific computation; equivalent to calling
+     __kernel_cos(x,y) and storing in k_c*/
+    r   = z*(C1+z*(C2+z*C3)) + w*w*(C4+z*(C5+z*C6));
+    hz  = 0.5*z;
+    v   = one-hz;
+
+    *k_c = v + (((one-v)-hz) + (z*r-x*y));
+
+    /* sin-specific computation; equivalent to calling
+    __kernel_sin(x,y,1) and storing in k_s*/
+    r   = S2+z*(S3+z*S4) + z*w*(S5+z*S6);
+    v   = z*x;
+    if(iy == 0)
+        *k_s = x+v*(S1+z*r);
+    else
+        *k_s = x-((z*(half*y-v*r)-y)-v*S1);
+}
+
+void
+sincos(double x, double * s, double * c)
+{
+    double y[2];
+    int32_t ix;
+
+    /* Store high word of x in ix */
+    GET_HIGH_WORD(ix,x);
+
+    /* |x| ~< pi/4 */
+    ix &= 0x7fffffff;
+    if(ix <= 0x3fe921fb) {
+        /* Check for small x for sin and cos */
+        if(ix<0x3e46a09e) {
+            /* Check for exact zero */
+            if( (int)x==0 ) {
+                *s = x;
+                *c = 1.0;
+                return;
+            }
+        }
+        /* Call kernel function with 0 extra */
+        __kernel_sincos(x,0.0,0, s, c);
+    } else if( ix >= 0x7ff00000 ) {
+        /* sincos(Inf or NaN) is NaN */
+        *s = x-x;
+        *c = x-x;
+    }
+
+    /*argument reduction needed*/
+    else {
+        double k_c, k_s;
+        printf( "bleh?\n");
+
+        /* Calculate remainer, then sub out to kernel */
+        int32_t n = __ieee754_rem_pio2(x,y);
+        __kernel_sincos( y[0], y[1], 1, &k_s, &k_c );
+
+        /* Figure out permutation of sin/cos outputs to true outputs */
+        switch(n&3) {
+            case 0:
+                *c =  k_c;
+                *s =  k_s;
+                break;
+            case 1:
+                *c = -k_s;
+                *s =  k_c;
+                break;
+            case 2:
+                *c = -k_c;
+                *s = -k_s;
+                break;
+            default:
+                *c =  k_s;
+                *s = -k_c;
+                break;
+        }
+    }
+}
+
+#if (LDBL_MANT_DIG == 53)
+__weak_reference(sincos, sincosl);
+#endif
--- a/src/s_sincosf.c
+++ b/src/s_sincosf.c
@ -0,0 +1,162 @@
+/* s_sincosf.c -- float version of s_sincos.c
+ *
+ * Copyright (C) 2013 Elliot Saba
+ * Developed at the University of Washington
+ *
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+*/
+
+#include "cdefs-compat.h"
+#include <float.h>
+#include "openlibm.h"
+
+//#define	INLINE_KERNEL_COSDF
+//#define	INLINE_KERNEL_SINDF
+//#define INLINE_REM_PIO2F
+#include "math_private.h"
+//#include "e_rem_pio2f.c"
+//#include "k_cosf.c"
+//#include "k_sinf.c"
+
+
+/* Constants used in shortcircuits in sincosf */
+static const double
+sc1pio2 = 1*M_PI_2,			/* 0x3FF921FB, 0x54442D18 */
+sc2pio2 = 2*M_PI_2,			/* 0x400921FB, 0x54442D18 */
+sc3pio2 = 3*M_PI_2,			/* 0x4012D97C, 0x7F3321D2 */
+sc4pio2 = 4*M_PI_2,			/* 0x401921FB, 0x54442D18 */
+
+/* Constants used in polynomial approximation of sin/cos */
+one =  1.0,
+S1 = -0x15555554cbac77.0p-55,	/* -0.166666666416265235595 */
+S2 =  0x111110896efbb2.0p-59,	/*  0.0083333293858894631756 */
+S3 = -0x1a00f9e2cae774.0p-65,	/* -0.000198393348360966317347 */
+S4 =  0x16cd878c3b46a7.0p-71,	/*  0.0000027183114939898219064 */
+C0  = -0x1ffffffd0c5e81.0p-54,	/* -0.499999997251031003120 */
+C1  =  0x155553e1053a42.0p-57,	/*  0.0416666233237390631894 */
+C2  = -0x16c087e80f1e27.0p-62,	/* -0.00138867637746099294692 */
+C3  =  0x199342e0ee5069.0p-68;	/*  0.0000243904487962774090654 */
+
+void
+__kernel_sincosdf( double x, float * s, float * c )
+{
+	double r, w, z, v;
+	z = x*x;
+	w = z*z;
+
+	/* cos-specific computation; equivalent to calling
+     __kernel_cos(x,y) and storing in k_c*/
+	r = C2+z*C3;
+	double k_c = ((one+z*C0) + w*C1) + (w*z)*r;
+
+	/* sin-specific computation; equivalent to calling
+    __kernel_sin(x,y,1) and storing in k_s*/
+	r = S3+z*S4;
+	v = z*x;
+	double k_s = (x + v*(S1+z*S2)) + v*w*r;
+
+	*c = k_c;
+	*s = k_s;
+}
+
+void
+sincosf(float x, float * s, float * c) {
+	// Worst approximation of sin and cos NA
+	*s = x;
+	*c = x;
+
+	double y;
+	float k_c, k_s;
+	int32_t n, hx, ix;
+
+	GET_FLOAT_WORD(hx,x);
+	ix = hx & 0x7fffffff;
+
+	if(ix <= 0x3f490fda) {		/* |x| ~<= pi/4 */
+	    if(ix<0x39800000) {		/* |x| < 2**-12 */
+			/* Check if x is exactly zero */
+			if(((int)x)==0) {
+				*s = x;
+				*c = 1.0f;
+				return;
+			}
+		}
+		__kernel_sincosdf(x, s, c);
+		return;
+	}
+	/* |x| ~<= 5*pi/4 */
+	if (ix<=0x407b53d1) {
+		/* |x| ~<= 3pi/4 */
+	    if(ix<=0x4016cbe3) {
+			if(hx>0) {
+				__kernel_sincosdf( sc1pio2 - x, c, s );
+			}
+			else {
+				__kernel_sincosdf( sc1pio2 + x, c, &k_s );
+				*s = -k_s;
+		    }
+		} else {
+
+			if(hx>0) {
+				__kernel_sincosdf( sc2pio2 - x, s, &k_c );
+				*c = -k_c;
+			} else  {
+				__kernel_sincosdf( -sc2pio2 - x, s, &k_c );
+				*c = -k_c;
+			}
+		}
+		return;
+	}
+
+	/* |x| ~<= 9*pi/4 */
+	if(ix<=0x40e231d5) {
+		/* |x|  ~> 7*pi/4 */
+	    if(ix<=0x40afeddf) {	
+			if(hx>0) {
+				__kernel_sincosdf( x - sc3pio2, c, &k_s );
+				*s = -k_s;
+			} else {
+				__kernel_sincosdf( x + sc3pio2, &k_c, s );
+				*c = -k_c;
+		    } 
+		}
+		else {
+	    	if( hx > 0 ) {
+	    		__kernel_sincosdf( x - sc4pio2, s, c );
+	    	} else {
+	    		__kernel_sincosdf( x + sc4pio2, s, c );
+	    	}
+	    }
+		return;
+	}
+
+	/* cos(Inf or NaN) is NaN */
+	else if(ix>=0x7f800000) {
+		*c = *s = x-x;
+	} else {
+		/* general argument reduction needed */
+		n = __ieee754_rem_pio2f(x,&y);
+
+		switch(n&3) {
+			case 0:
+				__kernel_sincosdf( y, s, c );
+				break;
+			case 1:
+				__kernel_sincosdf( -y, c, s );
+				break;
+			case 2: 
+				__kernel_sincosdf( -y, s, &k_c);
+				*c = -k_c;
+				break;
+			default:
+				__kernel_sincosdf( -y, &k_c, &k_s );
+				*c = -k_c;
+				*s = -k_s;
+				break;
+	    }
+	}
+
+}
--- a/src/s_sincosl.c
+++ b/src/s_sincosl.c
@ -0,0 +1,31 @@
+/* s_sincosl.c -- long double version of s_sincos.c
+ *
+ * Copyright (C) 2013 Elliot Saba
+ * Developed at the University of Washington
+ *
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+*/
+
+ #include "cdefs-compat.h"
+
+ #include <float.h>
+
+#include "openlibm.h"
+#include "math_private.h"
+#if LDBL_MANT_DIG == 64
+#include "../ld80/e_rem_pio2l.h"
+#elif LDBL_MANT_DIG == 113
+#include "../ld128/e_rem_pio2l.h"
+#else
+#error "Unsupported long double format"
+#endif
+
+ void
+ sincosl( long double x, long double * s, long double * c )
+ {
+ 	*s = cosl( x );
+ 	*c = sinl( x );
+ }