Faster bignum division convergence, avoiding pathologically

slow cases on 32-bit machines.
This commit is contained in:
Alex Shinn 2014-06-25 22:37:07 +09:00
parent daa2d96ff0
commit d26de4b701
2 changed files with 43 additions and 10 deletions

View file

@ -342,8 +342,16 @@ sexp sexp_bignum_sub_digits (sexp ctx, sexp dst, sexp a, sexp b) {
bdata = sexp_bignum_data(b); bdata = sexp_bignum_data(b);
cdata = sexp_bignum_data(c); cdata = sexp_bignum_data(c);
for (i=0; i<blen; i++) { for (i=0; i<blen; i++) {
if (adata[i] > bdata[i] || (adata[i] == bdata[i] && !borrow)) {
cdata[i] = adata[i] - bdata[i] - borrow; cdata[i] = adata[i] - bdata[i] - borrow;
borrow = (adata[i] < bdata[i] ? 1 : 0); borrow = 0;
} else {
cdata[i] = (SEXP_UINT_T_MAX - bdata[i]);
cdata[i] += 1;
cdata[i] -= borrow;
cdata[i] += adata[i];
borrow = 1;
}
} }
for ( ; borrow && (i<alen); i++) { for ( ; borrow && (i<alen); i++) {
borrow = (cdata[i] == 0 ? 1 : 0); borrow = (cdata[i] == 0 ? 1 : 0);
@ -470,7 +478,7 @@ sexp sexp_bignum_mul (sexp ctx, sexp dst, sexp a, sexp b) {
} }
sexp sexp_bignum_quot_rem (sexp ctx, sexp *rem, sexp a, sexp b) { sexp sexp_bignum_quot_rem (sexp ctx, sexp *rem, sexp a, sexp b) {
sexp_luint_t d; sexp_luint_t d, dn, dd;
sexp_uint_t dlo, dhi; sexp_uint_t dlo, dhi;
sexp_sint_t alen, blen=sexp_bignum_hi(b), sign=1, off=0; sexp_sint_t alen, blen=sexp_bignum_hi(b), sign=1, off=0;
sexp_gc_var5(q, x, y, a1, b1); sexp_gc_var5(q, x, y, a1, b1);
@ -504,13 +512,34 @@ sexp sexp_bignum_quot_rem (sexp ctx, sexp *rem, sexp a, sexp b) {
sexp_bignum_data(x)[off] = 0; sexp_bignum_data(x)[off] = 0;
if (off > 0) sexp_bignum_data(x)[off-1] = 0; if (off > 0) sexp_bignum_data(x)[off-1] = 0;
off = alen - blen + 1; off = alen - blen + 1;
d = (((sexp_luint_t)sexp_bignum_data(a1)[alen-1] << (sizeof(sexp_uint_t)*8)) dn = (((sexp_luint_t)sexp_bignum_data(a1)[alen-1]
+ sexp_bignum_data(a1)[alen-2]) << (sizeof(sexp_uint_t)*8))
/ (((sexp_luint_t)sexp_bignum_data(b1)[blen-1] << (sizeof(sexp_uint_t)*8)) + sexp_bignum_data(a1)[alen-2]);
dd = (((sexp_luint_t)sexp_bignum_data(b1)[blen-1]
<< (sizeof(sexp_uint_t)*8))
+ sexp_bignum_data(b1)[blen-2]); + sexp_bignum_data(b1)[blen-2]);
if (alen > 2 && blen > 2 &&
sexp_bignum_data(a1)[alen-1] < (1uL<<(sizeof(sexp_uint_t)*4)) &&
sexp_bignum_data(b1)[blen-1] < (1uL<<(sizeof(sexp_uint_t)*4))) {
dn = (dn << (sizeof(sexp_uint_t)*4))
+ (sexp_bignum_data(a1)[alen-3] >> (sizeof(sexp_uint_t)*4));
dd = (dd << (sizeof(sexp_uint_t)*4))
+ (sexp_bignum_data(b1)[blen-3] >> (sizeof(sexp_uint_t)*4));
}
d = dn / dd;
if (d == 0) { if (d == 0) {
d = ((sexp_luint_t)sexp_bignum_data(a1)[alen-1] << (sizeof(sexp_uint_t)*8)) + (sexp_luint_t)sexp_bignum_data(a1)[alen-2]; dn = (((sexp_luint_t)sexp_bignum_data(a1)[alen-1]
d /= sexp_bignum_data(b1)[blen-1]; << (sizeof(sexp_uint_t)*8))
+ sexp_bignum_data(a1)[alen-2]);
dd = sexp_bignum_data(b1)[blen-1];
if (sexp_bignum_data(a1)[alen-1] < (1uL<<(sizeof(sexp_uint_t)*4)) &&
sexp_bignum_data(b1)[blen-1] < (1uL<<(sizeof(sexp_uint_t)*4))) {
dn = (dn << (sizeof(sexp_uint_t)*4))
+ (sexp_bignum_data(a1)[alen-3] >> (sizeof(sexp_uint_t)*4));
dd = (dd << (sizeof(sexp_uint_t)*4))
+ (sexp_bignum_data(b1)[blen-2] >> (sizeof(sexp_uint_t)*4));
}
d = dn / dd;
off--; off--;
} }
dhi = d >> (sizeof(sexp_uint_t)*8); dhi = d >> (sizeof(sexp_uint_t)*8);

View file

@ -167,7 +167,11 @@
(test #f (>= -inf.0 +inf.0)) (test #f (>= -inf.0 +inf.0))
(test #f (> -inf.0 +inf.0)) (test #f (> -inf.0 +inf.0))
;;(test (expt 10 154) (sqrt (expt 10 308))) (test (expt 10 154) (sqrt (expt 10 308)))
(test 36893488147419103231
(- 340282366920938463463374607431768211456
340282366920938463426481119284349108225))
(cond-expand (cond-expand
(ratios (ratios