fixing distribution of random bignums, adding uniformity tests on the results (issue #675)

This commit is contained in:
Alex Shinn 2020-08-07 12:40:01 +09:00
parent 449312d3bd
commit 90f0425c37
2 changed files with 104 additions and 17 deletions

View file

@ -55,12 +55,12 @@ typedef unsigned int sexp_random_t;
sexp sexp_rs_random_integer (sexp ctx, sexp self, sexp_sint_t n, sexp rs, sexp bound) {
sexp_gc_var1(res);
int64_t m;
int i;
sexp_uint_t m;
sexp_int32_t m2;
#if SEXP_USE_BIGNUMS
/* sexp_uint_t mod; */
sexp_uint32_t *data;
sexp_int32_t hi, len, i;
sexp_uint_t *data;
int hi, j;
#endif
if (!sexp_random_source_p(self, rs))
return sexp_type_exception(ctx, self, sexp_unbox_fixnum(sexp_opcode_arg1_type(self)), rs);
@ -69,7 +69,7 @@ sexp sexp_rs_random_integer (sexp ctx, sexp self, sexp_sint_t n, sexp rs, sexp b
res = sexp_xtype_exception(ctx, self, "random bound must be positive", bound);
} else {
/* ensure we have sufficient bits */
for (i=m=0; i <= 1<<(CHAR_BIT*sizeof(m))/RAND_MAX; ++i) {
for (i=m=0; i-1 <= 1<<(CHAR_BIT*sizeof(m))/RAND_MAX; ++i) {
sexp_call_random(rs, m2);
m = m * RAND_MAX + m2;
}
@ -79,12 +79,15 @@ sexp sexp_rs_random_integer (sexp ctx, sexp self, sexp_sint_t n, sexp rs, sexp b
} else if (sexp_bignump(bound)) {
sexp_gc_preserve1(ctx, res);
hi = sexp_bignum_hi(bound);
len = hi * (sizeof(sexp_uint_t) / sizeof(sexp_int32_t));
res = sexp_make_bignum(ctx, hi + 1);
data = (sexp_uint32_t*) sexp_bignum_data(res);
for (i=0; i<len; i++) {
res = sexp_make_bignum(ctx, hi);
data = sexp_bignum_data(res);
for (i=0; i<hi; i++) {
for (j=m=0; j-1 <= 1<<(CHAR_BIT*sizeof(m))/RAND_MAX; ++j) {
sexp_call_random(rs, m2);
data[i] = m2;
m = m * (sexp_uint_t)RAND_MAX + (sexp_uint_t) m2;
}
data[i] = (sexp_uint_t) m;
/* fprintf(stderr, "i: %d j: %d m: %lu (%lx) RAND_MAX: %d\n", i, j, m, (sexp_uint_t)m, RAND_MAX); */
}
res = sexp_remainder(ctx, res, bound);
sexp_gc_release1(ctx);

View file

@ -1,13 +1,64 @@
(define-library (srfi 27 test)
(export run-tests)
(import (chibi)
(import (scheme base)
(scheme flonum)
(scheme inexact)
(scheme vector)
(srfi 27)
(chibi test))
(begin
(define (random-histogram bound n . o)
(let* ((hist (make-vector (if (pair? o) (car o) (min 10 bound)) 0))
(rs (make-random-source))
(rand (random-source-make-integers rs)))
(random-source-pseudo-randomize! rs 23 42)
(do ((i 0 (+ i 1)))
((= i n) hist)
(let* ((a (rand bound))
(b (quotient (* a (vector-length hist)) bound)))
(vector-set! hist b (+ 1 (vector-ref hist b)))))))
(define (loggamma x)
(call-with-values (lambda () (flloggamma x))
(lambda (res sign) res)))
;; continued fraction expansion, borrowed from (chibi math stats)
(define (lower-incomplete-gamma s z)
(let lp ((k 1) (x 1.0) (sum 1.0))
(if (or (= k 1000) (< (/ x sum) 1e-14))
(exp (+ (* s (log z))
(log sum)
(- z)
(- (loggamma (+ s 1.)))))
(let* ((x2 (* x (/ z (+ s k))))
(sum2 (+ sum x2)))
(lp (+ k 1) x2 sum2)))))
(define (chi^2-cdf X^2 df)
(min 1 (lower-incomplete-gamma (/ df 2) (/ X^2 2))))
(define (histogram-uniform? hist . o)
;; ultra-conservative alpha to avoid test failures on false positives
(let* ((alpha (if (pair? o) (car o) 1e-5))
(n (vector-fold + 0 hist))
(len (vector-length hist))
(expected (/ n (inexact len)))
(X^2 (vector-fold
(lambda (X^2 observed)
(+ X^2 (/ (square (- observed expected)) expected)))
0
hist))
(p (- 1.0 (chi^2-cdf X^2 (- len 1)))))
;;(write `(hist: ,hist X^2: ,X^2 p: ,p)) (newline)
(> p alpha)))
(define (run-tests)
(define (test-random rand n)
(test-assert (<= 0 (rand n) (- n 1))))
(test-begin "srfi-27: random")
;; sanity checks
(test 0 (random-integer 1))
(test-assert (<= 0 (random-integer 2) 1))
(test-error (random-integer 0))
(test-error (random-integer -1))
(let ((rs (make-random-source)))
;; chosen by fair dice roll. guaranteed to be random
(random-source-pseudo-randomize! rs 4 4)
@ -22,9 +73,42 @@
;; resetting the state
(test-not (= x (rand 1000000)))
(random-source-state-set! rs state)
;; (test x (rand 1000000))
))
(test 0 (random-integer 1))
(test-error (random-integer 0))
(test-error (random-integer -1)))
;; (test x (rand 1000000)) ;; actually impl defined
)))
;; Distribution Checks.
;; Since we fall back on the libc rand, we can't test the exact
;; result even for a given seed, so we run some conservative
;; statistical tests.
(test-assert
(histogram-uniform? (random-histogram 2 1000))) ; coin
(test-assert
(histogram-uniform? (random-histogram 6 10000))) ; die
(test-assert
(histogram-uniform? (random-histogram 27 10000 27))) ; small prime
;; boundaries
(test-assert
(histogram-uniform? (random-histogram (expt 2 31) 10000)))
(test-assert
(histogram-uniform? (random-histogram (expt 2 32) 10000)))
(test-assert
(histogram-uniform? (random-histogram (- (expt 2 62) 1) 10000)))
;; bignums
(test-assert
(histogram-uniform? (random-histogram (expt 2 62) 10000)))
(test-assert
(histogram-uniform? (random-histogram (expt 2 63) 10000)))
(test-assert
(histogram-uniform? (random-histogram (expt 2 63) 10000 100)))
(test-assert
(histogram-uniform? (random-histogram (- (expt 2 64) 1) 10000)))
(test-assert
(histogram-uniform? (random-histogram (expt 2 64) 10000)))
(test-assert
(histogram-uniform? (random-histogram (+ (expt 2 64) 1) 10000)))
(test-assert
(histogram-uniform? (random-histogram (expt 2 65) 10000)))
(test-assert
(histogram-uniform? (random-histogram (expt 2 164) 10000)))
(test-end))))