diff --git a/srfi/sorting/delndups.scm b/srfi/sorting/delndups.scm new file mode 100644 index 00000000..44e2c73a --- /dev/null +++ b/srfi/sorting/delndups.scm @@ -0,0 +1,186 @@ +;;; The sort package -- delete neighboring duplicate elts +;;; Copyright (c) 1998 by Olin Shivers. +;;; This code is open-source; see the end of the file for porting and +;;; more copyright information. +;;; Olin Shivers 11/98. + +;;; Problem: +;;; vector-delete-neighbor-dups pushes N stack frames, where N is the number +;;; of elements in the answer vector. This is arguably a very efficient thing +;;; to do, but it might blow out on a system with a limited stack but a big +;;; heap. We could rewrite this to "chunk" up answers in temp vectors if we +;;; push more than a certain number of frames, then allocate a final answer, +;;; copying all the chunks into the answer. But it's much more complex code. + +;;; Exports: +;;; (list-delete-neighbor-dups = lis) -> list +;;; (list-delete-neighbor-dups! = lis) -> list +;;; (vector-delete-neighbor-dups = v [start end]) -> vector +;;; (vector-delete-neighbor-dups! = v [start end]) -> end' + +;;; These procedures delete adjacent duplicate elements from a list or +;;; a vector, using a given element equality procedure. The first or leftmost +;;; element of a run of equal elements is the one that survives. The list +;;; or vector is not otherwise disordered. +;;; +;;; These procedures are linear time -- much faster than the O(n^2) general +;;; duplicate-elt deletors that do not assume any "bunching" of elements. +;;; If you want to delete duplicate elements from a large list or vector, +;;; sort the elements to bring equal items together, then use one of these +;;; procedures -- for a total time of O(n lg n). + +;;; LIST-DELETE-NEIGHBOR-DUPS +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Below are multiple versions of the LIST-DELETE-NEIGHBOR-DUPS procedure, +;;; from simple to complex. RECUR's contract: Strip off any leading X's from +;;; LIS, and return that list neighbor-dup-deleted. +;;; +;;; The final version +;;; - shares a common subtail between the input & output list, up to 1024 +;;; elements; +;;; - Needs no more than 1024 stack frames. + +#; +;;; Simplest version. +;;; - Always allocates a fresh list / never shares storage. +;;; - Needs N stack frames, if answer is length N. +(define (list-delete-neighbor-dups = lis) + (if (pair? lis) + (let ((x0 (car lis))) + (cons x0 (let recur ((x0 x0) (xs (cdr lis))) + (if (pair? xs) + (let ((x1 (car xs)) + (x2+ (cdr xs))) + (if (= x0 x1) + (recur x0 x2+) ; Loop, actually. + (cons x1 (recur x1 x2+)))) + xs)))) + lis)) + +;;; This version tries to use cons cells from input by sharing longest +;;; common tail between input & output. Still needs N stack frames, for ans +;;; of length N. +(define (list-delete-neighbor-dups = lis) + (if (pair? lis) + (let* ((x0 (car lis)) + (xs (cdr lis)) + (ans (let recur ((x0 x0) (xs xs)) + (if (pair? xs) + (let ((x1 (car xs)) + (x2+ (cdr xs))) + (if (= x0 x1) + (recur x0 x2+) + (let ((ans-tail (recur x1 x2+))) + (if (eq? ans-tail x2+) xs + (cons x1 ans-tail))))) + xs)))) + (if (eq? ans xs) lis (cons x0 ans))) + + lis)) + +;;; LIST-DELETE-NEIGHBOR-DUPS! +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This code runs in constant list space, constant stack, and also +;;; does only the minimum SET-CDR!'s necessary. + +(define (list-delete-neighbor-dups! = lis) + (if (pair? lis) + (let lp1 ((prev lis) (prev-elt (car lis)) (lis (cdr lis))) + (if (pair? lis) + (let ((lis-elt (car lis)) + (next (cdr lis))) + (if (= prev-elt lis-elt) + + ;; We found the first elts of a run of dups, so we know + ;; we're going to have to do a SET-CDR!. Scan to the end of + ;; the run, do the SET-CDR!, and loop on LP1. + (let lp2 ((lis next)) + (if (pair? lis) + (let ((lis-elt (car lis)) + (next (cdr lis))) + (if (= prev-elt lis-elt) + (lp2 next) + (begin (set-cdr! prev lis) + (lp1 lis lis-elt next)))) + (set-cdr! prev lis))) ; Ran off end => quit. + + (lp1 lis lis-elt next)))))) + lis) + + +(define (vector-delete-neighbor-dups elt= v . maybe-start+end) + (call-with-values + (lambda () (vector-start+end v maybe-start+end)) + (lambda (start end) + (if (< start end) + (let* ((x (vector-ref v start)) + (ans (let recur ((x x) (i start) (j 1)) + (if (< i end) + (let ((y (vector-ref v i)) + (nexti (+ i 1))) + (if (elt= x y) + (recur x nexti j) + (let ((ansvec (recur y nexti (+ j 1)))) + (vector-set! ansvec j y) + ansvec))) + (make-vector j))))) + (vector-set! ans 0 x) + ans) + '#())))) + + +;;; Packs the surviving elements to the left, in range [start,end'), +;;; and returns END'. +(define (vector-delete-neighbor-dups! elt= v . maybe-start+end) + (call-with-values + (lambda () (vector-start+end v maybe-start+end)) + (lambda (start end) + + (if (>= start end) + end + ;; To eliminate unnecessary copying (read elt i then write the value + ;; back at index i), we scan until we find the first dup. + (let skip ((j start) (vj (vector-ref v start))) + (let ((j+1 (+ j 1))) + (if (>= j+1 end) + end + (let ((vj+1 (vector-ref v j+1))) + (if (not (elt= vj vj+1)) + (skip j+1 vj+1) + + ;; OK -- j & j+1 are dups, so we're committed to moving + ;; data around. In lp2, v[start,j] is what we've done; + ;; v[k,end) is what we have yet to handle. + (let lp2 ((j j) (vj vj) (k (+ j 2))) + (let lp3 ((k k)) + (if (>= k end) + (+ j 1) ; Done. + (let ((vk (vector-ref v k)) + (k+1 (+ k 1))) + (if (elt= vj vk) + (lp3 k+1) + (let ((j+1 (+ j 1))) + (vector-set! v j+1 vk) + (lp2 j+1 vk k+1)))))))))))))))) + +;;; Copyright +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This code is +;;; Copyright (c) 1998 by Olin Shivers. +;;; The terms are: You may do as you please with this code, as long as +;;; you do not delete this notice or hold me responsible for any outcome +;;; related to its use. +;;; +;;; Blah blah blah. Don't you think source files should contain more lines +;;; of code than copyright notice? +;;; +;;; Code porting +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; If your Scheme has a faster mechanism for handling optional arguments +;;; (e.g., Chez), you should definitely port over to it. Note that argument +;;; defaulting and error-checking are interleaved -- you don't have to +;;; error-check defaulted START/END args to see if they are fixnums that are +;;; legal vector indices for the corresponding vector, etc. + + diff --git a/srfi/sorting/lmsort.scm b/srfi/sorting/lmsort.scm new file mode 100644 index 00000000..28211df6 --- /dev/null +++ b/srfi/sorting/lmsort.scm @@ -0,0 +1,386 @@ +;;; list merge & list merge-sort -*- Scheme -*- +;;; Copyright (c) 1998 by Olin Shivers. +;;; This code is open-source; see the end of the file for porting and +;;; more copyright information. +;;; Olin Shivers + +;;; Exports: +;;; (list-merge < lis lis) -> list +;;; (list-merge! < lis lis) -> list +;;; (list-merge-sort < lis) -> list +;;; (list-merge-sort! < lis) -> list + +;;; A stable list merge sort of my own device +;;; Two variants: pure & destructive +;;; +;;; This list merge sort is opportunistic (a "natural" sort) -- it exploits +;;; existing order in the input set. Instead of recursing all the way down to +;;; individual elements, the leaves of the merge tree are maximal contiguous +;;; runs of elements from the input list. So the algorithm does very well on +;;; data that is mostly ordered, with a best-case time of O(n) when the input +;;; list is already completely sorted. In any event, worst-case time is +;;; O(n lg n). +;;; +;;; The destructive variant is "in place," meaning that it allocates no new +;;; cons cells at all; it just rearranges the pairs of the input list with +;;; SET-CDR! to order it. +;;; +;;; The interesting control structure is the combination recursion/iteration +;;; of the core GROW function that does an "opportunistic" DFS walk of the +;;; merge tree, adaptively subdividing in response to the length of the +;;; merges, without requiring any auxiliary data structures beyond the +;;; recursion stack. It's actually quite simple -- ten lines of code. +;;; -Olin Shivers 10/20/98 + +;;; (mlet ((var-list mv-exp) ...) body ...) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; A LET* form that handles multiple values. Move this into the two clients +;;; if you don't have a module system handy to restrict its visibility... +(define-syntax mlet ; Multiple-value LET* + (syntax-rules () + ((mlet ((() exp) rest ...) body ...) + (begin exp (mlet (rest ...) body ...))) + + ((mlet (((var) exp) rest ...) body ...) + (let ((var exp)) (mlet (rest ...) body ...))) + + ((mlet ((vars exp) rest ...) body ...) + (call-with-values (lambda () exp) + (lambda vars (mlet (rest ...) body ...)))) + + ((mlet () body ...) (begin body ...)))) + + +;;; (list-merge-sort < lis) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; A natural, stable list merge sort. +;;; - natural: picks off maximal contiguous runs of pre-ordered data. +;;; - stable: won't invert the order of equal elements in the input list. + +(define (list-merge-sort elt< lis) + + ;; (getrun lis) -> run runlen rest + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; Pick a run of non-decreasing data off of non-empty list LIS. + ;; Return the length of this run, and the following list. + (define (getrun lis) + (let lp ((ans '()) (i 1) (prev (car lis)) (xs (cdr lis))) + (if (pair? xs) + (let ((x (car xs))) + (if (elt< x prev) + (values (append-reverse ans (cons prev '())) i xs) + (lp (cons prev ans) (+ i 1) x (cdr xs)))) + (values (append-reverse ans (cons prev '())) i xs)))) + + (define (append-reverse rev-head tail) + (let lp ((rev-head rev-head) (tail tail)) + (if (null-list? rev-head) tail + (lp (cdr rev-head) (cons (car rev-head) tail))))) + + (define (null-list? l) + (cond ((pair? l) #f) + ((null? l) #t) + (else (error "argument out of domain" l)))) + + ;; (merge a b) -> list + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; List merge -- stably merge lists A (length > 0) & B (length > 0). + ;; This version requires up to |a|+|b| stack frames. + (define (merge a b) + (let recur ((x (car a)) (a a) + (y (car b)) (b b)) + (if (elt< y x) + (cons y (let ((b (cdr b))) + (if (pair? b) + (recur x a (car b) b) + a))) + (cons x (let ((a (cdr a))) + (if (pair? a) + (recur (car a) a y b) + b)))))) + + ;; (grow s ls ls2 u lw) -> [a la unused] + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; The core routine. Read the next 20 lines of comments & all is obvious. + ;; - S is a sorted list of length LS > 1. + ;; - LS2 is some power of two <= LS. + ;; - U is an unsorted list. + ;; - LW is a positive integer. + ;; Starting with S, and taking data from U as needed, produce + ;; a sorted list of *at least* length LW, if there's enough data + ;; (LW <= LS + length(U)), or use all of U if not. + ;; + ;; GROW takes maximal contiguous runs of data from U at a time; + ;; it is allowed to return a list *longer* than LW if it gets lucky + ;; with a long run. + ;; + ;; The key idea: If you want a merge operation to "pay for itself," the two + ;; lists being merged should be about the same length. Remember that. + ;; + ;; Returns: + ;; - A: The result list + ;; - LA: The length of the result list + ;; - UNUSED: The unused tail of U. + + (define (grow s ls ls2 u lw) ; The core of the sort algorithm. + (if (or (<= lw ls) (not (pair? u))) ; Met quota or out of data? + (values s ls u) ; If so, we're done. + (mlet (((ls2) (let lp ((ls2 ls2)) + (let ((ls2*2 (+ ls2 ls2))) + (if (<= ls2*2 ls) (lp ls2*2) ls2)))) + ;; LS2 is now the largest power of two <= LS. + ;; (Just think of it as being roughly LS.) + ((r lr u2) (getrun u)) ; Get a run, then + ((t lt u3) (grow r lr 1 u2 ls2))) ; grow it up to be T. + (grow (merge s t) (+ ls lt) ; Merge S & T, + (+ ls2 ls2) u3 lw)))) ; and loop. + + ;; Note: (LENGTH LIS) or any constant guaranteed + ;; to be greater can be used in place of INFINITY. + (if (pair? lis) ; Don't sort an empty list. + (mlet (((r lr tail) (getrun lis)) ; Pick off an initial run, + ((infinity) #o100000000) ; then grow it up maximally. + ((a la v) (grow r lr 1 tail infinity))) + a) + '())) + + +;;; (list-merge-sort! < lis) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; A natural, stable, destructive, in-place list merge sort. +;;; - natural: picks off maximal contiguous runs of pre-ordered data. +;;; - stable: won't invert the order of equal elements in the input list. +;;; - destructive, in-place: this routine allocates no extra working memory; +;;; it simply rearranges the list with SET-CDR! operations. + +(define (list-merge-sort! elt< lis) + ;; (getrun lis) -> runlen last rest + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; Pick a run of non-decreasing data off of non-empty list LIS. + ;; Return the length of this run, the last cons cell of the run, + ;; and the following list. + (define (getrun lis) + (let lp ((lis lis) (x (car lis)) (i 1) (next (cdr lis))) + (if (pair? next) + (let ((y (car next))) + (if (elt< y x) + (values i lis next) + (lp next y (+ i 1) (cdr next)))) + (values i lis next)))) + + ;; (merge! a enda b endb) -> [m endm] + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; Destructively and stably merge non-empty lists A & B. + ;; The last cons of A is ENDA. (The cdr of ENDA can be non-nil.) + ;; the last cons of B is ENDB. (The cdr of ENDB can be non-nil.) + ;; + ;; Return the first and last cons cells of the merged list. + ;; This routine is iterative & in-place: it runs in constant stack and + ;; doesn't allocate any cons cells. It is also tedious but simple; don't + ;; bother reading it unless necessary. + (define (merge! a enda b endb) + ;; The logic of these two loops is completely driven by these invariants: + ;; SCAN-A: (CDR PREV) = A. X = (CAR A). Y = (CAR B). + ;; SCAN-B: (CDR PREV) = B. X = (CAR A). Y = (CAR B). + (letrec ((scan-a (lambda (prev x a y b) ; Zip down A until we + (cond ((elt< y x) ; find an elt > (CAR B). + (set-cdr! prev b) + (let ((next-b (cdr b))) + (if (eq? b endb) + (begin (set-cdr! b a) enda) ; Done. + (scan-b b x a (car next-b) next-b)))) + + ((eq? a enda) (maybe-set-cdr! a b) endb) ; Done. + + (else (let ((next-a (cdr a))) ; Continue scan. + (scan-a a (car next-a) next-a y b)))))) + + (scan-b (lambda (prev x a y b) ; Zip down B while its + (cond ((elt< y x) ; elts are < (CAR A). + (if (eq? b endb) + (begin (set-cdr! b a) enda) ; Done. + (let ((next-b (cdr b))) ; Continue scan. + (scan-b b x a (car next-b) next-b)))) + + (else (set-cdr! prev a) + (if (eq? a enda) + (begin (maybe-set-cdr! a b) endb) ; Done. + (let ((next-a (cdr a))) + (scan-a a (car next-a) next-a y b))))))) + + ;; This guy only writes if he has to. Called at most once. + ;; Pointer equality rules; pure languages are for momma's boys. + (maybe-set-cdr! (lambda (pair val) (if (not (eq? (cdr pair) val)) + (set-cdr! pair val))))) + + (let ((x (car a)) (y (car b))) + (if (elt< y x) + + ;; B starts the answer list. + (values b (if (eq? b endb) + (begin (set-cdr! b a) enda) + (let ((next-b (cdr b))) + (scan-b b x a (car next-b) next-b)))) + + ;; A starts the answer list. + (values a (if (eq? a enda) + (begin (maybe-set-cdr! a b) endb) + (let ((next-a (cdr a))) + (scan-a a (car next-a) next-a y b)))))))) + + ;; (grow s ends ls ls2 u lw) -> [a enda la unused] + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; The core routine. + ;; - S is a sorted list of length LS > 1, with final cons cell ENDS. + ;; (CDR ENDS) doesn't have to be nil. + ;; - LS2 is some power of two <= LS. + ;; - U is an unsorted list. + ;; - LW is a positive integer. + ;; Starting with S, and taking data from U as needed, produce + ;; a sorted list of *at least* length LW, if there's enough data + ;; (LW <= LS + length(U)), or use all of U if not. + ;; + ;; GROW takes maximal contiguous runs of data from U at a time; + ;; it is allowed to return a list *longer* than LW if it gets lucky + ;; with a long run. + ;; + ;; The key idea: If you want a merge operation to "pay for itself," the two + ;; lists being merged should be about the same length. Remember that. + ;; + ;; Returns: + ;; - A: The result list (not properly terminated) + ;; - ENDA: The last cons cell of the result list. + ;; - LA: The length of the result list + ;; - UNUSED: The unused tail of U. + (define (grow s ends ls ls2 u lw) + (if (and (pair? u) (< ls lw)) + + ;; We haven't met the LW quota but there's still some U data to use. + (mlet (((ls2) (let lp ((ls2 ls2)) + (let ((ls2*2 (+ ls2 ls2))) + (if (<= ls2*2 ls) (lp ls2*2) ls2)))) + ;; LS2 is now the largest power of two <= LS. + ;; (Just think of it as being roughly LS.) + ((lr endr u2) (getrun u)) ; Get a run from U; + ((t endt lt u3) (grow u endr lr 1 u2 ls2)) ; grow it up to be T. + ((st end-st) (merge! s ends t endt))) ; Merge S & T, + (grow st end-st (+ ls lt) (+ ls2 ls2) u3 lw)) ; then loop. + + (values s ends ls u))) ; Done -- met LW quota or ran out of data. + + ;; Note: (LENGTH LIS) or any constant guaranteed + ;; to be greater can be used in place of INFINITY. + (if (pair? lis) + (mlet (((lr endr rest) (getrun lis)) ; Pick off an initial run. + ((infinity) #o100000000) ; Then grow it up maximally. + ((a enda la v) (grow lis endr lr 1 rest infinity))) + (set-cdr! enda '()) ; Nil-terminate answer. + a) ; We're done. + + '())) ; Don't sort an empty list. + + +;;; Merge +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; These two merge procedures are stable -- ties favor list A. + +(define (list-merge < a b) + (cond ((not (pair? a)) b) + ((not (pair? b)) a) + (else (let recur ((x (car a)) (a a) ; A is a pair; X = (CAR A). + (y (car b)) (b b)) ; B is a pair; Y = (CAR B). + (if (< y x) + + (let ((b (cdr b))) + (if (pair? b) + (cons y (recur x a (car b) b)) + (cons y a))) + + (let ((a (cdr a))) + (if (pair? a) + (cons x (recur (car a) a y b)) + (cons x b)))))))) + + +;;; This destructive merge does as few SET-CDR!s as it can -- for example, if +;;; the list is already sorted, it does no SET-CDR!s at all. It is also +;;; iterative, running in constant stack. + +(define (list-merge! < a b) + ;; The logic of these two loops is completely driven by these invariants: + ;; SCAN-A: (CDR PREV) = A. X = (CAR A). Y = (CAR B). + ;; SCAN-B: (CDR PREV) = B. X = (CAR A). Y = (CAR B). + (letrec ((scan-a (lambda (prev a x b y) ; Zip down A doing + (if (< y x) ; no SET-CDR!s until + (let ((next-b (cdr b))) ; we hit a B elt that + (set-cdr! prev b) ; has to be inserted. + (if (pair? next-b) + (scan-b b a x next-b (car next-b)) + (set-cdr! b a))) + + (let ((next-a (cdr a))) + (if (pair? next-a) + (scan-a a next-a (car next-a) b y) + (set-cdr! a b)))))) + + (scan-b (lambda (prev a x b y) ; Zip down B doing + (if (< y x) ; no SET-CDR!s until + (let ((next-b (cdr b))) ; we hit an A elt that + (if (pair? next-b) ; has to be + (scan-b b a x next-b (car next-b)) ; inserted. + (set-cdr! b a))) + + (let ((next-a (cdr a))) + (set-cdr! prev a) + (if (pair? next-a) + (scan-a a next-a (car next-a) b y) + (set-cdr! a b))))))) + + (cond ((not (pair? a)) b) + ((not (pair? b)) a) + + ;; B starts the answer list. + ((< (car b) (car a)) + (let ((next-b (cdr b))) + (if (null? next-b) + (set-cdr! b a) + (scan-b b a (car a) next-b (car next-b)))) + b) + + ;; A starts the answer list. + (else (let ((next-a (cdr a))) + (if (null? next-a) + (set-cdr! a b) + (scan-a a next-a (car next-a) b (car b)))) + a)))) + + +;;; Copyright +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This code is +;;; Copyright (c) 1998 by Olin Shivers. +;;; The terms are: You may do as you please with this code, as long as +;;; you do not delete this notice or hold me responsible for any outcome +;;; related to its use. +;;; +;;; Blah blah blah. + + +;;; Code tuning & porting +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This is very portable code. It's R4RS with the following exceptions: +;;; - The R5RS multiple-value VALUES & CALL-WITH-VALUES procedures for +;;; handling multiple-value return. +;;; +;;; This code is *tightly* bummed as far as I can go in portable Scheme. +;;; +;;; - The fixnum arithmetic in LIST-MERGE-SORT! and COUNTED-LIST-MERGE! +;;; that could be safely switched over to unsafe, fixnum-specific ops, +;;; if you're sure that 2*maxlen is a fixnum, where maxlen is the length +;;; of the longest list you could ever have. +;;; +;;; - I typically write my code in a style such that every CAR and CDR +;;; application is protected by an upstream PAIR?. This is the case in this +;;; code, so all the CAR's and CDR's could safely switched over to unsafe +;;; versions. But check over the code before you do it, in case the source +;;; has been altered since I wrote this. diff --git a/srfi/sorting/median.scm b/srfi/sorting/median.scm new file mode 100644 index 00000000..36a4beda --- /dev/null +++ b/srfi/sorting/median.scm @@ -0,0 +1,30 @@ +;;;; Finding the median of a vector +;; This involves sorting the vector, which is why it's part +;; of this package. + +(define (vector-find-median < v knil . maybe-mean) + (define mean (if (null? maybe-mean) + (lambda (a b) (/ (+ a b) 2)) + (car maybe-mean))) + (define len (vector-length v)) + (define newv (vector-sort < v)) + (cond + ((= len 0) knil) + ((odd? len) (vector-ref newv (/ (- len 1) 2))) + (else (mean + (vector-ref newv (- (/ len 2) 1)) + (vector-ref newv (/ len 2)))))) + +(define (vector-find-median! < v knil . maybe-mean) + (define mean (if (null? maybe-mean) + (lambda (a b) (/ (+ a b) 2)) + (car maybe-mean))) + (define len (vector-length v)) + (define newv (vector-sort! < v)) + (cond + ((= len 0) knil) + ((odd? len) (vector-ref newv (/ (- len 1) 2))) + (else (mean + (vector-ref newv (- (/ len 2) 1)) + (vector-ref newv (/ len 2)))))) + diff --git a/srfi/sorting/merge.scm b/srfi/sorting/merge.scm new file mode 100644 index 00000000..37335a87 --- /dev/null +++ b/srfi/sorting/merge.scm @@ -0,0 +1,226 @@ +;;; This file extracts four merge procedures from lmsort.scm and vmsort.scm +;;; files written by Olin Shivers. + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; Start of code extracted from Olin's lmsort.scm file. +;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; list merge & list merge-sort -*- Scheme -*- +;;; Copyright (c) 1998 by Olin Shivers. +;;; This code is open-source; see the end of the file for porting and +;;; more copyright information. +;;; Olin Shivers + +;;; Exports: +;;; (list-merge < lis lis) -> list +;;; (list-merge! < lis lis) -> list + +;;; Merge +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; These two merge procedures are stable -- ties favor list A. + +(define (list-merge < a b) + (cond ((not (pair? a)) b) + ((not (pair? b)) a) + (else (let recur ((x (car a)) (a a) ; A is a pair; X = (CAR A). + (y (car b)) (b b)) ; B is a pair; Y = (CAR B). + (if (< y x) + + (let ((b (cdr b))) + (if (pair? b) + (cons y (recur x a (car b) b)) + (cons y a))) + + (let ((a (cdr a))) + (if (pair? a) + (cons x (recur (car a) a y b)) + (cons x b)))))))) + + +;;; This destructive merge does as few SET-CDR!s as it can -- for example, if +;;; the list is already sorted, it does no SET-CDR!s at all. It is also +;;; iterative, running in constant stack. + +(define (list-merge! < a b) + ;; The logic of these two loops is completely driven by these invariants: + ;; SCAN-A: (CDR PREV) = A. X = (CAR A). Y = (CAR B). + ;; SCAN-B: (CDR PREV) = B. X = (CAR A). Y = (CAR B). + (letrec ((scan-a (lambda (prev a x b y) ; Zip down A doing + (if (< y x) ; no SET-CDR!s until + (let ((next-b (cdr b))) ; we hit a B elt that + (set-cdr! prev b) ; has to be inserted. + (if (pair? next-b) + (scan-b b a x next-b (car next-b)) + (set-cdr! b a))) + + (let ((next-a (cdr a))) + (if (pair? next-a) + (scan-a a next-a (car next-a) b y) + (set-cdr! a b)))))) + + (scan-b (lambda (prev a x b y) ; Zip down B doing + (if (< y x) ; no SET-CDR!s until + (let ((next-b (cdr b))) ; we hit an A elt that + (if (pair? next-b) ; has to be + (scan-b b a x next-b (car next-b)) ; inserted. + (set-cdr! b a))) + + (let ((next-a (cdr a))) + (set-cdr! prev a) + (if (pair? next-a) + (scan-a a next-a (car next-a) b y) + (set-cdr! a b))))))) + + (cond ((not (pair? a)) b) + ((not (pair? b)) a) + + ;; B starts the answer list. + ((< (car b) (car a)) + (let ((next-b (cdr b))) + (if (null? next-b) + (set-cdr! b a) + (scan-b b a (car a) next-b (car next-b)))) + b) + + ;; A starts the answer list. + (else (let ((next-a (cdr a))) + (if (null? next-a) + (set-cdr! a b) + (scan-a a next-a (car next-a) b (car b)))) + a)))) + + +;;; Copyright +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This code is +;;; Copyright (c) 1998 by Olin Shivers. +;;; The terms are: You may do as you please with this code, as long as +;;; you do not delete this notice or hold me responsible for any outcome +;;; related to its use. +;;; +;;; Blah blah blah. + + +;;; Code tuning & porting +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This is very portable code. It's R4RS with the following exceptions: +;;; - The R5RS multiple-value VALUES & CALL-WITH-VALUES procedures for +;;; handling multiple-value return. +;;; +;;; This code is *tightly* bummed as far as I can go in portable Scheme. +;;; +;;; - The fixnum arithmetic in LIST-MERGE-SORT! and COUNTED-LIST-MERGE! +;;; that could be safely switched over to unsafe, fixnum-specific ops, +;;; if you're sure that 2*maxlen is a fixnum, where maxlen is the length +;;; of the longest list you could ever have. +;;; +;;; - I typically write my code in a style such that every CAR and CDR +;;; application is protected by an upstream PAIR?. This is the case in this +;;; code, so all the CAR's and CDR's could safely switched over to unsafe +;;; versions. But check over the code before you do it, in case the source +;;; has been altered since I wrote this. + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; End of code extracted from Olin's lmsort.scm file. +;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; Start of code extracted from Olin's vmsort.scm file. +;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; The sort package -- stable vector merge & merge sort -*- Scheme -*- +;;; Copyright (c) 1998 by Olin Shivers. +;;; This code is open-source; see the end of the file for porting and +;;; more copyright information. +;;; Olin Shivers 10/98. + +;;; Exports: +;;; (vector-merge < v1 v2 [start1 end1 start2 end2]) -> vector +;;; (vector-merge! < v v1 v2 [start0 start1 end1 start2 end2]) -> unspecific +;;; +;;; (vector-merge-sort < v [start end temp]) -> vector +;;; (vector-merge-sort! < v [start end temp]) -> unspecific + + +;;; Merge +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; (vector-merge < v1 v2 [start1 end1 start2 end2]) -> vector +;;; (vector-merge! < v v1 v2 [start start1 end1 start2 end2]) -> unspecific +;;; +;;; Stable vector merge -- V1's elements come out ahead of equal V2 elements. + +(define (vector-merge < v1 v2 . maybe-starts+ends) + (call-with-values + (lambda () (vectors-start+end-2 v1 v2 maybe-starts+ends)) + (lambda (start1 end1 start2 end2) + (let ((ans (make-vector (+ (- end1 start1) (- end2 start2))))) + (%vector-merge! < ans v1 v2 0 start1 end1 start2 end2) + ans)))) + +(define (vector-merge! < v v1 v2 . maybe-starts+ends) + (call-with-values + (lambda () + (if (pair? maybe-starts+ends) + (values (car maybe-starts+ends) + (cdr maybe-starts+ends)) + (values 0 + '()))) + (lambda (start rest) + (call-with-values + (lambda () (vectors-start+end-2 v1 v2 rest)) + (lambda (start1 end1 start2 end2) + (%vector-merge! < v v1 v2 start start1 end1 start2 end2)))))) + + +;;; This routine is not exported. The code is tightly bummed. +;;; +;;; If these preconditions hold, the routine can be bummed to run with +;;; unsafe vector-indexing and fixnum arithmetic ops: +;;; - V V1 V2 are vectors. +;;; - START START1 END1 START2 END2 are fixnums. +;;; - (<= 0 START END0 (vector-length V), +;;; where end0 = start + (end1 - start1) + (end2 - start2) +;;; - (<= 0 START1 END1 (vector-length V1)) +;;; - (<= 0 START2 END2 (vector-length V2)) +;;; If you put these error checks in the two client procedures above, you can +;;; safely convert this procedure to use unsafe ops -- which is why it isn't +;;; exported. This will provide *huge* speedup. + +(define (%vector-merge! elt< v v1 v2 start start1 end1 start2 end2) + (letrec ((vblit (lambda (fromv j i end) ; Blit FROMV[J,END) to V[I,?]. + (let lp ((j j) (i i)) + (vector-set! v i (vector-ref fromv j)) + (let ((j (+ j 1))) + (if (< j end) (lp j (+ i 1)))))))) + + (cond ((<= end1 start1) (if (< start2 end2) (vblit v2 start2 start end2))) + ((<= end2 start2) (vblit v1 start1 start end1)) + + ;; Invariants: I is next index of V to write; X = V1[J]; Y = V2[K]. + (else (let lp ((i start) + (j start1) (x (vector-ref v1 start1)) + (k start2) (y (vector-ref v2 start2))) + (let ((i1 (+ i 1))) ; "i+1" is a complex number in R4RS! + (if (elt< y x) + (let ((k (+ k 1))) + (vector-set! v i y) + (if (< k end2) + (lp i1 j x k (vector-ref v2 k)) + (vblit v1 j i1 end1))) + (let ((j (+ j 1))) + (vector-set! v i x) + (if (< j end1) + (lp i1 j (vector-ref v1 j) k y) + (vblit v2 k i1 end2)))))))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; End of code extracted from Olin's vmsort.scm file. +;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/srfi/sorting/select.scm b/srfi/sorting/select.scm new file mode 100644 index 00000000..0975082b --- /dev/null +++ b/srfi/sorting/select.scm @@ -0,0 +1,266 @@ +;;; Linear-time (average case) algorithms for: +;;; +;;; Selecting the kth smallest element from an unsorted vector. +;;; Selecting the kth and (k+1)st smallest elements from an unsorted vector. +;;; Selecting the median from an unsorted vector. + +;;; These procedures are part of SRFI 132 but are missing from +;;; its reference implementation as of 10 March 2016. + +;;; SRFI 132 says this procedure runs in O(n) time. +;;; As implemented, however, the worst-case time is O(n^2) because +;;; vector-select is implemented using randomized quickselect. +;;; The average time is O(n), and you'd have to be unlucky +;;; to approach the worst case. + +(define (vector-find-median < v knil . rest) + (let* ((mean (if (null? rest) + (lambda (a b) (/ (+ a b) 2)) + (car rest))) + (n (vector-length v))) + (cond ((zero? n) + knil) + ((odd? n) + (%vector-select < v (quotient n 2) 0 n)) + (else + (call-with-values + (lambda () (%vector-select2 < v (- (quotient n 2) 1) 0 n)) + (lambda (a b) + (mean a b))))))) + +;;; For this procedure, the SRFI 132 specification +;;; demands the vector be sorted (by side effect). + +(define (vector-find-median! < v knil . rest) + (let* ((mean (if (null? rest) + (lambda (a b) (/ (+ a b) 2)) + (car rest))) + (n (vector-length v))) + (vector-sort! < v) + (cond ((zero? n) + knil) + ((odd? n) + (vector-ref v (quotient n 2))) + (else + (mean (vector-ref v (- (quotient n 2) 1)) + (vector-ref v (quotient n 2))))))) + +;;; SRFI 132 says this procedure runs in O(n) time. +;;; As implemented, however, the worst-case time is O(n^2). +;;; The average time is O(n), and you'd have to be unlucky +;;; to approach the worst case. +;;; +;;; After rest argument processing, calls the private version defined below. + +(define (vector-select < v k . rest) + (let* ((start (if (null? rest) + 0 + (car rest))) + (end (if (and (pair? rest) + (pair? (cdr rest))) + (car (cdr rest)) + (vector-length v)))) + (%vector-select < v k start end))) + +;;; The vector-select procedure is needed internally to implement +;;; vector-find-median, but SRFI 132 has been changed (for no good +;;; reason) to export vector-select! instead of vector-select. +;;; Fortunately, vector-select! is not required to have side effects. + +(define vector-select! vector-select) + +;;; This could be made slightly more efficient, but who cares? + +(define (vector-separate! < v k . rest) + (let* ((start (if (null? rest) + 0 + (car rest))) + (end (if (and (pair? rest) + (pair? (cdr rest))) + (car (cdr rest)) + (vector-length v)))) + (if (and (> k 0) + (> end start)) + (let ((pivot (vector-select < v (- k 1) start end))) + (call-with-values + (lambda () (count-smaller < pivot v start end 0 0)) + (lambda (count count2) + (let* ((v2 (make-vector count)) + (v3 (make-vector (- end start count count2)))) + (copy-smaller! < pivot v2 0 v start end) + (copy-bigger! < pivot v3 0 v start end) + (r7rs-vector-copy! v start v2) + (r7rs-vector-fill! v pivot (+ start count) (+ start count count2)) + (r7rs-vector-copy! v (+ start count count2) v3)))))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; For small ranges, sorting may be the fastest way to find the kth element. +;;; This threshold is not at all critical, and may not even be worthwhile. + +(define just-sort-it-threshold 50) + +;;; Given +;;; an irreflexive total order vector l))) ; a vector and sorting that. + (vector-heap-sort! < v) + (vector->list v))) + +(define list-sort! list-merge-sort!) + +(define list-stable-sort list-merge-sort) +(define list-stable-sort! list-merge-sort!) + +(define vector-sort vector-quick-sort) +(define vector-sort! vector-quick-sort!) + +(define vector-stable-sort vector-merge-sort) +(define vector-stable-sort! vector-merge-sort!) + diff --git a/srfi/sorting/sortfaster.scm b/srfi/sorting/sortfaster.scm new file mode 100644 index 00000000..49f1a65d --- /dev/null +++ b/srfi/sorting/sortfaster.scm @@ -0,0 +1,49 @@ +;;; SRFI 132 specifies these eight procedures. +;;; +;;; Benchmarking has shown that the (rnrs sorting) procedures +;;; are faster than the sorting procedures defined by SRFI 132's +;;; reference implementation, so the R6RS procedures are used here. +;;; +;;; This file is a plug-and-play alternative to sort.scm in the +;;; same directory. + +(define list-sort r6rs-list-sort) +(define list-sort! r6rs-list-sort) +(define list-stable-sort r6rs-list-sort) +(define list-stable-sort! r6rs-list-sort) + +(define (vector-sort < v . rest) + (cond ((null? rest) + (r6rs-vector-sort < v)) + ((null? (cdr rest)) + (r6rs-vector-sort < (r7rs-vector-copy v (car rest)))) + ((null? (cddr rest)) + (r6rs-vector-sort < (r7rs-vector-copy v (car rest) (cadr rest)))) + (else + (error 'vector-sort + "too many arguments" + (cons < (cons v rest)))))) + +(define vector-stable-sort vector-sort) + +(define (vector-sort! < v . rest) + (cond ((null? rest) + (r6rs-vector-sort! < v)) + ((null? (cdr rest)) + (let* ((start (car rest)) + (v2 (r7rs-vector-copy v start))) + (r6rs-vector-sort! < v2) + (r7rs-vector-copy! v start v2 0))) + ((null? (cddr rest)) + (let* ((start (car rest)) + (end (cadr rest)) + (v2 (r7rs-vector-copy v start end))) + (r6rs-vector-sort! < v2) + (r7rs-vector-copy! v start v2 0))) + (else + (error 'vector-sort! + "too many arguments" + (cons < (cons v rest)))))) + +(define vector-stable-sort! vector-sort!) + diff --git a/srfi/sorting/sorting-test.scm b/srfi/sorting/sorting-test.scm new file mode 100644 index 00000000..5ab62980 --- /dev/null +++ b/srfi/sorting/sorting-test.scm @@ -0,0 +1,83 @@ +;;; Little test harness, 'cause I'm paraoid about tricky code. + +;;; This code is +;;; Copyright (c) 1998 by Olin Shivers. +;;; The terms are: You may do as you please with this code, as long as +;;; you do not delete this notice or hold me responsible for any outcome +;;; related to its use. +;;; +;;; Blah blah blah. Don't you think source files should contain more lines +;;; of code than copyright notice? + +(define-test-suite sort-tests) + +;; Three-way comparison for numbers +(define (my-c x y) + (cond ((= x y) 0) + ((< x y) -1) + (else 1))) + +;;; For testing stable sort -- 3 & -3 compare the same. +(define (my< x y) (< (abs x) (abs y))) + +(define (unstable-sort-test v) ; quick & heap vs simple insert + (let ((v1 (vector-copy v)) + (v2 (vector-copy v)) + (v3 (vector-copy v)) + (v4 (vector-copy v))) + (vector-heap-sort! < v1) + (vector-insert-sort! < v2) + (vector-quick-sort! < v3) + (vector-quick-sort3! my-c v4) + (check-that v2 (is v1)) + (check-that v3 (is v1)) + (check-that v4 (is v1)) + (check-that v1 (is (lambda (v) (vector-sorted? < v)))))) + +(define (stable-sort-test v) ; insert, list & vector merge sorts + (let ((v1 (vector-copy v)) + (v2 (vector-copy v)) + (v3 (list->vector (list-merge-sort! my< (vector->list v)))) + (v4 (list->vector (list-merge-sort my< (vector->list v))))) + (vector-merge-sort! my< v1) + (vector-insert-sort! my< v2) + (check-that v1 (is (lambda (v) (vector-sorted? my< v)))) + (check-that v2 (is v1)) + (check-that v3 (is v1)) + (check-that v4 (is v1)))) + +(define (run-sort-test sort-test count max-size) + (let loop ((i 0)) + (if (< i count) + (begin + (sort-test (random-vector (random-integer max-size))) + (loop (+ 1 i)))))) + +(define-test-case stable-sort sort-tests + (run-sort-test stable-sort-test 10 4096)) + +(define-test-case unstable-sort sort-tests + (run-sort-test unstable-sort-test 10 4096)) + +(define (random-vector size) + (let ((v (make-vector size))) + (fill-vector-randomly! v (* 10 size)) + v)) + +(define (fill-vector-randomly! v range) + (let ((half (quotient range 2))) + (do ((i (- (vector-length v) 1) (- i 1))) + ((< i 0)) + (vector-set! v i (- (random-integer range) half))))) + +(define (vector-portion-copy vec start end) + (let* ((len (vector-length vec)) + (new-len (- end start)) + (new (make-vector new-len))) + (do ((i start (+ i 1)) + (j 0 (+ j 1))) + ((= i end) new) + (vector-set! new j (vector-ref vec i))))) + +(define (vector-copy vec) + (vector-portion-copy vec 0 (vector-length vec))) diff --git a/srfi/sorting/sortp.scm b/srfi/sorting/sortp.scm new file mode 100644 index 00000000..a6e3e9a8 --- /dev/null +++ b/srfi/sorting/sortp.scm @@ -0,0 +1,35 @@ +;;; The sort package -- sorted predicates +;;; Olin Shivers 10/98. +;;; +;;; (list-sorted? < lis) -> boolean +;;; (vector-sorted? < v [start end]) -> boolean + +(define (list-sorted? < list) + (or (not (pair? list)) + (let lp ((prev (car list)) (tail (cdr list))) + (or (not (pair? tail)) + (let ((next (car tail))) + (and (not (< next prev)) + (lp next (cdr tail)))))))) + +(define (vector-sorted? elt< v . maybe-start+end) + (call-with-values + (lambda () (vector-start+end v maybe-start+end)) + (lambda (start end) + (or (>= start end) ; Empty range + (let lp ((i (+ start 1)) (vi-1 (vector-ref v start))) + (or (>= i end) + (let ((vi (vector-ref v i))) + (and (not (elt< vi vi-1)) + (lp (+ i 1) vi))))))))) + +;;; Copyright and porting non-notices +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Give me a break. It's fifteen lines of code. I place this code in the +;;; public domain; help yourself. +;;; +;;; If your Scheme has a faster mechanism for handling optional arguments +;;; (e.g., Chez), you should definitely port over to it. Note that argument +;;; defaulting and error-checking are interleaved -- you don't have to +;;; error-check defaulted START/END args to see if they are fixnums that are +;;; legal vector indices for the corresponding vector, etc. diff --git a/srfi/sorting/srfi-132-test.sps b/srfi/sorting/srfi-132-test.sps new file mode 100644 index 00000000..4938a674 --- /dev/null +++ b/srfi/sorting/srfi-132-test.sps @@ -0,0 +1,1684 @@ +;;; Test program for SRFI 132 (Sort Libraries). + +;;; Copyright © William D Clinger (2016). +;;; +;;; Permission is hereby granted, free of charge, to any person +;;; obtaining a copy of this software and associated documentation +;;; files (the "Software"), to deal in the Software without +;;; restriction, including without limitation the rights to use, +;;; copy, modify, merge, publish, distribute, sublicense, and/or +;;; sell copies of the Software, and to permit persons to whom the +;;; Software is furnished to do so, subject to the following +;;; conditions: +;;; +;;; The above copyright notice and this permission notice shall be +;;; included in all copies or substantial portions of the Software. +;;; +;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +;;; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +;;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +;;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +;;; OTHER DEALINGS IN THE SOFTWARE. + +;;; Embeds Olin's test harness. Here is his copyright notice: + +;;; This code is +;;; Copyright (c) 1998 by Olin Shivers. +;;; The terms are: You may do as you please with this code, as long as +;;; you do not delete this notice or hold me responsible for any outcome +;;; related to its use. +;;; +;;; Blah blah blah. Don't you think source files should contain more lines +;;; of code than copyright notice? + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; To run this program in Larceny, from this directory: +;;; +;;; % mkdir srfi +;;; % cp 132.sld *.scm srfi +;;; % larceny --r7rs --program srfi-132-test.sps --path . +;;; +;;; Other implementations of the R7RS may use other conventions +;;; for naming and locating library files, but the conventions +;;; assumed by this program are the most widely implemented. +;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; Olin's test harness tests some procedures that aren't part of SRFI 132, +;;; so the (local olin) library defined here is just to support Olin's tests. +;;; (Including Olin's code within the test program would create name +;;; conflicts.) + +(define-library (local olin) + + (export list-merge-sort vector-merge-sort ; not part of SRFI 132 + list-merge-sort! vector-merge-sort! ; not part of SRFI 132 + vector-insert-sort vector-insert-sort! ; not part of SRFI 132 + vector-heap-sort vector-heap-sort! ; not part of SRFI 132 + vector-quick-sort vector-quick-sort! ; not part of SRFI 132 +; vector-binary-search vector-binary-search3 ; not part of SRFI 132 + vector-quick-sort3 vector-quick-sort3! ; not part of SRFI 132 + ) + + (import (except (scheme base) vector-copy vector-copy!) + (rename (only (scheme base) vector-copy vector-copy!) + (vector-copy r7rs-vector-copy) + (vector-copy! r7rs-vector-copy!)) + (scheme cxr) + (only (srfi 27) random-integer)) + + (include "delndups.scm") + (include "lmsort.scm") + (include "sortp.scm") + (include "vector-util.scm") + (include "vhsort.scm") + (include "visort.scm") + (include "vmsort.scm") + (include "vqsort2.scm") + (include "vqsort3.scm") + ) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; The test program contains optional benchmarks that can be +;;; enabled by defining display-benchmark-results? as true. + +(define-library (local benchmarking) + (export display-benchmark-results? + r6rs-list-sort + r6rs-vector-sort + r6rs-vector-sort!) + (import (scheme base) + (srfi 132)) + + (cond-expand + ((library (rnrs sorting)) + (import + (rename (rnrs sorting) + (list-sort r6rs-list-sort) + (vector-sort r6rs-vector-sort) + (vector-sort! r6rs-vector-sort!)))) + (else + (begin + (define r6rs-list-sort list-sort) + (define r6rs-vector-sort vector-sort) + (define r6rs-vector-sort! vector-sort!)))) + + (begin + + ;; To display benchmark results, change this to true. + + (define display-benchmark-results? #f) + + )) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(import (except (scheme base) vector-copy) + (rename (scheme base) + (vector-copy r7rs-vector-copy)) + (scheme write) + (scheme process-context) + (scheme time) + (only (srfi 27) random-integer) + (srfi 132) + (local olin) + (local benchmarking)) + +;;; These definitions avoid having to change Olin's code. + +(define-syntax define-test-suite + (syntax-rules () + ((_ name) + (define (name test-name thunk) + (thunk))))) + +(define-syntax define-test-case + (syntax-rules () + ((_ test-name suite-name expr) + (define (test-name) + (suite-name 'test-name (lambda () expr)))))) + +(define (is x) x) + +(define (check-that x y) + (or (if (procedure? y) + (y x) + (equal? x y)) + (fail "some test failed"))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; Olin's code. +;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; Little test harness, 'cause I'm paraoid about tricky code. + +;;; This code is +;;; Copyright (c) 1998 by Olin Shivers. +;;; The terms are: You may do as you please with this code, as long as +;;; you do not delete this notice or hold me responsible for any outcome +;;; related to its use. +;;; +;;; Blah blah blah. Don't you think source files should contain more lines +;;; of code than copyright notice? + +(define-test-suite sort-tests) + +;; Three-way comparison for numbers +(define (my-c x y) + (cond ((= x y) 0) + ((< x y) -1) + (else 1))) + +;;; For testing stable sort -- 3 & -3 compare the same. +(define (my< x y) (< (abs x) (abs y))) + +(define (unstable-sort-test v) ; quick & heap vs simple insert + (let ((v1 (vector-copy v)) + (v2 (vector-copy v)) + (v3 (vector-copy v)) + (v4 (vector-copy v))) + (vector-heap-sort! < v1) + (vector-insert-sort! < v2) + (vector-quick-sort! < v3) + (vector-quick-sort3! my-c v4) + (check-that v2 (is v1)) + (check-that v3 (is v1)) + (check-that v4 (is v1)) + (check-that v1 (is (lambda (v) (vector-sorted? < v)))))) + +(define (stable-sort-test v) ; insert, list & vector merge sorts + (let ((v1 (vector-copy v)) + (v2 (vector-copy v)) + (v3 (list->vector (list-merge-sort! my< (vector->list v)))) + (v4 (list->vector (list-merge-sort my< (vector->list v))))) + (vector-merge-sort! my< v1) + (vector-insert-sort! my< v2) + (check-that v1 (is (lambda (v) (vector-sorted? my< v)))) + (check-that v2 (is v1)) + (check-that v3 (is v1)) + (check-that v4 (is v1)))) + +(define (run-sort-test sort-test count max-size) + (let loop ((i 0)) + (if (< i count) + (begin + (sort-test (random-vector (random-integer max-size))) + (loop (+ 1 i)))))) + +(define-test-case stable-sort sort-tests + (run-sort-test stable-sort-test 10 4096)) + +(define-test-case unstable-sort sort-tests + (run-sort-test unstable-sort-test 10 4096)) + +(define (random-vector size) + (let ((v (make-vector size))) + (fill-vector-randomly! v (* 10 size)) + v)) + +(define (fill-vector-randomly! v range) + (let ((half (quotient range 2))) + (do ((i (- (vector-length v) 1) (- i 1))) + ((< i 0)) + (vector-set! v i (- (random-integer range) half))))) + +(define (vector-portion-copy vec start end) + (let* ((len (vector-length vec)) + (new-len (- end start)) + (new (make-vector new-len))) + (do ((i start (+ i 1)) + (j 0 (+ j 1))) + ((= i end) new) + (vector-set! new j (vector-ref vec i))))) + +(define (vector-copy vec) + (vector-portion-copy vec 0 (vector-length vec))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; End of Olin's code. +;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (writeln . xs) + (for-each display xs) + (newline)) + +(define (fail token . more) + (writeln "Error: test failed: " token) + #f) + +(stable-sort) +(unstable-sort) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; Additional tests written specifically for SRFI 132. +;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(or (list-sorted? > '()) + (fail 'list-sorted?:empty-list)) + +(or (list-sorted? > '(987)) + (fail 'list-sorted?:singleton)) + +(or (list-sorted? > '(9 8 7)) + (fail 'list-sorted?:non-empty-list)) + +(or (vector-sorted? > '#()) + (fail 'vector-sorted?:empty-vector)) + +(or (vector-sorted? > '#(987)) + (fail 'vector-sorted?:singleton)) + +(or (vector-sorted? > '#(9 8 7 6 5)) + (fail 'vector-sorted?:non-empty-vector)) + +(or (vector-sorted? > '#() 0) + (fail 'vector-sorted?:empty-vector:0)) + +(or (vector-sorted? > '#(987) 1) + (fail 'vector-sorted?:singleton:1)) + +(or (vector-sorted? > '#(9 8 7 6 5) 1) + (fail 'vector-sorted?:non-empty-vector:1)) + +(or (vector-sorted? > '#() 0 0) + (fail 'vector-sorted?:empty-vector:0:0)) + +(or (vector-sorted? > '#(987) 1 1) + (fail 'vector-sorted?:singleton:1:1)) + +(or (vector-sorted? > '#(9 8 7 6 5) 1 2) + (fail 'vector-sorted?:non-empty-vector:1:2)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(or (equal? (list-sort > (list)) + '()) + (fail 'list-sort:empty-list)) + +(or (equal? (list-sort > (list 987)) + '(987)) + (fail 'list-sort:singleton)) + +(or (equal? (list-sort > (list 987 654)) + '(987 654)) + (fail 'list-sort:doubleton)) + +(or (equal? (list-sort > (list 9 8 6 3 0 4 2 5 7 1)) + '(9 8 7 6 5 4 3 2 1 0)) + (fail 'list-sort:iota10)) + +(or (equal? (list-stable-sort > (list)) + '()) + (fail 'list-stable-sort:empty-list)) + +(or (equal? (list-stable-sort > (list 987)) + '(987)) + (fail 'list-stable-sort:singleton)) + +(or (equal? (list-stable-sort > (list 987 654)) + '(987 654)) + (fail 'list-stable-sort:doubleton)) + +(or (equal? (list-stable-sort > (list 9 8 6 3 0 4 2 5 7 1)) + '(9 8 7 6 5 4 3 2 1 0)) + (fail 'list-stable-sort:iota10)) + +(or (equal? (list-stable-sort (lambda (x y) + (> (quotient x 2) + (quotient y 2))) + (list 9 8 6 3 0 4 2 5 7 1)) + '(9 8 6 7 4 5 3 2 0 1)) + (fail 'list-stable-sort:iota10-quotient2)) + +(or (equal? (let ((v (vector))) + (vector-sort > v)) + '#()) + (fail 'vector-sort:empty-vector)) + +(or (equal? (let ((v (vector 987))) + (vector-sort > (vector 987))) + '#(987)) + (fail 'vector-sort:singleton)) + +(or (equal? (let ((v (vector 987 654))) + (vector-sort > v)) + '#(987 654)) + (fail 'vector-sort:doubleton)) + +(or (equal? (let ((v (vector 9 8 6 3 0 4 2 5 7 1))) + (vector-sort > v)) + '#(9 8 7 6 5 4 3 2 1 0)) + (fail 'vector-sort:iota10)) + +(or (equal? (let ((v (vector))) + (vector-stable-sort > v)) + '#()) + (fail 'vector-stable-sort:empty-vector)) + +(or (equal? (let ((v (vector 987))) + (vector-stable-sort > (vector 987))) + '#(987)) + (fail 'vector-stable-sort:singleton)) + +(or (equal? (let ((v (vector 987 654))) + (vector-stable-sort > v)) + '#(987 654)) + (fail 'vector-stable-sort:doubleton)) + +(or (equal? (let ((v (vector 9 8 6 3 0 4 2 5 7 1))) + (vector-stable-sort > v)) + '#(9 8 7 6 5 4 3 2 1 0)) + (fail 'vector-stable-sort:iota10)) + +(or (equal? (let ((v (vector 9 8 6 3 0 4 2 5 7 1))) + (vector-stable-sort (lambda (x y) + (> (quotient x 2) + (quotient y 2))) + v)) + '#(9 8 6 7 4 5 3 2 0 1)) + (fail 'vector-stable-sort:iota10-quotient2)) + +(or (equal? (let ((v (vector))) + (vector-sort > v 0)) + '#()) + (fail 'vector-sort:empty-vector:0)) + +(or (equal? (let ((v (vector 987))) + (vector-sort > (vector 987) 1)) + '#()) + (fail 'vector-sort:singleton:1)) + +(or (equal? (let ((v (vector 987 654))) + (vector-sort > v 1)) + '#(654)) + (fail 'vector-sort:doubleton:1)) + +(or (equal? (let ((v (vector 9 8 6 3 0 4 2 5 7 1))) + (vector-sort > v 3)) + '#(7 5 4 3 2 1 0)) + (fail 'vector-sort:iota10:3)) + +(or (equal? (let ((v (vector))) + (vector-stable-sort > v 0)) + '#()) + (fail 'vector-stable-sort:empty-vector:0)) + +(or (equal? (let ((v (vector 987))) + (vector-stable-sort > (vector 987) 1)) + '#()) + (fail 'vector-stable-sort:singleton:1)) + +(or (equal? (let ((v (vector 987 654))) + (vector-stable-sort < v 0 2)) + '#(654 987)) + (fail 'vector-stable-sort:doubleton:0:2)) + +(or (equal? (let ((v (vector 9 8 6 3 0 4 2 5 7 1))) + (vector-stable-sort > v 3)) + '#(7 5 4 3 2 1 0)) + (fail 'vector-stable-sort:iota10:3)) + +(or (equal? (let ((v (vector 9 8 6 3 0 4 2 5 7 1))) + (vector-stable-sort (lambda (x y) + (> (quotient x 2) + (quotient y 2))) + v + 3)) + '#(7 4 5 3 2 0 1)) + (fail 'vector-stable-sort:iota10-quotient2:3)) + +(or (equal? (let ((v (vector))) + (vector-sort > v 0 0)) + '#()) + (fail 'vector-sort:empty-vector:0:0)) + +(or (equal? (let ((v (vector 987))) + (vector-sort > (vector 987) 1 1)) + '#()) + (fail 'vector-sort:singleton:1:1)) + +(or (equal? (let ((v (vector 987 654))) + (vector-sort > v 1 2)) + '#(654)) + (fail 'vector-sort:doubleton:1:2)) + +(or (equal? (let ((v (vector 9 8 6 3 0 4 2 5 7 1))) + (vector-sort > v 4 8)) + '#(5 4 2 0)) + (fail 'vector-sort:iota10:4:8)) + +(or (equal? (let ((v (vector))) + (vector-stable-sort > v 0 0)) + '#()) + (fail 'vector-stable-sort:empty-vector:0:0)) + +(or (equal? (let ((v (vector 987))) + (vector-stable-sort > (vector 987) 1 1)) + '#()) + (fail 'vector-stable-sort:singleton:1:1)) + +(or (equal? (let ((v (vector 987 654))) + (vector-stable-sort > v 1 2)) + '#(654)) + (fail 'vector-stable-sort:doubleton:1:2)) + +(or (equal? (let ((v (vector 9 8 6 3 0 4 2 5 7 1))) + (vector-stable-sort > v 2 6)) + '#(6 4 3 0)) + (fail 'vector-stable-sort:iota10:2:6)) + +(or (equal? (let ((v (vector 9 8 6 3 0 4 2 5 7 1))) + (vector-stable-sort (lambda (x y) + (> (quotient x 2) + (quotient y 2))) + v + 1 + 8)) + '#(8 6 4 5 3 2 0)) + (fail 'vector-stable-sort:iota10-quotient2:1:8)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(or (equal? (list-sort! > (list)) + '()) + (fail 'list-sort!:empty-list)) + +(or (equal? (list-sort! > (list 987)) + '(987)) + (fail 'list-sort!:singleton)) + +(or (equal? (list-sort! > (list 987 654)) + '(987 654)) + (fail 'list-sort!:doubleton)) + +(or (equal? (list-sort! > (list 9 8 6 3 0 4 2 5 7 1)) + '(9 8 7 6 5 4 3 2 1 0)) + (fail 'list-sort!:iota10)) + +(or (equal? (list-stable-sort! > (list)) + '()) + (fail 'list-stable-sort!:empty-list)) + +(or (equal? (list-stable-sort! > (list 987)) + '(987)) + (fail 'list-stable-sort!:singleton)) + +(or (equal? (list-stable-sort! > (list 987 654)) + '(987 654)) + (fail 'list-stable-sort!:doubleton)) + +(or (equal? (list-stable-sort! > (list 9 8 6 3 0 4 2 5 7 1)) + '(9 8 7 6 5 4 3 2 1 0)) + (fail 'list-stable-sort!:iota10)) + +(or (equal? (list-stable-sort! (lambda (x y) + (> (quotient x 2) + (quotient y 2))) + (list 9 8 6 3 0 4 2 5 7 1)) + '(9 8 6 7 4 5 3 2 0 1)) + (fail 'list-stable-sort!:iota10-quotient2)) + +(or (equal? (let ((v (vector))) + (vector-sort! > v) + v) + '#()) + (fail 'vector-sort!:empty-vector)) + +(or (equal? (let ((v (vector 987))) + (vector-sort! > (vector 987)) + v) + '#(987)) + (fail 'vector-sort!:singleton)) + +(or (equal? (let ((v (vector 987 654))) + (vector-sort! > v) + v) + '#(987 654)) + (fail 'vector-sort!:doubleton)) + +(or (equal? (let ((v (vector 9 8 6 3 0 4 2 5 7 1))) + (vector-sort! > v) + v) + '#(9 8 7 6 5 4 3 2 1 0)) + (fail 'vector-sort!:iota10)) + +(or (equal? (let ((v (vector))) + (vector-stable-sort! > v) + v) + '#()) + (fail 'vector-stable-sort!:empty-vector)) + +(or (equal? (let ((v (vector 987))) + (vector-stable-sort! > (vector 987)) + v) + '#(987)) + (fail 'vector-stable-sort!:singleton)) + +(or (equal? (let ((v (vector 987 654))) + (vector-stable-sort! > v) + v) + '#(987 654)) + (fail 'vector-stable-sort!:doubleton)) + +(or (equal? (let ((v (vector 9 8 6 3 0 4 2 5 7 1))) + (vector-stable-sort! > v) + v) + '#(9 8 7 6 5 4 3 2 1 0)) + (fail 'vector-stable-sort!:iota10)) + +(or (equal? (let ((v (vector 9 8 6 3 0 4 2 5 7 1))) + (vector-stable-sort! (lambda (x y) + (> (quotient x 2) + (quotient y 2))) + v) + v) + '#(9 8 6 7 4 5 3 2 0 1)) + (fail 'vector-stable-sort!:iota10-quotient2)) + +(or (equal? (let ((v (vector))) + (vector-sort! > v 0) + v) + '#()) + (fail 'vector-sort!:empty-vector:0)) + +(or (equal? (let ((v (vector 987))) + (vector-sort! > (vector 987) 1) + v) + '#(987)) + (fail 'vector-sort!:singleton:1)) + +(or (equal? (let ((v (vector 987 654))) + (vector-sort! > v 1) + v) + '#(987 654)) + (fail 'vector-sort!:doubleton:1)) + +(or (equal? (let ((v (vector 9 8 6 3 0 4 2 5 7 1))) + (vector-sort! > v 3) + v) + '#(9 8 6 7 5 4 3 2 1 0)) + (fail 'vector-sort!:iota10:3)) + +(or (equal? (let ((v (vector))) + (vector-stable-sort! > v 0) + v) + '#()) + (fail 'vector-stable-sort!:empty-vector:0)) + +(or (equal? (let ((v (vector 987))) + (vector-stable-sort! > (vector 987) 1) + v) + '#(987)) + (fail 'vector-stable-sort!:singleton:1)) + +(or (equal? (let ((v (vector 987 654))) + (vector-stable-sort! < v 0 2) + v) + '#(654 987)) + (fail 'vector-stable-sort!:doubleton:0:2)) + +(or (equal? (let ((v (vector 9 8 6 3 0 4 2 5 7 1))) + (vector-stable-sort! > v 3) + v) + '#(9 8 6 7 5 4 3 2 1 0)) + (fail 'vector-stable-sort!:iota10:3)) + +(or (equal? (let ((v (vector 9 8 6 3 0 4 2 5 7 1))) + (vector-stable-sort! (lambda (x y) + (> (quotient x 2) + (quotient y 2))) + v + 3) + v) + '#(9 8 6 7 4 5 3 2 0 1)) + (fail 'vector-stable-sort!:iota10-quotient2:3)) + +(or (equal? (let ((v (vector))) + (vector-sort! > v 0 0) + v) + '#()) + (fail 'vector-sort!:empty-vector:0:0)) + +(or (equal? (let ((v (vector 987))) + (vector-sort! > (vector 987) 1 1) + v) + '#(987)) + (fail 'vector-sort!:singleton:1:1)) + +(or (equal? (let ((v (vector 987 654))) + (vector-sort! > v 1 2) + v) + '#(987 654)) + (fail 'vector-sort!:doubleton:1:2)) + +(or (equal? (let ((v (vector 9 8 6 3 0 4 2 5 7 1))) + (vector-sort! > v 4 8) + v) + '#(9 8 6 3 5 4 2 0 7 1)) + (fail 'vector-sort!:iota10:4:8)) + +(or (equal? (let ((v (vector))) + (vector-stable-sort! > v 0 0) + v) + '#()) + (fail 'vector-stable-sort!:empty-vector:0:0)) + +(or (equal? (let ((v (vector 987))) + (vector-stable-sort! > (vector 987) 1 1) + v) + '#(987)) + (fail 'vector-stable-sort!:singleton:1:1)) + +(or (equal? (let ((v (vector 987 654))) + (vector-stable-sort! > v 1 2) + v) + '#(987 654)) + (fail 'vector-stable-sort!:doubleton:1:2)) + +(or (equal? (let ((v (vector 9 8 6 3 0 4 2 5 7 1))) + (vector-stable-sort! > v 2 6) + v) + '#(9 8 6 4 3 0 2 5 7 1)) + (fail 'vector-stable-sort!:iota10:2:6)) + +(or (equal? (let ((v (vector 9 8 6 3 0 4 2 5 7 1))) + (vector-stable-sort! (lambda (x y) + (> (quotient x 2) + (quotient y 2))) + v + 1 + 8) + v) + '#(9 8 6 4 5 3 2 0 7 1)) + (fail 'vector-stable-sort!:iota10-quotient2:1:8)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(or (equal? (list-merge > (list) (list)) + '()) + (fail 'list-merge:empty:empty)) + +(or (equal? (list-merge > (list) (list 9 6 3 0)) + '(9 6 3 0)) + (fail 'list-merge:empty:nonempty)) + +(or (equal? (list-merge > (list 9 7 5 3 1) (list)) + '(9 7 5 3 1)) + (fail 'list-merge:nonempty:empty)) + +(or (equal? (list-merge > (list 9 7 5 3 1) (list 9 6 3 0)) + '(9 9 7 6 5 3 3 1 0)) + (fail 'list-merge:nonempty:nonempty)) + +(or (equal? (list-merge! > (list) (list)) + '()) + (fail 'list-merge!:empty:empty)) + +(or (equal? (list-merge! > (list) (list 9 6 3 0)) + '(9 6 3 0)) + (fail 'list-merge!:empty:nonempty)) + +(or (equal? (list-merge! > (list 9 7 5 3 1) (list)) + '(9 7 5 3 1)) + (fail 'list-merge!:nonempty:empty)) + +(or (equal? (list-merge! > (list 9 7 5 3 1) (list 9 6 3 0)) + '(9 9 7 6 5 3 3 1 0)) + (fail 'list-merge!:nonempty:nonempty)) + +(or (equal? (vector-merge > (vector) (vector)) + '#()) + (fail 'vector-merge:empty:empty)) + +(or (equal? (vector-merge > (vector) (vector 9 6 3 0)) + '#(9 6 3 0)) + (fail 'vector-merge:empty:nonempty)) + +(or (equal? (vector-merge > (vector 9 7 5 3 1) (vector)) + '#(9 7 5 3 1)) + (fail 'vector-merge:nonempty:empty)) + +(or (equal? (vector-merge > (vector 9 7 5 3 1) (vector 9 6 3 0)) + '#(9 9 7 6 5 3 3 1 0)) + (fail 'vector-merge:nonempty:nonempty)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector) (vector)) + v) + '#(#f #f #f #f #f #f #f #f #f #f #f #f)) + (fail 'vector-merge!:empty:empty)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector) (vector 9 6 3 0)) + v) + '#( 9 6 3 0 #f #f #f #f #f #f #f #f)) + (fail 'vector-merge!:empty:nonempty)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector 9 7 5 3 1) (vector)) + v) + '#( 9 7 5 3 1 #f #f #f #f #f #f #f)) + (fail 'vector-merge!:nonempty:empty)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector 9 7 5 3 1) (vector 9 6 3 0)) + v) + '#( 9 9 7 6 5 3 3 1 0 #f #f #f)) + (fail 'vector-merge!:nonempty:nonempty)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector) (vector) 0) + v) + '#(#f #f #f #f #f #f #f #f #f #f #f #f)) + (fail 'vector-merge!:empty:empty:0)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector) (vector 9 6 3 0) 0) + v) + '#( 9 6 3 0 #f #f #f #f #f #f #f #f)) + (fail 'vector-merge!:empty:nonempty:0)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector 9 7 5 3 1) (vector) 0) + v) + '#( 9 7 5 3 1 #f #f #f #f #f #f #f)) + (fail 'vector-merge!:nonempty:empty:0)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector 9 7 5 3 1) (vector 9 6 3 0) 0) + v) + '#( 9 9 7 6 5 3 3 1 0 #f #f #f)) + (fail 'vector-merge!:nonempty:nonempty:0)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector) (vector) 2) + v) + '#(#f #f #f #f #f #f #f #f #f #f #f #f)) + (fail 'vector-merge!:empty:empty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector) (vector 9 6 3 0) 2) + v) + '#(#f #f 9 6 3 0 #f #f #f #f #f #f)) + (fail 'vector-merge!:empty:nonempty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector 9 7 5 3 1) (vector) 2) + v) + '#(#f #f 9 7 5 3 1 #f #f #f #f #f)) + (fail 'vector-merge!:nonempty:empty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector 9 7 5 3 1) (vector 9 6 3 0) 2) + v) + '#(#f #f 9 9 7 6 5 3 3 1 0 #f)) + (fail 'vector-merge!:nonempty:nonempty:2)) + +(or (equal? (vector-merge > (vector) (vector) 0) + '#()) + (fail 'vector-merge:empty:empty)) + +(or (equal? (vector-merge > (vector) (vector 9 6 3 0) 0) + '#(9 6 3 0)) + (fail 'vector-merge:empty:nonempty)) + +(or (equal? (vector-merge > (vector 9 7 5 3 1) (vector) 2) + '#(5 3 1)) + (fail 'vector-merge:nonempty:empty)) + +(or (equal? (vector-merge > (vector 9 7 5 3 1) (vector 9 6 3 0) 2) + '#(9 6 5 3 3 1 0)) + (fail 'vector-merge:nonempty:nonempty)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector) (vector) 2 0) + v) + '#(#f #f #f #f #f #f #f #f #f #f #f #f)) + (fail 'vector-merge!:empty:empty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector) (vector 9 6 3 0) 2 0) + v) + '#(#f #f 9 6 3 0 #f #f #f #f #f #f)) + (fail 'vector-merge!:empty:nonempty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector 9 7 5 3 1) (vector) 2 2) + v) + '#(#f #f 5 3 1 #f #f #f #f #f #f #f)) + (fail 'vector-merge!:nonempty:empty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector 9 7 5 3 1) (vector 9 6 3 0) 2 2) + v) + '#(#f #f 9 6 5 3 3 1 0 #f #f #f)) + (fail 'vector-merge!:nonempty:nonempty:2)) + +(or (equal? (vector-merge > (vector) (vector) 0 0) + '#()) + (fail 'vector-merge:empty:empty)) + +(or (equal? (vector-merge > (vector) (vector 9 6 3 0) 0 0) + '#(9 6 3 0)) + (fail 'vector-merge:empty:nonempty)) + +(or (equal? (vector-merge > (vector 9 7 5 3 1) (vector) 2 5) + '#(5 3 1)) + (fail 'vector-merge:nonempty:empty)) + +(or (equal? (vector-merge > (vector 9 7 5 3 1) (vector 9 6 3 0) 2 5) + '#(9 6 5 3 3 1 0)) + (fail 'vector-merge:nonempty:nonempty)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector) (vector) 2 0 0) + v) + '#(#f #f #f #f #f #f #f #f #f #f #f #f)) + (fail 'vector-merge!:empty:empty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector) (vector 9 6 3 0) 2 0 0) + v) + '#(#f #f 9 6 3 0 #f #f #f #f #f #f)) + (fail 'vector-merge!:empty:nonempty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector 9 7 5 3 1) (vector) 2 2 5) + v) + '#(#f #f 5 3 1 #f #f #f #f #f #f #f)) + (fail 'vector-merge!:nonempty:empty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector 9 7 5 3 1) (vector 9 6 3 0) 2 2 5) + v) + '#(#f #f 9 6 5 3 3 1 0 #f #f #f)) + (fail 'vector-merge!:nonempty:nonempty:2)) + +;;; Some tests are duplicated to make the pattern easier to discern. + +(or (equal? (vector-merge > (vector) (vector) 0 0) + '#()) + (fail 'vector-merge:empty:empty)) + +(or (equal? (vector-merge > (vector) (vector 9 6 3 0) 0 0) + '#(9 6 3 0)) + (fail 'vector-merge:empty:nonempty)) + +(or (equal? (vector-merge > (vector 9 7 5 3 1) (vector) 2 4) + '#(5 3)) + (fail 'vector-merge:nonempty:empty)) + +(or (equal? (vector-merge > (vector 9 7 5 3 1) (vector 9 6 3 0) 2 4) + '#(9 6 5 3 3 0)) + (fail 'vector-merge:nonempty:nonempty)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector) (vector) 2 0 0) + v) + '#(#f #f #f #f #f #f #f #f #f #f #f #f)) + (fail 'vector-merge!:empty:empty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector) (vector 9 6 3 0) 2 0 0) + v) + '#(#f #f 9 6 3 0 #f #f #f #f #f #f)) + (fail 'vector-merge!:empty:nonempty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector 9 7 5 3 1) (vector) 2 2 4) + v) + '#(#f #f 5 3 #f #f #f #f #f #f #f #f)) + (fail 'vector-merge!:nonempty:empty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector 9 7 5 3 1) (vector 9 6 3 0) 2 2 4) + v) + '#(#f #f 9 6 5 3 3 0 #f #f #f #f)) + (fail 'vector-merge!:nonempty:nonempty:2)) + +(or (equal? (vector-merge > (vector) (vector) 0 0 0) + '#()) + (fail 'vector-merge:empty:empty)) + +(or (equal? (vector-merge > (vector) (vector 9 6 3 0) 0 0 0) + '#(9 6 3 0)) + (fail 'vector-merge:empty:nonempty)) + +(or (equal? (vector-merge > (vector 9 7 5 3 1) (vector) 2 4 0) + '#(5 3)) + (fail 'vector-merge:nonempty:empty)) + +(or (equal? (vector-merge > (vector 9 7 5 3 1) (vector 9 6 3 0) 2 4 0) + '#(9 6 5 3 3 0)) + (fail 'vector-merge:nonempty:nonempty)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector) (vector) 2 0 0 0) + v) + '#(#f #f #f #f #f #f #f #f #f #f #f #f)) + (fail 'vector-merge!:empty:empty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector) (vector 9 6 3 0) 2 0 0 0) + v) + '#(#f #f 9 6 3 0 #f #f #f #f #f #f)) + (fail 'vector-merge!:empty:nonempty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector 9 7 5 3 1) (vector) 2 2 4 0) + v) + '#(#f #f 5 3 #f #f #f #f #f #f #f #f)) + (fail 'vector-merge!:nonempty:empty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector 9 7 5 3 1) (vector 9 6 3 0) 2 2 4 0) + v) + '#(#f #f 9 6 5 3 3 0 #f #f #f #f)) + (fail 'vector-merge!:nonempty:nonempty:2)) + +(or (equal? (vector-merge > (vector) (vector) 0 0 0) + '#()) + (fail 'vector-merge:empty:empty)) + +(or (equal? (vector-merge > (vector) (vector 9 6 3 0) 0 0 1) + '#(6 3 0)) + (fail 'vector-merge:empty:nonempty)) + +(or (equal? (vector-merge > (vector 9 7 5 3 1) (vector) 2 4 0) + '#(5 3)) + (fail 'vector-merge:nonempty:empty)) + +(or (equal? (vector-merge > (vector 9 7 5 3 1) (vector 9 6 3 0) 2 4 1) + '#(6 5 3 3 0)) + (fail 'vector-merge:nonempty:nonempty)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector) (vector) 2 0 0 0) + v) + '#(#f #f #f #f #f #f #f #f #f #f #f #f)) + (fail 'vector-merge!:empty:empty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector) (vector 9 6 3 0) 2 0 0 1) + v) + '#(#f #f 6 3 0 #f #f #f #f #f #f #f)) + (fail 'vector-merge!:empty:nonempty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector 9 7 5 3 1) (vector) 2 2 4 0) + v) + '#(#f #f 5 3 #f #f #f #f #f #f #f #f)) + (fail 'vector-merge!:nonempty:empty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector 9 7 5 3 1) (vector 9 6 3 0) 2 2 4 1) + v) + '#(#f #f 6 5 3 3 0 #f #f #f #f #f)) + (fail 'vector-merge!:nonempty:nonempty:2)) + +(or (equal? (vector-merge > (vector) (vector) 0 0 0 0) + '#()) + (fail 'vector-merge:empty:empty)) + +(or (equal? (vector-merge > (vector) (vector 9 6 3 0) 0 0 1 4) + '#(6 3 0)) + (fail 'vector-merge:empty:nonempty)) + +(or (equal? (vector-merge > (vector 9 7 5 3 1) (vector) 2 4 0 0) + '#(5 3)) + (fail 'vector-merge:nonempty:empty)) + +(or (equal? (vector-merge > (vector 9 7 5 3 1) (vector 9 6 3 0) 2 4 1 4) + '#(6 5 3 3 0)) + (fail 'vector-merge:nonempty:nonempty)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector) (vector) 2 0 0 0 0) + v) + '#(#f #f #f #f #f #f #f #f #f #f #f #f)) + (fail 'vector-merge!:empty:empty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector) (vector 9 6 3 0) 2 0 0 1 4) + v) + '#(#f #f 6 3 0 #f #f #f #f #f #f #f)) + (fail 'vector-merge!:empty:nonempty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector 9 7 5 3 1) (vector) 2 2 4 0 0) + v) + '#(#f #f 5 3 #f #f #f #f #f #f #f #f)) + (fail 'vector-merge!:nonempty:empty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector 9 7 5 3 1) (vector 9 6 3 0) 2 2 4 1 4) + v) + '#(#f #f 6 5 3 3 0 #f #f #f #f #f)) + (fail 'vector-merge!:nonempty:nonempty:2)) + +(or (equal? (vector-merge > (vector) (vector) 0 0 0 0) + '#()) + (fail 'vector-merge:empty:empty)) + +(or (equal? (vector-merge > (vector) (vector 9 6 3 0) 0 0 1 2) + '#(6)) + (fail 'vector-merge:empty:nonempty)) + +(or (equal? (vector-merge > (vector 9 7 5 3 1) (vector) 2 4 0 0) + '#(5 3)) + (fail 'vector-merge:nonempty:empty)) + +(or (equal? (vector-merge > (vector 9 7 5 3 1) (vector 9 6 3 0) 2 4 1 2) + '#(6 5 3)) + (fail 'vector-merge:nonempty:nonempty)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector) (vector) 2 0 0 0 0) + v) + '#(#f #f #f #f #f #f #f #f #f #f #f #f)) + (fail 'vector-merge!:empty:empty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector) (vector 9 6 3 0) 2 0 0 1 2) + v) + '#(#f #f 6 #f #f #f #f #f #f #f #f #f)) + (fail 'vector-merge!:empty:nonempty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector 9 7 5 3 1) (vector) 2 2 4 0 0) + v) + '#(#f #f 5 3 #f #f #f #f #f #f #f #f)) + (fail 'vector-merge!:nonempty:empty:2)) + +(or (equal? (let ((v (make-vector 12 #f))) + (vector-merge! > v (vector 9 7 5 3 1) (vector 9 6 3 0) 2 2 4 1 2) + v) + '#(#f #f 6 5 3 #f #f #f #f #f #f #f)) + (fail 'vector-merge!:nonempty:nonempty:2)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(or (equal? (list-delete-neighbor-dups char=? (list)) + '()) + (fail 'list-delete-neighbor-dups:empty)) + +(or (equal? (list-delete-neighbor-dups char=? (list #\a)) + '(#\a)) + (fail 'list-delete-neighbor-dups:singleton)) + +(or (equal? (list-delete-neighbor-dups char=? (list #\a #\a #\a #\b #\b #\a)) + '(#\a #\b #\a)) + (fail 'list-delete-neighbor-dups:nonempty)) + +(or (equal? (list-delete-neighbor-dups! char=? (list)) + '()) + (fail 'list-delete-neighbor-dups!:empty)) + +(or (equal? (list-delete-neighbor-dups! char=? (list #\a)) + '(#\a)) + (fail 'list-delete-neighbor-dups!:singleton)) + +(or (equal? (list-delete-neighbor-dups! char=? (list #\a #\a #\a #\b #\b #\a)) + '(#\a #\b #\a)) + (fail 'list-delete-neighbor-dups!:nonempty)) + +(or (equal? (let ((v (vector))) + (vector-delete-neighbor-dups char=? v)) + '#()) + (fail 'vector-delete-neighbor-dups:empty)) + +(or (equal? (let ((v (vector #\a))) + (vector-delete-neighbor-dups char=? v)) + '#(#\a)) + (fail 'vector-delete-neighbor-dups:singleton)) + +(or (equal? (let ((v (vector #\a #\a #\a #\b #\b #\a))) + (vector-delete-neighbor-dups char=? v)) + '#(#\a #\b #\a)) + (fail 'vector-delete-neighbor-dups:nonempty)) + +(or (equal? (let ((v (vector))) + (list (vector-delete-neighbor-dups! char=? v) v)) + '(0 #())) + (fail 'vector-delete-neighbor-dups!:empty)) + +(or (equal? (let ((v (vector #\a))) + (list (vector-delete-neighbor-dups! char=? v) v)) + '(1 #(#\a))) + (fail 'vector-delete-neighbor-dups!:singleton)) + +(or (equal? (let ((v (vector #\a #\a #\a #\b #\b #\a))) + (list (vector-delete-neighbor-dups! char=? v) v)) + '(3 #(#\a #\b #\a #\b #\b #\a))) + (fail 'vector-delete-neighbor-dups!:nonempty)) + +(or (equal? (let ((v (vector))) + (vector-delete-neighbor-dups char=? v 0)) + '#()) + (fail 'vector-delete-neighbor-dups:empty:0)) + +(or (equal? (let ((v (vector #\a))) + (vector-delete-neighbor-dups char=? v 0)) + '#(#\a)) + (fail 'vector-delete-neighbor-dups:singleton:0)) + +(or (equal? (let ((v (vector #\a #\a #\a #\b #\b #\a))) + (vector-delete-neighbor-dups char=? v 0)) + '#(#\a #\b #\a)) + (fail 'vector-delete-neighbor-dups:nonempty:0)) + +(or (equal? (let ((v (vector))) + (list (vector-delete-neighbor-dups! char=? v 0) v)) + '(0 #())) + (fail 'vector-delete-neighbor-dups!:empty:0)) + +(or (equal? (let ((v (vector #\a))) + (list (vector-delete-neighbor-dups! char=? v 0) v)) + '(1 #(#\a))) + (fail 'vector-delete-neighbor-dups!:singleton:0)) + +(or (equal? (let ((v (vector #\a #\a #\a #\b #\b #\a))) + (list (vector-delete-neighbor-dups! char=? v 0) v)) + '(3 #(#\a #\b #\a #\b #\b #\a))) + (fail 'vector-delete-neighbor-dups!:nonempty:0)) + +(or (equal? (let ((v (vector))) + (vector-delete-neighbor-dups char=? v 0)) + '#()) + (fail 'vector-delete-neighbor-dups:empty:0)) + +(or (equal? (let ((v (vector #\a))) + (vector-delete-neighbor-dups char=? v 1)) + '#()) + (fail 'vector-delete-neighbor-dups:singleton:1)) + +(or (equal? (let ((v (vector #\a #\a #\a #\b #\b #\a))) + (vector-delete-neighbor-dups char=? v 3)) + '#(#\b #\a)) + (fail 'vector-delete-neighbor-dups:nonempty:3)) + +(or (equal? (let ((v (vector))) + (list (vector-delete-neighbor-dups! char=? v 0) v)) + '(0 #())) + (fail 'vector-delete-neighbor-dups!:empty:0)) + +(or (equal? (let ((v (vector #\a))) + (list (vector-delete-neighbor-dups! char=? v 1) v)) + '(1 #(#\a))) + (fail 'vector-delete-neighbor-dups!:singleton:1)) + +(or (equal? (let ((v (vector #\a #\a #\a #\b #\b #\a))) + (list (vector-delete-neighbor-dups! char=? v 3) v)) + '(5 #(#\a #\a #\a #\b #\a #\a))) + (fail 'vector-delete-neighbor-dups!:nonempty:3)) + +(or (equal? (let ((v (vector))) + (vector-delete-neighbor-dups char=? v 0 0)) + '#()) + (fail 'vector-delete-neighbor-dups:empty:0:0)) + +(or (equal? (let ((v (vector #\a))) + (vector-delete-neighbor-dups char=? v 1 1)) + '#()) + (fail 'vector-delete-neighbor-dups:singleton:1:1)) + +(or (equal? (let ((v (vector #\a #\a #\a #\b #\b #\a))) + (vector-delete-neighbor-dups char=? v 3 5)) + '#(#\b)) + (fail 'vector-delete-neighbor-dups:nonempty:3:5)) + +(or (equal? (let ((v (vector))) + (list (vector-delete-neighbor-dups! char=? v 0 0) v)) + '(0 #())) + (fail 'vector-delete-neighbor-dups!:empty:0:0)) + +(or (equal? (let ((v (vector #\a))) + (list (vector-delete-neighbor-dups! char=? v 0 1) v)) + '(1 #(#\a))) + (fail 'vector-delete-neighbor-dups!:singleton:0:1)) + +(or (equal? (let ((v (vector #\a))) + (list (vector-delete-neighbor-dups! char=? v 1 1) v)) + '(1 #(#\a))) + (fail 'vector-delete-neighbor-dups!:singleton:1:1)) + +(or (equal? (let ((v (vector #\a #\a #\a #\b #\b #\a))) + (list (vector-delete-neighbor-dups! char=? v 3 5) v)) + '(4 #(#\a #\a #\a #\b #\b #\a))) + (fail 'vector-delete-neighbor-dups!:nonempty:3:5)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(or (equal? (vector-find-median < (vector) "knil") + "knil") + (fail 'vector-find-median:empty)) + +(or (equal? (vector-find-median < (vector 17) "knil") + 17) + (fail 'vector-find-median:singleton)) + +(or (equal? (vector-find-median < (vector 18 1 12 14 12 5 18 2) "knil") + 12) + (fail 'vector-find-median:8same)) + +(or (equal? (vector-find-median < (vector 18 1 11 14 12 5 18 2) "knil") + 23/2) + (fail 'vector-find-median:8diff)) + +(or (equal? (vector-find-median < (vector 18 1 12 14 12 5 18 2) "knil" list) + (list 12 12)) + (fail 'vector-find-median:8samelist)) + +(or (equal? (vector-find-median < (vector 18 1 11 14 12 5 18 2) "knil" list) + (list 11 12)) + (fail 'vector-find-median:8difflist)) + +(or (equal? (vector-find-median < (vector 7 6 9 3 1 18 15 7 8) "knil") + 7) + (fail 'vector-find-median:9)) + +(or (equal? (vector-find-median < (vector 7 6 9 3 1 18 15 7 8) "knil" list) + 7) + (fail 'vector-find-median:9list)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(or (equal? (let ((v (vector 19))) + (vector-select! < v 0)) + 19) + (fail 'vector-select!:singleton:0)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 0)) + 3) + (fail 'vector-select!:ten:0)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 2)) + 9) + (fail 'vector-select!:ten:2)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 8)) + 22) + (fail 'vector-select!:ten:8)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 9)) + 23) + (fail 'vector-select!:ten:9)) + +(or (equal? (let ((v (vector 19))) + (vector-select! < v 0 0)) + 19) + (fail 'vector-select!:singleton:0:0)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 0 0)) + 3) + (fail 'vector-select!:ten:0:0)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 2 0)) + 9) + (fail 'vector-select!:ten:2:0)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 8 0)) + 22) + (fail 'vector-select!:ten:8:0)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 9 0)) + 23) + (fail 'vector-select!:ten:9:0)) + +(or (equal? (let ((v (vector 19))) + (vector-select! < v 0 0 1)) + 19) + (fail 'vector-select!:singleton:0:0:1)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 0 0 10)) + 3) + (fail 'vector-select!:ten:0:0:10)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 2 0 10)) + 9) + (fail 'vector-select!:ten:2:0:10)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 8 0 10)) + 22) + (fail 'vector-select!:ten:8:0:10)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 9 0 10)) + 23) + (fail 'vector-select!:ten:9:0:10)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 0 4 10)) + 3) + (fail 'vector-select!:ten:0:4:10)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 2 4 10)) + 13) + (fail 'vector-select!:ten:2:4:10)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 4 4 10)) + 21) + (fail 'vector-select!:ten:4:4:10)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 5 4 10)) + 23) + (fail 'vector-select!:ten:5:4:10)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 0 4 10)) + 3) + (fail 'vector-select!:ten:0:4:10)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 2 4 10)) + 13) + (fail 'vector-select!:ten:2:4:10)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 3 4 10)) + 13) + (fail 'vector-select!:ten:3:4:10)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 4 4 10)) + 21) + (fail 'vector-select!:ten:4:4:10)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 5 4 10)) + 23) + (fail 'vector-select!:ten:9:4:10)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 0 4 8)) + 9) + (fail 'vector-select!:ten:0:4:8)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 1 4 8)) + 13) + (fail 'vector-select!:ten:1:4:8)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 2 4 8)) + 13) + (fail 'vector-select!:ten:2:4:8)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-select! < v 3 4 8)) + 21) + (fail 'vector-select!:ten:3:4:8)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(or (equal? (let ((v (vector))) + (vector-separate! < v 0) + (vector-sort < (r7rs-vector-copy v 0 0))) + '#()) + (fail 'vector-separate!:empty:0)) + +(or (equal? (let ((v (vector 19))) + (vector-separate! < v 0) + (vector-sort < (r7rs-vector-copy v 0 0))) + '#()) + (fail 'vector-separate!:singleton:0)) + +(or (equal? (let ((v (vector 19))) + (vector-separate! < v 1) + (vector-sort < (r7rs-vector-copy v 0 1))) + '#(19)) + (fail 'vector-separate!:singleton:1)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-separate! < v 0) + (vector-sort < (r7rs-vector-copy v 0 0))) + '#()) + (fail 'vector-separate!:ten:0)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-separate! < v 3) + (vector-sort < (r7rs-vector-copy v 0 3))) + '#(3 8 9)) + (fail 'vector-separate!:ten:3)) + +(or (equal? (let ((v (vector))) + (vector-separate! < v 0 0) + (vector-sort < (r7rs-vector-copy v 0 0))) + '#()) + (fail 'vector-separate!:empty:0:0)) + +(or (equal? (let ((v (vector 19))) + (vector-separate! < v 0 0) + (vector-sort < (r7rs-vector-copy v 0 0))) + '#()) + (fail 'vector-separate!:singleton:0:0)) + +(or (equal? (let ((v (vector 19))) + (vector-separate! < v 1 0) + (vector-sort < (r7rs-vector-copy v 0 1))) + '#(19)) + (fail 'vector-separate!:singleton:1:0)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-separate! < v 0 0) + (vector-sort < (r7rs-vector-copy v 0 0))) + '#()) + (fail 'vector-separate!:ten:0:0)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-separate! < v 3 0) + (vector-sort < (r7rs-vector-copy v 0 3))) + '#(3 8 9)) + (fail 'vector-separate!:ten:3:0)) + +(or (equal? (let ((v (vector 19))) + (vector-separate! < v 0 1) + (vector-sort < (r7rs-vector-copy v 1 1))) + '#()) + (fail 'vector-separate!:singleton:0:1)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-separate! < v 0 2) + (vector-sort < (r7rs-vector-copy v 2 2))) + '#()) + (fail 'vector-separate!:ten:0:2)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-separate! < v 3 2) + (vector-sort < (r7rs-vector-copy v 2 5))) + '#(3 9 13)) + (fail 'vector-separate!:ten:3:2)) + +(or (equal? (let ((v (vector))) + (vector-separate! < v 0 0 0) + (vector-sort < (r7rs-vector-copy v 0 0))) + '#()) + (fail 'vector-separate!:empty:0:0:0)) + +(or (equal? (let ((v (vector 19))) + (vector-separate! < v 0 1 1) + (vector-sort < (r7rs-vector-copy v 1 1))) + '#()) + (fail 'vector-separate!:singleton:0:1:1)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-separate! < v 0 2 8) + (vector-sort < (r7rs-vector-copy v 2 2))) + '#()) + (fail 'vector-separate!:ten:0:2:8)) + +(or (equal? (let ((v (vector 8 22 19 19 13 9 21 13 3 23))) + (vector-separate! < v 3 2 8) + (vector-sort < (r7rs-vector-copy v 2 5))) + '#(9 13 13)) + (fail 'vector-separate!:ten:3:2:8)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; Sorting routines often have internal boundary cases or +;;; randomness, so it's prudent to run a lot of tests with +;;; different lengths. +;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (all-sorts-okay? m n) + (if (> m 0) + (let* ((v (random-vector n)) + (v2 (vector-copy v)) + (lst (vector->list v)) + (ans (vector-sort < v2)) + (med (cond ((= n 0) -97) + ((odd? n) + (vector-ref ans (quotient n 2))) + (else + (/ (+ (vector-ref ans (- (quotient n 2) 1)) + (vector-ref ans (quotient n 2))) + 2))))) + (define (dsort vsort!) + (let ((v2 (vector-copy v))) + (vsort! < v2) + v2)) + (and (equal? ans (list->vector (list-sort < lst))) + (equal? ans (list->vector (list-stable-sort < lst))) + (equal? ans (list->vector (list-sort! < (list-copy lst)))) + (equal? ans (list->vector (list-stable-sort! < (list-copy lst)))) + (equal? ans (vector-sort < v2)) + (equal? ans (vector-stable-sort < v2)) + (equal? ans (dsort vector-sort!)) + (equal? ans (dsort vector-stable-sort!)) + (equal? med (vector-find-median < v2 -97)) + (equal? v v2) + (equal? lst (vector->list v)) + (equal? med (vector-find-median! < v2 -97)) + (equal? ans v2) + (all-sorts-okay? (- m 1) n))) + #t)) + +(define (test-all-sorts m n) + (or (all-sorts-okay? m n) + (fail (list 'test-all-sorts m n)))) + +(for-each test-all-sorts + '( 3 5 10 10 10 20 20 10 10 10 10 10 10 10 10 10 10) + '( 0 1 2 3 4 5 10 20 30 40 50 99 100 101 499 500 501)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; Benchmarks. +;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (mostly-sorted-random-vector n) + (define fraction-not-sorted 1/20) + (define k (round (* n fraction-not-sorted))) + (let* ((v (random-vector n)) + (v2 (r6rs-vector-sort < v))) + (do ((i 0 (+ i 1))) + ((= i k)) + (vector-set! v2 i (vector-ref v i))) + v2)) + +;;; Performs n calls of f on a fresh copy of the vector or list v +;;; and returns the average time per call in seconds, rounded to +;;; the nearest microsecond. + +(define (average-time n f v) + (define (call-loop i jiffies) + (if (> i 0) + (let* ((v2 (if (vector? v) + (vector-copy v) + (list-copy v))) + (t0 (current-jiffy)) + (result (f < v2)) + (t1 (current-jiffy))) + (call-loop (- i 1) (+ jiffies (- t1 t0)))) + (let* ((dt (/ jiffies (jiffies-per-second))) + (dt (/ dt n)) + (dt (/ (round (* 1e6 dt)) 1e6))) + dt))) + (call-loop n 0)) + +(define (run-some-benchmarks m n) + (newline) + (display "Average time (in seconds) for a sequence of length ") + (write n) + (display " : ") + (newline) + (newline) + (display " random mostly sorted\n") + (let* ((v (random-vector n)) + (l (vector->list v)) + (v2 (mostly-sorted-random-vector n)) + (l2 (vector->list v2))) + (define (run-sorter name f v v2) + (display name) + (display " ") + (write10 (average-time m f v)) + (display " ") + (write10 (average-time m f v2)) + (newline)) + (define (write10 x) + (let* ((p (open-output-string)) + (ignored (write x p)) + (s (get-output-string p)) + (k (string-length s)) + (s (string-append s (make-string (max 0 (- 10 k)) #\space)))) + (display s))) + (run-sorter "R6RS list-sort " r6rs-list-sort l l2) + (run-sorter "list-sort " list-sort l l2) + (run-sorter "list-stable-sort " list-stable-sort l l2) + (run-sorter "list-sort! " list-sort! l l2) + (run-sorter "list-stable-sort! " list-stable-sort! l l2) + (run-sorter "R6RS vector-sort " r6rs-vector-sort v v2) + (run-sorter "R6RS vector-sort! " r6rs-vector-sort! v v2) + (run-sorter "vector-sort " vector-sort v v2) + (run-sorter "vector-stable-sort " vector-stable-sort v v2) + (run-sorter "vector-sort! " vector-sort! v v2) + (run-sorter "vector-stable-sort!" vector-stable-sort! v v2) + (run-sorter "vector-find-median " + (lambda (< v) + (vector-find-median < v -1)) + v v2) + (run-sorter "vector-find-median!" + (lambda (< v) + (vector-find-median! < v -1)) + v v2))) + +(if display-benchmark-results? + (run-some-benchmarks 1000 100)) + +(if display-benchmark-results? + (run-some-benchmarks 50 9999)) + +(if display-benchmark-results? + (run-some-benchmarks 50 10000)) + +(if display-benchmark-results? + (run-some-benchmarks 3 1000000)) + +(display "Done.\n") diff --git a/srfi/sorting/srfi-132.sld b/srfi/sorting/srfi-132.sld new file mode 100644 index 00000000..a5dcb190 --- /dev/null +++ b/srfi/sorting/srfi-132.sld @@ -0,0 +1,19 @@ +(define-library (srfi-132) + (import (scheme base)) + (import (scheme cxr)) + (export list-sorted? vector-sorted? list-merge vector-merge list-sort vector-sort + list-stable-sort vector-stable-sort list-merge! vector-merge! list-sort! vector-sort! + list-stable-sort! vector-stable-sort! + list-delete-neighbor-dups vector-delete-neighbor-dups + list-delete-neighbor-dups! vector-delete-neighbor-dups!) + (include "delndups.scm") + (include "lmsort.scm") + (include "sortp.scm") + (include "vector-util.scm") + (include "vhsort.scm") + (include "visort.scm") + (include "vmsort.scm") + (include "vqsort2.scm") + (include "vqsort3.scm") + (include "sort.scm") ; must be last +) diff --git a/srfi/sorting/vbinsearch.scm b/srfi/sorting/vbinsearch.scm new file mode 100644 index 00000000..6eee4e69 --- /dev/null +++ b/srfi/sorting/vbinsearch.scm @@ -0,0 +1,34 @@ +;;; The sort package -- binary search -*- Scheme -*- +;;; Copyright (c) 1998 by Olin Shivers. +;;; This code is in the public domain. +;;; Olin Shivers 98/11 + +;;; Returns the index of the matching element. +;;; (vector-binary-search < car 4 '#((1 . one) (3 . three) +;;; (4 . four) (25 . twenty-five))) +;;; => 2 + +(define (vector-binary-search key< elt->key key v . maybe-start+end) + (call-with-values + (lambda () (vector-start+end v maybe-start+end)) + (lambda (start end) + (let lp ((left start) (right end)) ; Search V[left,right). + (and (< left right) + (let* ((m (quotient (+ left right) 2)) + (elt (vector-ref v m)) + (elt-key (elt->key elt))) + (cond ((key< key elt-key) (lp left m)) + ((key< elt-key key) (lp (+ m 1) right)) + (else m)))))))) + +(define (vector-binary-search3 compare v . maybe-start+end) + (call-with-values + (lambda () (vector-start+end v maybe-start+end)) + (lambda (start end) + (let lp ((left start) (right end)) ; Search V[left,right). + (and (< left right) + (let* ((m (quotient (+ left right) 2)) + (sign (compare (vector-ref v m)))) + (cond ((> sign 0) (lp left m)) + ((< sign 0) (lp (+ m 1) right)) + (else m)))))))) diff --git a/srfi/sorting/vector-util.scm b/srfi/sorting/vector-util.scm new file mode 100644 index 00000000..319fa6f2 --- /dev/null +++ b/srfi/sorting/vector-util.scm @@ -0,0 +1,65 @@ +;;; This code is +;;; Copyright (c) 1998 by Olin Shivers. +;;; The terms are: You may do as you please with this code, as long as +;;; you do not delete this notice or hold me responsible for any outcome +;;; related to its use. +;;; +;;; Blah blah blah. Don't you think source files should contain more lines +;;; of code than copyright notice? + +(define (vector-portion-copy vec start end) + (let* ((len (vector-length vec)) + (new-len (- end start)) + (new (make-vector new-len))) + (do ((i start (+ i 1)) + (j 0 (+ j 1))) + ((= i end) new) + (vector-set! new j (vector-ref vec i))))) + +(define (vector-copy vec) + (vector-portion-copy vec 0 (vector-length vec))) + +(define (vector-portion-copy! target src start end) + (let ((len (- end start))) + (do ((i (- len 1) (- i 1)) + (j (- end 1) (- j 1))) + ((< i 0)) + (vector-set! target i (vector-ref src j))))) + +(define (has-element list index) + (cond + ((zero? index) + (if (pair? list) + (values #t (car list)) + (values #f #f))) + ((null? list) + (values #f #f)) + (else + (has-element (cdr list) (- index 1))))) + +(define (list-ref-or-default list index default) + (call-with-values + (lambda () (has-element list index)) + (lambda (has? maybe) + (if has? + maybe + default)))) + +(define (vector-start+end vector maybe-start+end) + (let ((start (list-ref-or-default maybe-start+end + 0 0)) + (end (list-ref-or-default maybe-start+end + 1 (vector-length vector)))) + (values start end))) + +(define (vectors-start+end-2 vector-1 vector-2 maybe-start+end) + (let ((start-1 (list-ref-or-default maybe-start+end + 0 0)) + (end-1 (list-ref-or-default maybe-start+end + 1 (vector-length vector-1))) + (start-2 (list-ref-or-default maybe-start+end + 2 0)) + (end-2 (list-ref-or-default maybe-start+end + 3 (vector-length vector-2)))) + (values start-1 end-1 + start-2 end-2))) diff --git a/srfi/sorting/vhsort.scm b/srfi/sorting/vhsort.scm new file mode 100644 index 00000000..004433c0 --- /dev/null +++ b/srfi/sorting/vhsort.scm @@ -0,0 +1,119 @@ +;;; The sort package -- vector heap sort -*- Scheme -*- +;;; Copyright (c) 2002 by Olin Shivers. +;;; This code is open-source; see the end of the file for porting and +;;; more copyright information. +;;; Olin Shivers 10/98. + +;;; Exports: +;;; (vector-heap-sort! elt< v [start end]) -> unspecified +;;; (vector-heap-sort elt< v [start end]) -> vector + +;;; Two key facts +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; If a heap structure is embedded into a vector at indices [start,end), then: +;;; 1. The two children of index k are start + 2*(k-start) + 1 = k*2-start+1 +;;; and start + 2*(k-start) + 2 = k*2-start+2. +;;; +;;; 2. The first index of a leaf node in the range [start,end) is +;;; first-leaf = floor[(start+end)/2] +;;; (You can deduce this from fact #1 above.) +;;; Any index before FIRST-LEAF is an internal node. + +(define (really-vector-heap-sort! elt< v start end) + ;; Vector V contains a heap at indices [START,END). The heap is in heap + ;; order in the range (I,END) -- i.e., every element in this range is >= + ;; its children. Bubble HEAP[I] down into the heap to impose heap order on + ;; the range [I,END). + (define (restore-heap! end i) + (let* ((vi (vector-ref v i)) + (first-leaf (quotient (+ start end) 2)) ; Can fixnum overflow. + (final-k (let lp ((k i)) + (if (>= k first-leaf) + k ; Leaf, so done. + (let* ((k*2-start (+ k (- k start))) ; Don't overflow. + (child1 (+ 1 k*2-start)) + (child2 (+ 2 k*2-start)) + (child1-val (vector-ref v child1))) + (call-with-values + (lambda () + (if (< child2 end) + (let ((child2-val (vector-ref v child2))) + (if (elt< child2-val child1-val) + (values child1 child1-val) + (values child2 child2-val))) + (values child1 child1-val))) + (lambda (max-child max-child-val) + (cond ((elt< vi max-child-val) + (vector-set! v k max-child-val) + (lp max-child)) + (else k))))))))) ; Done. + (vector-set! v final-k vi))) + + ;; Put the unsorted subvector V[start,end) into heap order. + (let ((first-leaf (quotient (+ start end) 2))) ; Can fixnum overflow. + (do ((i (- first-leaf 1) (- i 1))) + ((< i start)) + (restore-heap! end i))) + + (do ((i (- end 1) (- i 1))) + ((<= i start)) + (let ((top (vector-ref v start))) + (vector-set! v start (vector-ref v i)) + (vector-set! v i top) + (restore-heap! i start)))) + +;;; Here are the two exported interfaces. + +(define (vector-heap-sort! elt< v . maybe-start+end) + (call-with-values + (lambda () (vector-start+end v maybe-start+end)) + (lambda (start end) + (really-vector-heap-sort! elt< v start end)))) + +(define (vector-heap-sort elt< v . maybe-start+end) + (call-with-values + (lambda () (vector-start+end v maybe-start+end)) + (lambda (start end) + (let ((ans (vector-portion-copy v start end))) + (really-vector-heap-sort! elt< ans 0 (- end start)) + ans)))) + +;;; Notes on porting +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; Bumming the code for speed +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; If you can use a module system to lock up the internal function +;;; REALLY-VECTOR-HEAP-SORT! so that it can only be called from VECTOR-HEAP-SORT and +;;; VECTOR-HEAP-SORT!, then you can hack the internal functions to run with no safety +;;; checks. The safety checks performed by the exported functions VECTOR-HEAP-SORT & +;;; VECTOR-HEAP-SORT! guarantee that there will be no type errors or array-indexing +;;; errors. In addition, with the exception of the two computations of +;;; FIRST-LEAF, all arithmetic will be fixnum arithmetic that never overflows +;;; into bignums, assuming your Scheme provides that you can't allocate an +;;; array so large you might need a bignum to index an element, which is +;;; definitely the case for every implementation with which I am familiar. +;;; +;;; If you want to code up the first-leaf = (quotient (+ s e) 2) computation +;;; so that it will never fixnum overflow when S & E are fixnums, you can do +;;; it this way: +;;; - compute floor(e/2), which throws away e's low-order bit. +;;; - add e's low-order bit to s, and divide that by two: +;;; floor[(s + e mod 2) / 2] +;;; - add these two parts together. +;;; giving you +;;; (+ (quotient e 2) +;;; (quotient (+ s (modulo e 2)) 2)) +;;; If we know that e & s are fixnums, and that 0 <= s <= e, then this +;;; can only fixnum-overflow when s = e = max-fixnum. Note that the +;;; two divides and one modulo op can be done very quickly with two +;;; right-shifts and a bitwise and. +;;; +;;; I suspect there has never been a heapsort written in the history of +;;; the world in C that got this detail right. +;;; +;;; If your Scheme has a faster mechanism for handling optional arguments +;;; (e.g., Chez), you should definitely port over to it. Note that argument +;;; defaulting and error-checking are interleaved -- you don't have to +;;; error-check defaulted START/END args to see if they are fixnums that are +;;; legal vector indices for the corresponding vector, etc. diff --git a/srfi/sorting/visort.scm b/srfi/sorting/visort.scm new file mode 100644 index 00000000..f2c5ac77 --- /dev/null +++ b/srfi/sorting/visort.scm @@ -0,0 +1,78 @@ +;;; The sort package -- stable vector insertion sort -*- Scheme -*- +;;; Copyright (c) 1998 by Olin Shivers. +;;; This code is open-source; see the end of the file for porting and +;;; more copyright information. +;;; Olin Shivers 10/98. + +;;; Exports: +;;; vector-insert-sort < v [start end] -> vector +;;; vector-insert-sort! < v [start end] -> unspecific +;;; +;;; %vector-insert-sort! is also called from vqsort.scm's quick-sort function. + +(define (vector-insert-sort elt< v . maybe-start+end) + (call-with-values + (lambda () (vector-start+end v maybe-start+end)) + (lambda (start end) + (let ((ans (vector-portion-copy v start end))) + (%vector-insert-sort! elt< ans 0 (- end start)) + ans)))) + +(define (vector-insert-sort! < v . maybe-start+end) + (call-with-values + (lambda () (vector-start+end v maybe-start+end)) + (lambda (start end) + (%vector-insert-sort! < v start end)))) + +(define (%vector-insert-sort! elt< v start end) + (do ((i (+ 1 start) (+ i 1))) ; Invariant: [start,i) is sorted. + ((>= i end)) + (let ((val (vector-ref v i))) + (vector-set! v (let lp ((j i)) ; J is the location of the + (if (<= j start) + start ; "hole" as it bubbles down. + (let* ((j-1 (- j 1)) + (vj-1 (vector-ref v j-1))) + (cond ((elt< val vj-1) + (vector-set! v j vj-1) + (lp j-1)) + (else j))))) + val)))) + +;;; Copyright +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This code is +;;; Copyright (c) 1998 by Olin Shivers. +;;; The terms are: You may do as you please with this code, as long as +;;; you do not delete this notice or hold me responsible for any outcome +;;; related to its use. +;;; +;;; Blah blah blah. Don't you think source files should contain more lines +;;; of code than copyright notice? + + +;;; Code tuning & porting +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; This code is tightly bummed as far as I can go in portable Scheme. +;;; +;;; The code can be converted to use unsafe vector-indexing and +;;; fixnum-specific arithmetic ops -- the safety checks done on entry +;;; to VECTOR-INSERT-SORT and VECTOR-INSERT-SORT! are sufficient to +;;; guarantee nothing bad will happen. However, note that if you alter +;;; %VECTOR-INSERT-SORT! to use dangerous primitives, you must ensure +;;; it is only called from clients that guarantee to observe its +;;; preconditions. In the implementation, %VECTOR-INSERT-SORT! is only +;;; called from VECTOR-INSERT-SORT! and the quick-sort code in +;;; vqsort.scm, and the preconditions are guaranteed for these two +;;; clients. This should provide *big* speedups. In fact, all the +;;; code bumming I've done pretty much disappears in the noise unless +;;; you have a good compiler and also can dump the vector-index checks +;;; and generic arithmetic -- so I've really just set things up for +;;; you to exploit. +;;; +;;; If your Scheme has a faster mechanism for handling optional arguments +;;; (e.g., Chez), you should definitely port over to it. Note that argument +;;; defaulting and error-checking are interleaved -- you don't have to +;;; error-check defaulted START/END args to see if they are fixnums that are +;;; legal vector indices for the corresponding vector, etc. diff --git a/srfi/sorting/vmsort.scm b/srfi/sorting/vmsort.scm new file mode 100644 index 00000000..9b0eb639 --- /dev/null +++ b/srfi/sorting/vmsort.scm @@ -0,0 +1,246 @@ +;;; The sort package -- stable vector merge & merge sort -*- Scheme -*- +;;; Copyright (c) 1998 by Olin Shivers. +;;; This code is open-source; see the end of the file for porting and +;;; more copyright information. +;;; Olin Shivers 10/98. + +;;; Exports: +;;; (vector-merge < v1 v2 [start1 end1 start2 end2]) -> vector +;;; (vector-merge! < v v1 v2 [start0 start1 end1 start2 end2]) -> unspecific +;;; +;;; (vector-merge-sort < v [start end temp]) -> vector +;;; (vector-merge-sort! < v [start end temp]) -> unspecific + + +;;; Merge +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; (vector-merge < v1 v2 [start1 end1 start2 end2]) -> vector +;;; (vector-merge! < v v1 v2 [start start1 end1 start2 end2]) -> unspecific +;;; +;;; Stable vector merge -- V1's elements come out ahead of equal V2 elements. + +(define (vector-merge < v1 v2 . maybe-starts+ends) + (call-with-values + (lambda () (vectors-start+end-2 v1 v2 maybe-starts+ends)) + (lambda (start1 end1 start2 end2) + (let ((ans (make-vector (+ (- end1 start1) (- end2 start2))))) + (%vector-merge! < ans v1 v2 0 start1 end1 start2 end2) + ans)))) + +(define (vector-merge! < v v1 v2 . maybe-starts+ends) + (call-with-values + (lambda () + (if (pair? maybe-starts+ends) + (values (car maybe-starts+ends) + (cdr maybe-starts+ends)) + (values 0 + '()))) + (lambda (start rest) + (call-with-values + (lambda () (vectors-start+end-2 v1 v2 rest)) + (lambda (start1 end1 start2 end2) + (%vector-merge! < v v1 v2 start start1 end1 start2 end2)))))) + + +;;; This routine is not exported. The code is tightly bummed. +;;; +;;; If these preconditions hold, the routine can be bummed to run with +;;; unsafe vector-indexing and fixnum arithmetic ops: +;;; - V V1 V2 are vectors. +;;; - START START1 END1 START2 END2 are fixnums. +;;; - (<= 0 START END0 (vector-length V), +;;; where end0 = start + (end1 - start1) + (end2 - start2) +;;; - (<= 0 START1 END1 (vector-length V1)) +;;; - (<= 0 START2 END2 (vector-length V2)) +;;; If you put these error checks in the two client procedures above, you can +;;; safely convert this procedure to use unsafe ops -- which is why it isn't +;;; exported. This will provide *huge* speedup. + +(define (%vector-merge! elt< v v1 v2 start start1 end1 start2 end2) + (letrec ((vblit (lambda (fromv j i end) ; Blit FROMV[J,END) to V[I,?]. + (let lp ((j j) (i i)) + (vector-set! v i (vector-ref fromv j)) + (let ((j (+ j 1))) + (if (< j end) (lp j (+ i 1)))))))) + + (cond ((<= end1 start1) (if (< start2 end2) (vblit v2 start2 start end2))) + ((<= end2 start2) (vblit v1 start1 start end1)) + + ;; Invariants: I is next index of V to write; X = V1[J]; Y = V2[K]. + (else (let lp ((i start) + (j start1) (x (vector-ref v1 start1)) + (k start2) (y (vector-ref v2 start2))) + (let ((i1 (+ i 1))) ; "i+1" is a complex number in R4RS! + (if (elt< y x) + (let ((k (+ k 1))) + (vector-set! v i y) + (if (< k end2) + (lp i1 j x k (vector-ref v2 k)) + (vblit v1 j i1 end1))) + (let ((j (+ j 1))) + (vector-set! v i x) + (if (< j end1) + (lp i1 j (vector-ref v1 j) k y) + (vblit v2 k i1 end2)))))))))) + + +;;; (vector-merge-sort < v [start end temp]) -> vector +;;; (vector-merge-sort! < v [start end temp]) -> unspecific +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Stable natural vector merge sort + +(define (vector-merge-sort! < v . maybe-args) + (call-with-values + (lambda () (vector-start+end v maybe-args)) + (lambda (start end) + (let ((temp (if (and (pair? maybe-args) ; kludge + (pair? (cdr maybe-args)) + (pair? (cddr maybe-args))) + (caddr maybe-args) + (vector-copy v)))) + (%vector-merge-sort! < v start end temp))))) + +(define (vector-merge-sort < v . maybe-args) + (call-with-values + (lambda () (vector-start+end v maybe-args)) + (lambda (start end) + (let ((ans (vector-copy v start end))) + (vector-merge-sort! < ans) + ans)))) + + +;;; %VECTOR-MERGE-SORT! is not exported. +;;; Preconditions: +;;; V TEMP vectors +;;; START END fixnums +;;; START END legal indices for V and TEMP +;;; If these preconditions are ensured by the cover functions, you +;;; can safely change this code to use unsafe fixnum arithmetic and vector +;;; indexing ops, for *huge* speedup. + +;;; This merge sort is "opportunistic" -- the leaves of the merge tree are +;;; contiguous runs of already sorted elements in the vector. In the best +;;; case -- an already sorted vector -- it runs in linear time. Worst case +;;; is still O(n lg n) time. + +(define (%vector-merge-sort! elt< v0 l r temp0) + (define (xor a b) (not (eq? a b))) + + ;; Merge v1[l,l+len1) and v2[l+len1,l+len1+len2) into target[l,l+len1+len2) + ;; Merge left-to-right, so that TEMP may be either V1 or V2 + ;; (that this is OK takes a little bit of thought). + ;; V2=TARGET? is true if V2 and TARGET are the same, which allows + ;; merge to punt the final blit half of the time. + + (define (merge target v1 v2 l len1 len2 v2=target?) + (letrec ((vblit (lambda (fromv j i end) ; Blit FROMV[J,END) to TARGET[I,?] + (let lp ((j j) (i i)) ; J < END. The final copy. + (vector-set! target i (vector-ref fromv j)) + (let ((j (+ j 1))) + (if (< j end) (lp j (+ i 1)))))))) + + (let* ((r1 (+ l len1)) + (r2 (+ r1 len2))) + ; Invariants: + (let lp ((n l) ; N is next index of + (j l) (x (vector-ref v1 l)) ; TARGET to write. + (k r1) (y (vector-ref v2 r1))) ; X = V1[J] + (let ((n+1 (+ n 1))) ; Y = V2[K] + (if (elt< y x) + (let ((k (+ k 1))) + (vector-set! target n y) + (if (< k r2) + (lp n+1 j x k (vector-ref v2 k)) + (vblit v1 j n+1 r1))) + (let ((j (+ j 1))) + (vector-set! target n x) + (if (< j r1) + (lp n+1 j (vector-ref v1 j) k y) + (if (not v2=target?) (vblit v2 k n+1 r2)))))))))) + + + ;; Might hack GETRUN so that if the run is short it pads it out to length + ;; 10 with insert sort... + + ;; Precondition: l < r. + (define (getrun v l r) + (let lp ((i (+ l 1)) (x (vector-ref v l))) + (if (>= i r) + (- i l) + (let ((y (vector-ref v i))) + (if (elt< y x) + (- i l) + (lp (+ i 1) y)))))) + + ;; RECUR: Sort V0[L,L+LEN) for some LEN where 0 < WANT <= LEN <= (R-L). + ;; That is, sort *at least* WANT elements in V0 starting at index L. + ;; May put the result into either V0[L,L+LEN) or TEMP0[L,L+LEN). + ;; Must not alter either vector outside this range. + ;; Return: + ;; - LEN -- the number of values we sorted + ;; - ANSVEC -- the vector holding the value + ;; - ANS=V0? -- tells if ANSVEC is V0 or TEMP + ;; + ;; LP: V[L,L+PFXLEN) holds a sorted prefix of V0. + ;; TEMP = if V = V0 then TEMP0 else V0. (I.e., TEMP is the other vec.) + ;; PFXLEN2 is a power of 2 <= PFXLEN. + ;; Solve RECUR's problem. + (if (< l r) ; Don't try to sort an empty range. + (call-with-values + (lambda () + (let recur ((l l) (want (- r l))) + (let ((len (- r l))) + (let lp ((pfxlen (getrun v0 l r)) (pfxlen2 1) + (v v0) (temp temp0) + (v=v0? #t)) + (if (or (>= pfxlen want) (= pfxlen len)) + (values pfxlen v v=v0?) + (let ((pfxlen2 (let lp ((j pfxlen2)) + (let ((j*2 (+ j j))) + (if (<= j pfxlen) (lp j*2) j)))) + (tail-len (- len pfxlen))) + ;; PFXLEN2 is now the largest power of 2 <= PFXLEN. + ;; (Just think of it as being roughly PFXLEN.) + (call-with-values + (lambda () + (recur (+ pfxlen l) pfxlen2)) + (lambda (nr-len nr-vec nrvec=v0?) + (merge temp v nr-vec l pfxlen nr-len + (xor nrvec=v0? v=v0?)) + (lp (+ pfxlen nr-len) (+ pfxlen2 pfxlen2) + temp v (not v=v0?)))))))))) + (lambda (ignored-len ignored-ansvec ansvec=v0?) + (if (not ansvec=v0?) + (vector-copy! v0 l temp0 l r)))))) + + +;;; Copyright +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This code is +;;; Copyright (c) 1998 by Olin Shivers. +;;; The terms are: You may do as you please with this code, as long as +;;; you do not delete this notice or hold me responsible for any outcome +;;; related to its use. +;;; +;;; Blah blah blah. Don't you think source files should contain more lines +;;; of code than copyright notice? + + +;;; Code tuning & porting +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This code is *tightly* bummed as far as I can go in portable Scheme. +;;; +;;; The two internal primitives that do the real work can be converted to +;;; use unsafe vector-indexing and fixnum-specific arithmetic ops *if* you +;;; alter the four small cover functions to enforce the invariants. This should +;;; provide *big* speedups. In fact, all the code bumming I've done pretty +;;; much disappears in the noise unless you have a good compiler and also +;;; can dump the vector-index checks and generic arithmetic -- so I've really +;;; just set things up for you to exploit. +;;; +;;; The optional-arg parsing, defaulting, and error checking is done with a +;;; portable R4RS macro. But if your Scheme has a faster mechanism (e.g., +;;; Chez), you should definitely port over to it. Note that argument defaulting +;;; and error-checking are interleaved -- you don't have to error-check +;;; defaulted START/END args to see if they are fixnums that are legal vector +;;; indices for the corresponding vector, etc. diff --git a/srfi/sorting/vqsort2.scm b/srfi/sorting/vqsort2.scm new file mode 100644 index 00000000..e15f9c81 --- /dev/null +++ b/srfi/sorting/vqsort2.scm @@ -0,0 +1,191 @@ +;;; The SRFI-32 sort package -- quick sort -*- Scheme -*- +;;; Copyright (c) 2002 by Olin Shivers. +;;; This code is open-source; see the end of the file for porting and +;;; more copyright information. +;;; Olin Shivers 2002/7. + +;;; (quick-sort < v [start end]) -> vector +;;; (quick-sort! < v [start end]) -> unspecific +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; The algorithm is a standard quicksort, but the partition loop is fancier, +;;; arranging the vector into a left part that is <, a middle region that is +;;; =, and a right part that is > the pivot. Here's how it is done: +;;; The partition loop divides the range being partitioned into five +;;; subranges: +;;; =======<<<<<<<<>>>>>>======= +;;; where = marks a value that is equal the pivot, < marks a value that +;;; is less than the pivot, ? marks a value that hasn't been scanned, and +;;; > marks a value that is greater than the pivot. Let's consider the +;;; left-to-right scan. If it checks a ? value that is <, it keeps scanning. +;;; If the ? value is >, we stop the scan -- we are ready to start the +;;; right-to-left scan and then do a swap. But if the rightward scan checks +;;; a ? value that is =, we swap it *down* to the end of the initial chunk +;;; of ====='s -- we exchange it with the leftmost < value -- and then +;;; continue our rightward scan. The leftwards scan works in a similar +;;; fashion, scanning past > elements, stopping on a < element, and swapping +;;; up = elements. When we are done, we have a picture like this +;;; ========<<<<<<<<<<<<>>>>>>>>>>========= +;;; Then swap the = elements up into the middle of the vector to get +;;; this: +;;; <<<<<<<<<<<<=================>>>>>>>>>> +;;; Then recurse on the <'s and >'s. Work out all the tricky little +;;; boundary cases, and you're done. +;;; +;;; Other tricks: +;;; - This quicksort also makes some effort to pick the pivot well -- it uses +;;; the median of three elements as the partition pivot, so pathological n^2 +;;; run time is much rarer (but not eliminated completely). If you really +;;; wanted to get fancy, you could use a random number generator to choose +;;; pivots. The key to this trick is that you only need to pick one random +;;; number for each *level* of recursion -- i.e. you only need (lg n) random +;;; numbers. +;;; - After the partition, we *recurse* on the smaller of the two pending +;;; regions, then *tail-recurse* (iterate) on the larger one. This guarantees +;;; we use no more than lg(n) stack frames, worst case. +;;; - There are two ways to finish off the sort. +;;; A Recurse down to regions of size 10, then sort each such region using +;;; insertion sort. +;;; B Recurse down to regions of size 10, then sort *the entire vector* +;;; using insertion sort. +;;; We do A. Each choice has a cost. Choice A has more overhead to invoke +;;; all the separate insertion sorts -- choice B only calls insertion sort +;;; once. But choice B will call the comparison function *more times* -- +;;; it will unnecessarily compare elt 9 of one segment to elt 0 of the +;;; following segment. The overhead of choice A is linear in the length +;;; of the vector, but *otherwise independent of the algorithm's parameters*. +;;; I.e., it's a *fixed*, *small* constant factor. The cost of the extra +;;; comparisons made by choice B, however, is dependent on an externality: +;;; the comparison function passed in by the client. This can be made +;;; arbitrarily bad -- that is, the constant factor *isn't* fixed by the +;;; sort algorithm; instead, it's determined by the comparison function. +;;; If your comparison function is very, very slow, you want to eliminate +;;; every single one that you can. Choice A limits the potential badness, +;;; so that is what we do. + +(define (vector-quick-sort! < v . maybe-start+end) + (call-with-values + (lambda () (vector-start+end v maybe-start+end)) + (lambda (start end) + (%quick-sort! < v start end)))) + +(define (vector-quick-sort < v . maybe-start+end) + (call-with-values + (lambda () (vector-start+end v maybe-start+end)) + (lambda (start end) + (let ((ans (make-vector (- end start)))) + (vector-portion-copy! ans v start end) + (%quick-sort! < ans 0 (- end start)) + ans)))) + +;;; %QUICK-SORT is not exported. +;;; Preconditions: +;;; V vector +;;; START END fixnums +;;; 0 <= START, END <= (vector-length V) +;;; If these preconditions are ensured by the cover functions, you +;;; can safely change this code to use unsafe fixnum arithmetic and vector +;;; indexing ops, for *huge* speedup. +;;; +;;; We bail out to insertion sort for small ranges; feel free to tune the +;;; crossover -- it's just a random guess. If you don't have the insertion +;;; sort routine, just kill that branch of the IF and change the recursion +;;; test to (< 1 (- r l)) -- the code is set up to work that way. + +(define (%quick-sort! elt< v start end) + ;; Swap the N outer pairs of the range [l,r). + (define (swap l r n) + (if (> n 0) + (let ((x (vector-ref v l)) + (r-1 (- r 1))) + (vector-set! v l (vector-ref v r-1)) + (vector-set! v r-1 x) + (swap (+ l 1) r-1 (- n 1))))) + + ;; Choose the median of V[l], V[r], and V[middle] for the pivot. + (define (median v1 v2 v3) + (call-with-values + (lambda () (if (elt< v1 v2) (values v1 v2) (values v2 v1))) + (lambda (little big) + (if (elt< big v3) + big + (if (elt< little v3) v3 little))))) + + (let recur ((l start) (r end)) ; Sort the range [l,r). + (if (< 10 (- r l)) ; Ten: the gospel according to Sedgewick. + + (let ((pivot (median (vector-ref v l) + (vector-ref v (quotient (+ l r) 2)) + (vector-ref v (- r 1))))) + + ;; Everything in these loops is driven by the invariants expressed + ;; in the little pictures & the corresponding l,i,j,k,m,r indices + ;; and the associated ranges. + + ;; =======<<<<<<<<>>>>>>======= + ;; l i j k m r + ;; [l,i) [i,j) [j,k] (k,m] (m,r) + (letrec ((lscan (lambda (i j k m) ; left-to-right scan + (let lp ((i i) (j j)) + (if (> j k) + (done i j m) + (let ((x (vector-ref v j))) + (cond ((elt< x pivot) (lp i (+ j 1))) + + ((elt< pivot x) (rscan i j k m)) + + (else ; Equal + (if (< i j) + (begin (vector-set! v j (vector-ref v i)) + (vector-set! v i x))) + (lp (+ i 1) (+ j 1))))))))) + + ;; =======<<<<<<<<<>????????>>>>>>>======= + ;; l i j k m r + ;; [l,i) [i,j) j (j,k] (k,m] (m,r) + (rscan (lambda (i j k m) ; right-to-left scan + (let lp ((k k) (m m)) + (if (<= k j) + (done i j m) + (let* ((x (vector-ref v k))) + (cond ((elt< pivot x) (lp (- k 1) m)) + + ((elt< x pivot) ; Swap j & k & lscan. + (vector-set! v k (vector-ref v j)) + (vector-set! v j x) + (lscan i (+ j 1) (- k 1) m)) + + (else ; x=pivot + (if (< k m) + (begin (vector-set! v k (vector-ref v m)) + (vector-set! v m x))) + (lp (- k 1) (- m 1))))))))) + + + ;; =======<<<<<<<<<<<<<>>>>>>>>>>>======= + ;; l i j m r + ;; [l,i) [i,j) [j,m] (m,r) + (done (lambda (i j m) + (let ((num< (- j i)) + (num> (+ 1 (- m j))) + (num=l (- i l)) + (num=r (- (- r m) 1))) + (swap l j (min num< num=l)) ; Swap ='s into + (swap j r (min num> num=r)) ; the middle. + ;; Recur on the <'s and >'s. Recurring on the + ;; smaller range and iterating on the bigger + ;; range ensures O(lg n) stack frames, worst case. + (cond ((<= num< num>) + (recur l (+ l num<)) + (recur (- r num>) r)) + (else + (recur (- r num>) r) + (recur l (+ l num<)))))))) + + (let ((r-1 (- r 1))) + (lscan l l r-1 r-1)))) + + ;; Small segment => punt to insert sort. + ;; Use the dangerous subprimitive. + (%vector-insert-sort! elt< v l r)))) + + diff --git a/srfi/sorting/vqsort3.scm b/srfi/sorting/vqsort3.scm new file mode 100644 index 00000000..8c69c83a --- /dev/null +++ b/srfi/sorting/vqsort3.scm @@ -0,0 +1,261 @@ +;;; The SRFI-32 sort package -- three-way quick sort -*- Scheme -*- +;;; Copyright (c) 2002 by Olin Shivers. +;;; This code is open-source; see the end of the file for porting and +;;; more copyright information. +;;; Olin Shivers 2002/7. + +;;; (quick-sort3! c v [start end]) -> unspecific +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Sort vector V[start,end) using three-way comparison function C: +;;; (c x y) < 0 => x x=y +;;; (c x y) > 0 => x>y +;;; That is, C acts as a sort of "subtraction" procedure; using - for the +;;; comparison function will cause numbers to be sorted into increasing order. +;;; +;;; This algorithm is more efficient than standard, two-way quicksort if there +;;; are many duplicate items in the data set and the comparison function is +;;; relatively expensive (e.g., comparing large strings). It is due to Jon +;;; Bentley & Doug McIlroy; I learned it from Bentley. +;;; +;;; The algorithm is a standard quicksort, but the partition loop is fancier, +;;; arranging the vector into a left part that is <, a middle region that is +;;; =, and a right part that is > the pivot. Here's how it is done: +;;; The partition loop divides the range being partitioned into five +;;; subranges: +;;; =======<<<<<<<<>>>>>>======= +;;; where = marks a value that is equal the pivot, < marks a value that +;;; is less than the pivot, ? marks a value that hasn't been scanned, and +;;; > marks a value that is greater than the pivot. Let's consider the +;;; left-to-right scan. If it checks a ? value that is <, it keeps scanning. +;;; If the ? value is >, we stop the scan -- we are ready to start the +;;; right-to-left scan and then do a swap. But if the rightward scan checks +;;; a ? value that is =, we swap it *down* to the end of the initial chunk +;;; of ====='s -- we exchange it with the leftmost < value -- and then +;;; continue our rightward scan. The leftwards scan works in a similar +;;; fashion, scanning past > elements, stopping on a < element, and swapping +;;; up = elements. When we are done, we have a picture like this +;;; ========<<<<<<<<<<<<>>>>>>>>>>========= +;;; Then swap the = elements up into the middle of the vector to get +;;; this: +;;; <<<<<<<<<<<<=================>>>>>>>>>> +;;; Then recurse on the <'s and >'s. Work out all the tricky little +;;; boundary cases, and you're done. +;;; +;;; Other tricks that make this implementation industrial strength: +;;; - This quicksort makes some effort to pick the pivot well -- it uses the +;;; median of three elements as the partition pivot, so pathological n^2 +;;; run time is much rarer (but not eliminated completely). If you really +;;; wanted to get fancy, you could use a random number generator to choose +;;; pivots. The key to this trick is that you only need to pick one random +;;; number for each *level* of recursion -- i.e. you only need (lg n) random +;;; numbers. +;;; +;;; - After the partition, we *recurse* on the smaller of the two pending +;;; regions, then *tail-recurse* (iterate) on the larger one. This guarantees +;;; we use no more than lg(n) stack frames, worst case. +;;; +;;; - There are two ways to finish off the sort. +;;; A. Recurse down to regions of size 10, then sort each such region using +;;; insertion sort. +;;; B. Recurse down to regions of size 10, then sort *the entire vector* +;;; using insertion sort. +;;; We do A. Each choice has a cost. Choice A has more overhead to invoke +;;; all the separate insertion sorts -- choice B only calls insertion sort +;;; once. But choice B will call the comparison function *more times* -- +;;; it will unnecessarily compare elt 9 of one segment to elt 0 of the +;;; following segment. The overhead of choice A is linear in the length +;;; of the vector, but *otherwise independent of the algorithm's parameters*. +;;; I.e., it's a *fixed*, *small* constant factor. The cost of the extra +;;; comparisons made by choice B, however, is dependent on an externality: +;;; the comparison function passed in by the client. This can be made +;;; arbitrarily bad -- that is, the constant factor *isn't* fixed by the +;;; sort algorithm; instead, it's determined by the comparison function. +;;; If your comparison function is very, very slow, you want to eliminate +;;; every single one that you can. Choice A limits the potential badness, +;;; so that is what we do. + +(define (vector-quick-sort3! c v . maybe-start+end) + (call-with-values + (lambda () (vector-start+end v maybe-start+end)) + (lambda (start end) + (%quick-sort3! c v start end)))) + +(define (vector-quick-sort3 c v . maybe-start+end) + (call-with-values + (lambda () (vector-start+end v maybe-start+end)) + (lambda (start end) + (let ((ans (make-vector (- end start)))) + (vector-portion-copy! ans v start end) + (%quick-sort3! c ans 0 (- end start)) + ans)))) + +;;; %QUICK-SORT3! is not exported. +;;; Preconditions: +;;; V vector +;;; START END fixnums +;;; 0 <= START, END <= (vector-length V) +;;; If these preconditions are ensured by the cover functions, you +;;; can safely change this code to use unsafe fixnum arithmetic and vector +;;; indexing ops, for *huge* speedup. +;;; +;;; We bail out to insertion sort for small ranges; feel free to tune the +;;; crossover -- it's just a random guess. If you don't have the insertion +;;; sort routine, just kill that branch of the IF and change the recursion +;;; test to (< 1 (- r l)) -- the code is set up to work that way. + +(define (%quick-sort3! c v start end) + (define (swap l r n) ; Little utility -- swap the N + (if (> n 0) + (let ((x (vector-ref v l)) ; outer pairs of the range [l,r). + (r-1 (- r 1))) + (vector-set! v l (vector-ref v r-1)) + (vector-set! v r-1 x) + (swap (+ l 1) r-1 (- n 1))))) + + (define (sort3 v1 v2 v3) + (call-with-values + (lambda () (if (< (c v1 v2) 0) (values v1 v2) (values v2 v1))) + (lambda (little big) + (if (< (c big v3) 0) + (values little big v3) + (if (< (c little v3) 0) + (values little v3 big) + (values v3 little big)))))) + + (define (elt< v1 v2) + (negative? (c v1 v2))) + + (let recur ((l start) (r end)) ; Sort the range [l,r). + (if (< 10 (- r l)) ; 10: the gospel according to Sedgewick. + + ;; Choose the median of V[l], V[r-1], and V[middle] for the pivot. + ;; We do this by sorting these three elts; call the results LO, PIVOT + ;; & HI. Put LO, PIVOT & HI where they should go in the vector. We + ;; will kick off the partition step with one elt (PIVOT) in the left= + ;; range, one elt (LO) in the < range, one elt (HI) in in the > range + ;; & no elts in the right= range. + (let* ((r-1 (- r 1)) ; Three handy + (mid (quotient (+ l r) 2)) ; common + (l+1 (+ l 1)) ; subexpressions + (pivot (call-with-values + (lambda () + (sort3 (vector-ref v l) + (vector-ref v mid) + (vector-ref v r-1))) + (lambda (lo piv hi) + (let ((tmp (vector-ref v l+1))) ; Put LO, PIV & HI + (vector-set! v l piv) ; back into V + (vector-set! v r-1 hi) ; where they belong, + (vector-set! v l+1 lo) + (vector-set! v mid tmp) + piv))))) ; and return PIV as pivot. + + + ;; Everything in these loops is driven by the invariants expressed + ;; in the little pictures, the corresponding l,i,j,k,m,r indices, + ;; & the associated ranges. + + ;; =======<<<<<<<<>>>>>>======= (picture) + ;; l i j k m r (indices) + ;; [l,i) [i,j) [j,k] (k,m] (m,r) (ranges ) + (letrec ((lscan (lambda (i j k m) ; left-to-right scan + (let lp ((i i) (j j)) + (if (> j k) + (done i j m) + (let* ((x (vector-ref v j)) + (sign (c x pivot))) + (cond ((< sign 0) (lp i (+ j 1))) + + ((= sign 0) + (if (< i j) + (begin (vector-set! v j (vector-ref v i)) + (vector-set! v i x))) + (lp (+ i 1) (+ j 1))) + + ((> sign 0) (rscan i j k m)))))))) + + ;; =======<<<<<<<<<>????????>>>>>>>======= + ;; l i j k m r + ;; [l,i) [i,j) j (j,k] (k,m] (m,r) + (rscan (lambda (i j k m) ; right-to-left scan + (let lp ((k k) (m m)) + (if (<= k j) + (done i j m) + (let* ((x (vector-ref v k)) + (sign (c x pivot))) + (cond ((> sign 0) (lp (- k 1) m)) + + ((= sign 0) + (if (< k m) + (begin (vector-set! v k (vector-ref v m)) + (vector-set! v m x))) + (lp (- k 1) (- m 1))) + + ((< sign 0) ; Swap j & k & lscan. + (vector-set! v k (vector-ref v j)) + (vector-set! v j x) + (lscan i (+ j 1) (- k 1) m)))))))) + + ;; =======<<<<<<<<<<<<<>>>>>>>>>>>======= + ;; l i j m r + ;; [l,i) [i,j) [j,m] (m,r) + (done (lambda (i j m) + (let ((num< (- j i)) + (num> (+ 1 (- m j))) + (num=l (- i l)) + (num=r (- (- r m) 1))) + (swap l j (min num< num=l)) ; Swap ='s into + (swap j r (min num> num=r)) ; the middle. + ;; Recur on the <'s and >'s. Recurring on the + ;; smaller range and iterating on the bigger + ;; range ensures O(lg n) stack frames, worst case. + (cond ((<= num< num>) + (recur l (+ l num<)) + (recur (- r num>) r)) + (else + (recur (- r num>) r) + (recur l (+ l num<)))))))) + + ;; To repeat: We kick off the partition step with one elt (PIVOT) + ;; in the left= range, one elt (LO) in the < range, one elt (HI) + ;; in the > range & no elts in the right= range. + (lscan l+1 (+ l 2) (- r 2) r-1))) + + ;; Small segment => punt to insert sort. + ;; Use the dangerous subprimitive. + (%vector-insert-sort! elt< v l r)))) + +;;; Copyright +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This code is +;;; Copyright (c) 1998 by Olin Shivers. +;;; The terms are: You may do as you please with this code, as long as +;;; you do not delete this notice or hold me responsible for any outcome +;;; related to its use. +;;; +;;; Blah blah blah. + + +;;; Code tuning & porting +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; - The quicksort recursion bottoms out in a call to an insertion sort +;;; routine, %INSERT-SORT!. But you could even punt this and go with pure +;;; recursion in a pinch. +;;; +;;; This code is *tightly* bummed as far as I can go in portable Scheme. +;;; +;;; The internal primitive %QUICK-SORT! that does the real work can be +;;; converted to use unsafe vector-indexing and fixnum-specific arithmetic ops +;;; *if* you alter the two small cover functions to enforce the invariants. +;;; This should provide *big* speedups. In fact, all the code bumming I've +;;; done pretty much disappears in the noise unless you have a good compiler +;;; and also can dump the vector-index checks and generic arithmetic -- so +;;; I've really just set things up for you to exploit. +;;; +;;; The optional-arg parsing, defaulting, and error checking is done with a +;;; portable R4RS macro. But if your Scheme has a faster mechanism (e.g., +;;; Chez), you should definitely port over to it. Note that argument defaulting +;;; and error-checking are interleaved -- you don't have to error-check +;;; defaulted START/END args to see if they are fixnums that are legal vector +;;; indices for the corresponding vector, etc.