mirror of
https://github.com/justinethier/cyclone.git
synced 2025-05-19 05:39:17 +02:00
Initial files
This commit is contained in:
parent
73219778a6
commit
64fef1271c
18 changed files with 3984 additions and 0 deletions
186
srfi/sorting/delndups.scm
Normal file
186
srfi/sorting/delndups.scm
Normal file
|
@ -0,0 +1,186 @@
|
||||||
|
;;; The sort package -- delete neighboring duplicate elts
|
||||||
|
;;; Copyright (c) 1998 by Olin Shivers.
|
||||||
|
;;; This code is open-source; see the end of the file for porting and
|
||||||
|
;;; more copyright information.
|
||||||
|
;;; Olin Shivers 11/98.
|
||||||
|
|
||||||
|
;;; Problem:
|
||||||
|
;;; vector-delete-neighbor-dups pushes N stack frames, where N is the number
|
||||||
|
;;; of elements in the answer vector. This is arguably a very efficient thing
|
||||||
|
;;; to do, but it might blow out on a system with a limited stack but a big
|
||||||
|
;;; heap. We could rewrite this to "chunk" up answers in temp vectors if we
|
||||||
|
;;; push more than a certain number of frames, then allocate a final answer,
|
||||||
|
;;; copying all the chunks into the answer. But it's much more complex code.
|
||||||
|
|
||||||
|
;;; Exports:
|
||||||
|
;;; (list-delete-neighbor-dups = lis) -> list
|
||||||
|
;;; (list-delete-neighbor-dups! = lis) -> list
|
||||||
|
;;; (vector-delete-neighbor-dups = v [start end]) -> vector
|
||||||
|
;;; (vector-delete-neighbor-dups! = v [start end]) -> end'
|
||||||
|
|
||||||
|
;;; These procedures delete adjacent duplicate elements from a list or
|
||||||
|
;;; a vector, using a given element equality procedure. The first or leftmost
|
||||||
|
;;; element of a run of equal elements is the one that survives. The list
|
||||||
|
;;; or vector is not otherwise disordered.
|
||||||
|
;;;
|
||||||
|
;;; These procedures are linear time -- much faster than the O(n^2) general
|
||||||
|
;;; duplicate-elt deletors that do not assume any "bunching" of elements.
|
||||||
|
;;; If you want to delete duplicate elements from a large list or vector,
|
||||||
|
;;; sort the elements to bring equal items together, then use one of these
|
||||||
|
;;; procedures -- for a total time of O(n lg n).
|
||||||
|
|
||||||
|
;;; LIST-DELETE-NEIGHBOR-DUPS
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; Below are multiple versions of the LIST-DELETE-NEIGHBOR-DUPS procedure,
|
||||||
|
;;; from simple to complex. RECUR's contract: Strip off any leading X's from
|
||||||
|
;;; LIS, and return that list neighbor-dup-deleted.
|
||||||
|
;;;
|
||||||
|
;;; The final version
|
||||||
|
;;; - shares a common subtail between the input & output list, up to 1024
|
||||||
|
;;; elements;
|
||||||
|
;;; - Needs no more than 1024 stack frames.
|
||||||
|
|
||||||
|
#;
|
||||||
|
;;; Simplest version.
|
||||||
|
;;; - Always allocates a fresh list / never shares storage.
|
||||||
|
;;; - Needs N stack frames, if answer is length N.
|
||||||
|
(define (list-delete-neighbor-dups = lis)
|
||||||
|
(if (pair? lis)
|
||||||
|
(let ((x0 (car lis)))
|
||||||
|
(cons x0 (let recur ((x0 x0) (xs (cdr lis)))
|
||||||
|
(if (pair? xs)
|
||||||
|
(let ((x1 (car xs))
|
||||||
|
(x2+ (cdr xs)))
|
||||||
|
(if (= x0 x1)
|
||||||
|
(recur x0 x2+) ; Loop, actually.
|
||||||
|
(cons x1 (recur x1 x2+))))
|
||||||
|
xs))))
|
||||||
|
lis))
|
||||||
|
|
||||||
|
;;; This version tries to use cons cells from input by sharing longest
|
||||||
|
;;; common tail between input & output. Still needs N stack frames, for ans
|
||||||
|
;;; of length N.
|
||||||
|
(define (list-delete-neighbor-dups = lis)
|
||||||
|
(if (pair? lis)
|
||||||
|
(let* ((x0 (car lis))
|
||||||
|
(xs (cdr lis))
|
||||||
|
(ans (let recur ((x0 x0) (xs xs))
|
||||||
|
(if (pair? xs)
|
||||||
|
(let ((x1 (car xs))
|
||||||
|
(x2+ (cdr xs)))
|
||||||
|
(if (= x0 x1)
|
||||||
|
(recur x0 x2+)
|
||||||
|
(let ((ans-tail (recur x1 x2+)))
|
||||||
|
(if (eq? ans-tail x2+) xs
|
||||||
|
(cons x1 ans-tail)))))
|
||||||
|
xs))))
|
||||||
|
(if (eq? ans xs) lis (cons x0 ans)))
|
||||||
|
|
||||||
|
lis))
|
||||||
|
|
||||||
|
;;; LIST-DELETE-NEIGHBOR-DUPS!
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; This code runs in constant list space, constant stack, and also
|
||||||
|
;;; does only the minimum SET-CDR!'s necessary.
|
||||||
|
|
||||||
|
(define (list-delete-neighbor-dups! = lis)
|
||||||
|
(if (pair? lis)
|
||||||
|
(let lp1 ((prev lis) (prev-elt (car lis)) (lis (cdr lis)))
|
||||||
|
(if (pair? lis)
|
||||||
|
(let ((lis-elt (car lis))
|
||||||
|
(next (cdr lis)))
|
||||||
|
(if (= prev-elt lis-elt)
|
||||||
|
|
||||||
|
;; We found the first elts of a run of dups, so we know
|
||||||
|
;; we're going to have to do a SET-CDR!. Scan to the end of
|
||||||
|
;; the run, do the SET-CDR!, and loop on LP1.
|
||||||
|
(let lp2 ((lis next))
|
||||||
|
(if (pair? lis)
|
||||||
|
(let ((lis-elt (car lis))
|
||||||
|
(next (cdr lis)))
|
||||||
|
(if (= prev-elt lis-elt)
|
||||||
|
(lp2 next)
|
||||||
|
(begin (set-cdr! prev lis)
|
||||||
|
(lp1 lis lis-elt next))))
|
||||||
|
(set-cdr! prev lis))) ; Ran off end => quit.
|
||||||
|
|
||||||
|
(lp1 lis lis-elt next))))))
|
||||||
|
lis)
|
||||||
|
|
||||||
|
|
||||||
|
(define (vector-delete-neighbor-dups elt= v . maybe-start+end)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (vector-start+end v maybe-start+end))
|
||||||
|
(lambda (start end)
|
||||||
|
(if (< start end)
|
||||||
|
(let* ((x (vector-ref v start))
|
||||||
|
(ans (let recur ((x x) (i start) (j 1))
|
||||||
|
(if (< i end)
|
||||||
|
(let ((y (vector-ref v i))
|
||||||
|
(nexti (+ i 1)))
|
||||||
|
(if (elt= x y)
|
||||||
|
(recur x nexti j)
|
||||||
|
(let ((ansvec (recur y nexti (+ j 1))))
|
||||||
|
(vector-set! ansvec j y)
|
||||||
|
ansvec)))
|
||||||
|
(make-vector j)))))
|
||||||
|
(vector-set! ans 0 x)
|
||||||
|
ans)
|
||||||
|
'#()))))
|
||||||
|
|
||||||
|
|
||||||
|
;;; Packs the surviving elements to the left, in range [start,end'),
|
||||||
|
;;; and returns END'.
|
||||||
|
(define (vector-delete-neighbor-dups! elt= v . maybe-start+end)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (vector-start+end v maybe-start+end))
|
||||||
|
(lambda (start end)
|
||||||
|
|
||||||
|
(if (>= start end)
|
||||||
|
end
|
||||||
|
;; To eliminate unnecessary copying (read elt i then write the value
|
||||||
|
;; back at index i), we scan until we find the first dup.
|
||||||
|
(let skip ((j start) (vj (vector-ref v start)))
|
||||||
|
(let ((j+1 (+ j 1)))
|
||||||
|
(if (>= j+1 end)
|
||||||
|
end
|
||||||
|
(let ((vj+1 (vector-ref v j+1)))
|
||||||
|
(if (not (elt= vj vj+1))
|
||||||
|
(skip j+1 vj+1)
|
||||||
|
|
||||||
|
;; OK -- j & j+1 are dups, so we're committed to moving
|
||||||
|
;; data around. In lp2, v[start,j] is what we've done;
|
||||||
|
;; v[k,end) is what we have yet to handle.
|
||||||
|
(let lp2 ((j j) (vj vj) (k (+ j 2)))
|
||||||
|
(let lp3 ((k k))
|
||||||
|
(if (>= k end)
|
||||||
|
(+ j 1) ; Done.
|
||||||
|
(let ((vk (vector-ref v k))
|
||||||
|
(k+1 (+ k 1)))
|
||||||
|
(if (elt= vj vk)
|
||||||
|
(lp3 k+1)
|
||||||
|
(let ((j+1 (+ j 1)))
|
||||||
|
(vector-set! v j+1 vk)
|
||||||
|
(lp2 j+1 vk k+1))))))))))))))))
|
||||||
|
|
||||||
|
;;; Copyright
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; This code is
|
||||||
|
;;; Copyright (c) 1998 by Olin Shivers.
|
||||||
|
;;; The terms are: You may do as you please with this code, as long as
|
||||||
|
;;; you do not delete this notice or hold me responsible for any outcome
|
||||||
|
;;; related to its use.
|
||||||
|
;;;
|
||||||
|
;;; Blah blah blah. Don't you think source files should contain more lines
|
||||||
|
;;; of code than copyright notice?
|
||||||
|
;;;
|
||||||
|
;;; Code porting
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;;
|
||||||
|
;;; If your Scheme has a faster mechanism for handling optional arguments
|
||||||
|
;;; (e.g., Chez), you should definitely port over to it. Note that argument
|
||||||
|
;;; defaulting and error-checking are interleaved -- you don't have to
|
||||||
|
;;; error-check defaulted START/END args to see if they are fixnums that are
|
||||||
|
;;; legal vector indices for the corresponding vector, etc.
|
||||||
|
|
||||||
|
|
386
srfi/sorting/lmsort.scm
Normal file
386
srfi/sorting/lmsort.scm
Normal file
|
@ -0,0 +1,386 @@
|
||||||
|
;;; list merge & list merge-sort -*- Scheme -*-
|
||||||
|
;;; Copyright (c) 1998 by Olin Shivers.
|
||||||
|
;;; This code is open-source; see the end of the file for porting and
|
||||||
|
;;; more copyright information.
|
||||||
|
;;; Olin Shivers
|
||||||
|
|
||||||
|
;;; Exports:
|
||||||
|
;;; (list-merge < lis lis) -> list
|
||||||
|
;;; (list-merge! < lis lis) -> list
|
||||||
|
;;; (list-merge-sort < lis) -> list
|
||||||
|
;;; (list-merge-sort! < lis) -> list
|
||||||
|
|
||||||
|
;;; A stable list merge sort of my own device
|
||||||
|
;;; Two variants: pure & destructive
|
||||||
|
;;;
|
||||||
|
;;; This list merge sort is opportunistic (a "natural" sort) -- it exploits
|
||||||
|
;;; existing order in the input set. Instead of recursing all the way down to
|
||||||
|
;;; individual elements, the leaves of the merge tree are maximal contiguous
|
||||||
|
;;; runs of elements from the input list. So the algorithm does very well on
|
||||||
|
;;; data that is mostly ordered, with a best-case time of O(n) when the input
|
||||||
|
;;; list is already completely sorted. In any event, worst-case time is
|
||||||
|
;;; O(n lg n).
|
||||||
|
;;;
|
||||||
|
;;; The destructive variant is "in place," meaning that it allocates no new
|
||||||
|
;;; cons cells at all; it just rearranges the pairs of the input list with
|
||||||
|
;;; SET-CDR! to order it.
|
||||||
|
;;;
|
||||||
|
;;; The interesting control structure is the combination recursion/iteration
|
||||||
|
;;; of the core GROW function that does an "opportunistic" DFS walk of the
|
||||||
|
;;; merge tree, adaptively subdividing in response to the length of the
|
||||||
|
;;; merges, without requiring any auxiliary data structures beyond the
|
||||||
|
;;; recursion stack. It's actually quite simple -- ten lines of code.
|
||||||
|
;;; -Olin Shivers 10/20/98
|
||||||
|
|
||||||
|
;;; (mlet ((var-list mv-exp) ...) body ...)
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; A LET* form that handles multiple values. Move this into the two clients
|
||||||
|
;;; if you don't have a module system handy to restrict its visibility...
|
||||||
|
(define-syntax mlet ; Multiple-value LET*
|
||||||
|
(syntax-rules ()
|
||||||
|
((mlet ((() exp) rest ...) body ...)
|
||||||
|
(begin exp (mlet (rest ...) body ...)))
|
||||||
|
|
||||||
|
((mlet (((var) exp) rest ...) body ...)
|
||||||
|
(let ((var exp)) (mlet (rest ...) body ...)))
|
||||||
|
|
||||||
|
((mlet ((vars exp) rest ...) body ...)
|
||||||
|
(call-with-values (lambda () exp)
|
||||||
|
(lambda vars (mlet (rest ...) body ...))))
|
||||||
|
|
||||||
|
((mlet () body ...) (begin body ...))))
|
||||||
|
|
||||||
|
|
||||||
|
;;; (list-merge-sort < lis)
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; A natural, stable list merge sort.
|
||||||
|
;;; - natural: picks off maximal contiguous runs of pre-ordered data.
|
||||||
|
;;; - stable: won't invert the order of equal elements in the input list.
|
||||||
|
|
||||||
|
(define (list-merge-sort elt< lis)
|
||||||
|
|
||||||
|
;; (getrun lis) -> run runlen rest
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; Pick a run of non-decreasing data off of non-empty list LIS.
|
||||||
|
;; Return the length of this run, and the following list.
|
||||||
|
(define (getrun lis)
|
||||||
|
(let lp ((ans '()) (i 1) (prev (car lis)) (xs (cdr lis)))
|
||||||
|
(if (pair? xs)
|
||||||
|
(let ((x (car xs)))
|
||||||
|
(if (elt< x prev)
|
||||||
|
(values (append-reverse ans (cons prev '())) i xs)
|
||||||
|
(lp (cons prev ans) (+ i 1) x (cdr xs))))
|
||||||
|
(values (append-reverse ans (cons prev '())) i xs))))
|
||||||
|
|
||||||
|
(define (append-reverse rev-head tail)
|
||||||
|
(let lp ((rev-head rev-head) (tail tail))
|
||||||
|
(if (null-list? rev-head) tail
|
||||||
|
(lp (cdr rev-head) (cons (car rev-head) tail)))))
|
||||||
|
|
||||||
|
(define (null-list? l)
|
||||||
|
(cond ((pair? l) #f)
|
||||||
|
((null? l) #t)
|
||||||
|
(else (error "argument out of domain" l))))
|
||||||
|
|
||||||
|
;; (merge a b) -> list
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; List merge -- stably merge lists A (length > 0) & B (length > 0).
|
||||||
|
;; This version requires up to |a|+|b| stack frames.
|
||||||
|
(define (merge a b)
|
||||||
|
(let recur ((x (car a)) (a a)
|
||||||
|
(y (car b)) (b b))
|
||||||
|
(if (elt< y x)
|
||||||
|
(cons y (let ((b (cdr b)))
|
||||||
|
(if (pair? b)
|
||||||
|
(recur x a (car b) b)
|
||||||
|
a)))
|
||||||
|
(cons x (let ((a (cdr a)))
|
||||||
|
(if (pair? a)
|
||||||
|
(recur (car a) a y b)
|
||||||
|
b))))))
|
||||||
|
|
||||||
|
;; (grow s ls ls2 u lw) -> [a la unused]
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; The core routine. Read the next 20 lines of comments & all is obvious.
|
||||||
|
;; - S is a sorted list of length LS > 1.
|
||||||
|
;; - LS2 is some power of two <= LS.
|
||||||
|
;; - U is an unsorted list.
|
||||||
|
;; - LW is a positive integer.
|
||||||
|
;; Starting with S, and taking data from U as needed, produce
|
||||||
|
;; a sorted list of *at least* length LW, if there's enough data
|
||||||
|
;; (LW <= LS + length(U)), or use all of U if not.
|
||||||
|
;;
|
||||||
|
;; GROW takes maximal contiguous runs of data from U at a time;
|
||||||
|
;; it is allowed to return a list *longer* than LW if it gets lucky
|
||||||
|
;; with a long run.
|
||||||
|
;;
|
||||||
|
;; The key idea: If you want a merge operation to "pay for itself," the two
|
||||||
|
;; lists being merged should be about the same length. Remember that.
|
||||||
|
;;
|
||||||
|
;; Returns:
|
||||||
|
;; - A: The result list
|
||||||
|
;; - LA: The length of the result list
|
||||||
|
;; - UNUSED: The unused tail of U.
|
||||||
|
|
||||||
|
(define (grow s ls ls2 u lw) ; The core of the sort algorithm.
|
||||||
|
(if (or (<= lw ls) (not (pair? u))) ; Met quota or out of data?
|
||||||
|
(values s ls u) ; If so, we're done.
|
||||||
|
(mlet (((ls2) (let lp ((ls2 ls2))
|
||||||
|
(let ((ls2*2 (+ ls2 ls2)))
|
||||||
|
(if (<= ls2*2 ls) (lp ls2*2) ls2))))
|
||||||
|
;; LS2 is now the largest power of two <= LS.
|
||||||
|
;; (Just think of it as being roughly LS.)
|
||||||
|
((r lr u2) (getrun u)) ; Get a run, then
|
||||||
|
((t lt u3) (grow r lr 1 u2 ls2))) ; grow it up to be T.
|
||||||
|
(grow (merge s t) (+ ls lt) ; Merge S & T,
|
||||||
|
(+ ls2 ls2) u3 lw)))) ; and loop.
|
||||||
|
|
||||||
|
;; Note: (LENGTH LIS) or any constant guaranteed
|
||||||
|
;; to be greater can be used in place of INFINITY.
|
||||||
|
(if (pair? lis) ; Don't sort an empty list.
|
||||||
|
(mlet (((r lr tail) (getrun lis)) ; Pick off an initial run,
|
||||||
|
((infinity) #o100000000) ; then grow it up maximally.
|
||||||
|
((a la v) (grow r lr 1 tail infinity)))
|
||||||
|
a)
|
||||||
|
'()))
|
||||||
|
|
||||||
|
|
||||||
|
;;; (list-merge-sort! < lis)
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; A natural, stable, destructive, in-place list merge sort.
|
||||||
|
;;; - natural: picks off maximal contiguous runs of pre-ordered data.
|
||||||
|
;;; - stable: won't invert the order of equal elements in the input list.
|
||||||
|
;;; - destructive, in-place: this routine allocates no extra working memory;
|
||||||
|
;;; it simply rearranges the list with SET-CDR! operations.
|
||||||
|
|
||||||
|
(define (list-merge-sort! elt< lis)
|
||||||
|
;; (getrun lis) -> runlen last rest
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; Pick a run of non-decreasing data off of non-empty list LIS.
|
||||||
|
;; Return the length of this run, the last cons cell of the run,
|
||||||
|
;; and the following list.
|
||||||
|
(define (getrun lis)
|
||||||
|
(let lp ((lis lis) (x (car lis)) (i 1) (next (cdr lis)))
|
||||||
|
(if (pair? next)
|
||||||
|
(let ((y (car next)))
|
||||||
|
(if (elt< y x)
|
||||||
|
(values i lis next)
|
||||||
|
(lp next y (+ i 1) (cdr next))))
|
||||||
|
(values i lis next))))
|
||||||
|
|
||||||
|
;; (merge! a enda b endb) -> [m endm]
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; Destructively and stably merge non-empty lists A & B.
|
||||||
|
;; The last cons of A is ENDA. (The cdr of ENDA can be non-nil.)
|
||||||
|
;; the last cons of B is ENDB. (The cdr of ENDB can be non-nil.)
|
||||||
|
;;
|
||||||
|
;; Return the first and last cons cells of the merged list.
|
||||||
|
;; This routine is iterative & in-place: it runs in constant stack and
|
||||||
|
;; doesn't allocate any cons cells. It is also tedious but simple; don't
|
||||||
|
;; bother reading it unless necessary.
|
||||||
|
(define (merge! a enda b endb)
|
||||||
|
;; The logic of these two loops is completely driven by these invariants:
|
||||||
|
;; SCAN-A: (CDR PREV) = A. X = (CAR A). Y = (CAR B).
|
||||||
|
;; SCAN-B: (CDR PREV) = B. X = (CAR A). Y = (CAR B).
|
||||||
|
(letrec ((scan-a (lambda (prev x a y b) ; Zip down A until we
|
||||||
|
(cond ((elt< y x) ; find an elt > (CAR B).
|
||||||
|
(set-cdr! prev b)
|
||||||
|
(let ((next-b (cdr b)))
|
||||||
|
(if (eq? b endb)
|
||||||
|
(begin (set-cdr! b a) enda) ; Done.
|
||||||
|
(scan-b b x a (car next-b) next-b))))
|
||||||
|
|
||||||
|
((eq? a enda) (maybe-set-cdr! a b) endb) ; Done.
|
||||||
|
|
||||||
|
(else (let ((next-a (cdr a))) ; Continue scan.
|
||||||
|
(scan-a a (car next-a) next-a y b))))))
|
||||||
|
|
||||||
|
(scan-b (lambda (prev x a y b) ; Zip down B while its
|
||||||
|
(cond ((elt< y x) ; elts are < (CAR A).
|
||||||
|
(if (eq? b endb)
|
||||||
|
(begin (set-cdr! b a) enda) ; Done.
|
||||||
|
(let ((next-b (cdr b))) ; Continue scan.
|
||||||
|
(scan-b b x a (car next-b) next-b))))
|
||||||
|
|
||||||
|
(else (set-cdr! prev a)
|
||||||
|
(if (eq? a enda)
|
||||||
|
(begin (maybe-set-cdr! a b) endb) ; Done.
|
||||||
|
(let ((next-a (cdr a)))
|
||||||
|
(scan-a a (car next-a) next-a y b)))))))
|
||||||
|
|
||||||
|
;; This guy only writes if he has to. Called at most once.
|
||||||
|
;; Pointer equality rules; pure languages are for momma's boys.
|
||||||
|
(maybe-set-cdr! (lambda (pair val) (if (not (eq? (cdr pair) val))
|
||||||
|
(set-cdr! pair val)))))
|
||||||
|
|
||||||
|
(let ((x (car a)) (y (car b)))
|
||||||
|
(if (elt< y x)
|
||||||
|
|
||||||
|
;; B starts the answer list.
|
||||||
|
(values b (if (eq? b endb)
|
||||||
|
(begin (set-cdr! b a) enda)
|
||||||
|
(let ((next-b (cdr b)))
|
||||||
|
(scan-b b x a (car next-b) next-b))))
|
||||||
|
|
||||||
|
;; A starts the answer list.
|
||||||
|
(values a (if (eq? a enda)
|
||||||
|
(begin (maybe-set-cdr! a b) endb)
|
||||||
|
(let ((next-a (cdr a)))
|
||||||
|
(scan-a a (car next-a) next-a y b))))))))
|
||||||
|
|
||||||
|
;; (grow s ends ls ls2 u lw) -> [a enda la unused]
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; The core routine.
|
||||||
|
;; - S is a sorted list of length LS > 1, with final cons cell ENDS.
|
||||||
|
;; (CDR ENDS) doesn't have to be nil.
|
||||||
|
;; - LS2 is some power of two <= LS.
|
||||||
|
;; - U is an unsorted list.
|
||||||
|
;; - LW is a positive integer.
|
||||||
|
;; Starting with S, and taking data from U as needed, produce
|
||||||
|
;; a sorted list of *at least* length LW, if there's enough data
|
||||||
|
;; (LW <= LS + length(U)), or use all of U if not.
|
||||||
|
;;
|
||||||
|
;; GROW takes maximal contiguous runs of data from U at a time;
|
||||||
|
;; it is allowed to return a list *longer* than LW if it gets lucky
|
||||||
|
;; with a long run.
|
||||||
|
;;
|
||||||
|
;; The key idea: If you want a merge operation to "pay for itself," the two
|
||||||
|
;; lists being merged should be about the same length. Remember that.
|
||||||
|
;;
|
||||||
|
;; Returns:
|
||||||
|
;; - A: The result list (not properly terminated)
|
||||||
|
;; - ENDA: The last cons cell of the result list.
|
||||||
|
;; - LA: The length of the result list
|
||||||
|
;; - UNUSED: The unused tail of U.
|
||||||
|
(define (grow s ends ls ls2 u lw)
|
||||||
|
(if (and (pair? u) (< ls lw))
|
||||||
|
|
||||||
|
;; We haven't met the LW quota but there's still some U data to use.
|
||||||
|
(mlet (((ls2) (let lp ((ls2 ls2))
|
||||||
|
(let ((ls2*2 (+ ls2 ls2)))
|
||||||
|
(if (<= ls2*2 ls) (lp ls2*2) ls2))))
|
||||||
|
;; LS2 is now the largest power of two <= LS.
|
||||||
|
;; (Just think of it as being roughly LS.)
|
||||||
|
((lr endr u2) (getrun u)) ; Get a run from U;
|
||||||
|
((t endt lt u3) (grow u endr lr 1 u2 ls2)) ; grow it up to be T.
|
||||||
|
((st end-st) (merge! s ends t endt))) ; Merge S & T,
|
||||||
|
(grow st end-st (+ ls lt) (+ ls2 ls2) u3 lw)) ; then loop.
|
||||||
|
|
||||||
|
(values s ends ls u))) ; Done -- met LW quota or ran out of data.
|
||||||
|
|
||||||
|
;; Note: (LENGTH LIS) or any constant guaranteed
|
||||||
|
;; to be greater can be used in place of INFINITY.
|
||||||
|
(if (pair? lis)
|
||||||
|
(mlet (((lr endr rest) (getrun lis)) ; Pick off an initial run.
|
||||||
|
((infinity) #o100000000) ; Then grow it up maximally.
|
||||||
|
((a enda la v) (grow lis endr lr 1 rest infinity)))
|
||||||
|
(set-cdr! enda '()) ; Nil-terminate answer.
|
||||||
|
a) ; We're done.
|
||||||
|
|
||||||
|
'())) ; Don't sort an empty list.
|
||||||
|
|
||||||
|
|
||||||
|
;;; Merge
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; These two merge procedures are stable -- ties favor list A.
|
||||||
|
|
||||||
|
(define (list-merge < a b)
|
||||||
|
(cond ((not (pair? a)) b)
|
||||||
|
((not (pair? b)) a)
|
||||||
|
(else (let recur ((x (car a)) (a a) ; A is a pair; X = (CAR A).
|
||||||
|
(y (car b)) (b b)) ; B is a pair; Y = (CAR B).
|
||||||
|
(if (< y x)
|
||||||
|
|
||||||
|
(let ((b (cdr b)))
|
||||||
|
(if (pair? b)
|
||||||
|
(cons y (recur x a (car b) b))
|
||||||
|
(cons y a)))
|
||||||
|
|
||||||
|
(let ((a (cdr a)))
|
||||||
|
(if (pair? a)
|
||||||
|
(cons x (recur (car a) a y b))
|
||||||
|
(cons x b))))))))
|
||||||
|
|
||||||
|
|
||||||
|
;;; This destructive merge does as few SET-CDR!s as it can -- for example, if
|
||||||
|
;;; the list is already sorted, it does no SET-CDR!s at all. It is also
|
||||||
|
;;; iterative, running in constant stack.
|
||||||
|
|
||||||
|
(define (list-merge! < a b)
|
||||||
|
;; The logic of these two loops is completely driven by these invariants:
|
||||||
|
;; SCAN-A: (CDR PREV) = A. X = (CAR A). Y = (CAR B).
|
||||||
|
;; SCAN-B: (CDR PREV) = B. X = (CAR A). Y = (CAR B).
|
||||||
|
(letrec ((scan-a (lambda (prev a x b y) ; Zip down A doing
|
||||||
|
(if (< y x) ; no SET-CDR!s until
|
||||||
|
(let ((next-b (cdr b))) ; we hit a B elt that
|
||||||
|
(set-cdr! prev b) ; has to be inserted.
|
||||||
|
(if (pair? next-b)
|
||||||
|
(scan-b b a x next-b (car next-b))
|
||||||
|
(set-cdr! b a)))
|
||||||
|
|
||||||
|
(let ((next-a (cdr a)))
|
||||||
|
(if (pair? next-a)
|
||||||
|
(scan-a a next-a (car next-a) b y)
|
||||||
|
(set-cdr! a b))))))
|
||||||
|
|
||||||
|
(scan-b (lambda (prev a x b y) ; Zip down B doing
|
||||||
|
(if (< y x) ; no SET-CDR!s until
|
||||||
|
(let ((next-b (cdr b))) ; we hit an A elt that
|
||||||
|
(if (pair? next-b) ; has to be
|
||||||
|
(scan-b b a x next-b (car next-b)) ; inserted.
|
||||||
|
(set-cdr! b a)))
|
||||||
|
|
||||||
|
(let ((next-a (cdr a)))
|
||||||
|
(set-cdr! prev a)
|
||||||
|
(if (pair? next-a)
|
||||||
|
(scan-a a next-a (car next-a) b y)
|
||||||
|
(set-cdr! a b)))))))
|
||||||
|
|
||||||
|
(cond ((not (pair? a)) b)
|
||||||
|
((not (pair? b)) a)
|
||||||
|
|
||||||
|
;; B starts the answer list.
|
||||||
|
((< (car b) (car a))
|
||||||
|
(let ((next-b (cdr b)))
|
||||||
|
(if (null? next-b)
|
||||||
|
(set-cdr! b a)
|
||||||
|
(scan-b b a (car a) next-b (car next-b))))
|
||||||
|
b)
|
||||||
|
|
||||||
|
;; A starts the answer list.
|
||||||
|
(else (let ((next-a (cdr a)))
|
||||||
|
(if (null? next-a)
|
||||||
|
(set-cdr! a b)
|
||||||
|
(scan-a a next-a (car next-a) b (car b))))
|
||||||
|
a))))
|
||||||
|
|
||||||
|
|
||||||
|
;;; Copyright
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; This code is
|
||||||
|
;;; Copyright (c) 1998 by Olin Shivers.
|
||||||
|
;;; The terms are: You may do as you please with this code, as long as
|
||||||
|
;;; you do not delete this notice or hold me responsible for any outcome
|
||||||
|
;;; related to its use.
|
||||||
|
;;;
|
||||||
|
;;; Blah blah blah.
|
||||||
|
|
||||||
|
|
||||||
|
;;; Code tuning & porting
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; This is very portable code. It's R4RS with the following exceptions:
|
||||||
|
;;; - The R5RS multiple-value VALUES & CALL-WITH-VALUES procedures for
|
||||||
|
;;; handling multiple-value return.
|
||||||
|
;;;
|
||||||
|
;;; This code is *tightly* bummed as far as I can go in portable Scheme.
|
||||||
|
;;;
|
||||||
|
;;; - The fixnum arithmetic in LIST-MERGE-SORT! and COUNTED-LIST-MERGE!
|
||||||
|
;;; that could be safely switched over to unsafe, fixnum-specific ops,
|
||||||
|
;;; if you're sure that 2*maxlen is a fixnum, where maxlen is the length
|
||||||
|
;;; of the longest list you could ever have.
|
||||||
|
;;;
|
||||||
|
;;; - I typically write my code in a style such that every CAR and CDR
|
||||||
|
;;; application is protected by an upstream PAIR?. This is the case in this
|
||||||
|
;;; code, so all the CAR's and CDR's could safely switched over to unsafe
|
||||||
|
;;; versions. But check over the code before you do it, in case the source
|
||||||
|
;;; has been altered since I wrote this.
|
30
srfi/sorting/median.scm
Normal file
30
srfi/sorting/median.scm
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
;;;; Finding the median of a vector
|
||||||
|
;; This involves sorting the vector, which is why it's part
|
||||||
|
;; of this package.
|
||||||
|
|
||||||
|
(define (vector-find-median < v knil . maybe-mean)
|
||||||
|
(define mean (if (null? maybe-mean)
|
||||||
|
(lambda (a b) (/ (+ a b) 2))
|
||||||
|
(car maybe-mean)))
|
||||||
|
(define len (vector-length v))
|
||||||
|
(define newv (vector-sort < v))
|
||||||
|
(cond
|
||||||
|
((= len 0) knil)
|
||||||
|
((odd? len) (vector-ref newv (/ (- len 1) 2)))
|
||||||
|
(else (mean
|
||||||
|
(vector-ref newv (- (/ len 2) 1))
|
||||||
|
(vector-ref newv (/ len 2))))))
|
||||||
|
|
||||||
|
(define (vector-find-median! < v knil . maybe-mean)
|
||||||
|
(define mean (if (null? maybe-mean)
|
||||||
|
(lambda (a b) (/ (+ a b) 2))
|
||||||
|
(car maybe-mean)))
|
||||||
|
(define len (vector-length v))
|
||||||
|
(define newv (vector-sort! < v))
|
||||||
|
(cond
|
||||||
|
((= len 0) knil)
|
||||||
|
((odd? len) (vector-ref newv (/ (- len 1) 2)))
|
||||||
|
(else (mean
|
||||||
|
(vector-ref newv (- (/ len 2) 1))
|
||||||
|
(vector-ref newv (/ len 2))))))
|
||||||
|
|
226
srfi/sorting/merge.scm
Normal file
226
srfi/sorting/merge.scm
Normal file
|
@ -0,0 +1,226 @@
|
||||||
|
;;; This file extracts four merge procedures from lmsort.scm and vmsort.scm
|
||||||
|
;;; files written by Olin Shivers.
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;;
|
||||||
|
;;; Start of code extracted from Olin's lmsort.scm file.
|
||||||
|
;;;
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;; list merge & list merge-sort -*- Scheme -*-
|
||||||
|
;;; Copyright (c) 1998 by Olin Shivers.
|
||||||
|
;;; This code is open-source; see the end of the file for porting and
|
||||||
|
;;; more copyright information.
|
||||||
|
;;; Olin Shivers
|
||||||
|
|
||||||
|
;;; Exports:
|
||||||
|
;;; (list-merge < lis lis) -> list
|
||||||
|
;;; (list-merge! < lis lis) -> list
|
||||||
|
|
||||||
|
;;; Merge
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; These two merge procedures are stable -- ties favor list A.
|
||||||
|
|
||||||
|
(define (list-merge < a b)
|
||||||
|
(cond ((not (pair? a)) b)
|
||||||
|
((not (pair? b)) a)
|
||||||
|
(else (let recur ((x (car a)) (a a) ; A is a pair; X = (CAR A).
|
||||||
|
(y (car b)) (b b)) ; B is a pair; Y = (CAR B).
|
||||||
|
(if (< y x)
|
||||||
|
|
||||||
|
(let ((b (cdr b)))
|
||||||
|
(if (pair? b)
|
||||||
|
(cons y (recur x a (car b) b))
|
||||||
|
(cons y a)))
|
||||||
|
|
||||||
|
(let ((a (cdr a)))
|
||||||
|
(if (pair? a)
|
||||||
|
(cons x (recur (car a) a y b))
|
||||||
|
(cons x b))))))))
|
||||||
|
|
||||||
|
|
||||||
|
;;; This destructive merge does as few SET-CDR!s as it can -- for example, if
|
||||||
|
;;; the list is already sorted, it does no SET-CDR!s at all. It is also
|
||||||
|
;;; iterative, running in constant stack.
|
||||||
|
|
||||||
|
(define (list-merge! < a b)
|
||||||
|
;; The logic of these two loops is completely driven by these invariants:
|
||||||
|
;; SCAN-A: (CDR PREV) = A. X = (CAR A). Y = (CAR B).
|
||||||
|
;; SCAN-B: (CDR PREV) = B. X = (CAR A). Y = (CAR B).
|
||||||
|
(letrec ((scan-a (lambda (prev a x b y) ; Zip down A doing
|
||||||
|
(if (< y x) ; no SET-CDR!s until
|
||||||
|
(let ((next-b (cdr b))) ; we hit a B elt that
|
||||||
|
(set-cdr! prev b) ; has to be inserted.
|
||||||
|
(if (pair? next-b)
|
||||||
|
(scan-b b a x next-b (car next-b))
|
||||||
|
(set-cdr! b a)))
|
||||||
|
|
||||||
|
(let ((next-a (cdr a)))
|
||||||
|
(if (pair? next-a)
|
||||||
|
(scan-a a next-a (car next-a) b y)
|
||||||
|
(set-cdr! a b))))))
|
||||||
|
|
||||||
|
(scan-b (lambda (prev a x b y) ; Zip down B doing
|
||||||
|
(if (< y x) ; no SET-CDR!s until
|
||||||
|
(let ((next-b (cdr b))) ; we hit an A elt that
|
||||||
|
(if (pair? next-b) ; has to be
|
||||||
|
(scan-b b a x next-b (car next-b)) ; inserted.
|
||||||
|
(set-cdr! b a)))
|
||||||
|
|
||||||
|
(let ((next-a (cdr a)))
|
||||||
|
(set-cdr! prev a)
|
||||||
|
(if (pair? next-a)
|
||||||
|
(scan-a a next-a (car next-a) b y)
|
||||||
|
(set-cdr! a b)))))))
|
||||||
|
|
||||||
|
(cond ((not (pair? a)) b)
|
||||||
|
((not (pair? b)) a)
|
||||||
|
|
||||||
|
;; B starts the answer list.
|
||||||
|
((< (car b) (car a))
|
||||||
|
(let ((next-b (cdr b)))
|
||||||
|
(if (null? next-b)
|
||||||
|
(set-cdr! b a)
|
||||||
|
(scan-b b a (car a) next-b (car next-b))))
|
||||||
|
b)
|
||||||
|
|
||||||
|
;; A starts the answer list.
|
||||||
|
(else (let ((next-a (cdr a)))
|
||||||
|
(if (null? next-a)
|
||||||
|
(set-cdr! a b)
|
||||||
|
(scan-a a next-a (car next-a) b (car b))))
|
||||||
|
a))))
|
||||||
|
|
||||||
|
|
||||||
|
;;; Copyright
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; This code is
|
||||||
|
;;; Copyright (c) 1998 by Olin Shivers.
|
||||||
|
;;; The terms are: You may do as you please with this code, as long as
|
||||||
|
;;; you do not delete this notice or hold me responsible for any outcome
|
||||||
|
;;; related to its use.
|
||||||
|
;;;
|
||||||
|
;;; Blah blah blah.
|
||||||
|
|
||||||
|
|
||||||
|
;;; Code tuning & porting
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; This is very portable code. It's R4RS with the following exceptions:
|
||||||
|
;;; - The R5RS multiple-value VALUES & CALL-WITH-VALUES procedures for
|
||||||
|
;;; handling multiple-value return.
|
||||||
|
;;;
|
||||||
|
;;; This code is *tightly* bummed as far as I can go in portable Scheme.
|
||||||
|
;;;
|
||||||
|
;;; - The fixnum arithmetic in LIST-MERGE-SORT! and COUNTED-LIST-MERGE!
|
||||||
|
;;; that could be safely switched over to unsafe, fixnum-specific ops,
|
||||||
|
;;; if you're sure that 2*maxlen is a fixnum, where maxlen is the length
|
||||||
|
;;; of the longest list you could ever have.
|
||||||
|
;;;
|
||||||
|
;;; - I typically write my code in a style such that every CAR and CDR
|
||||||
|
;;; application is protected by an upstream PAIR?. This is the case in this
|
||||||
|
;;; code, so all the CAR's and CDR's could safely switched over to unsafe
|
||||||
|
;;; versions. But check over the code before you do it, in case the source
|
||||||
|
;;; has been altered since I wrote this.
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;;
|
||||||
|
;;; End of code extracted from Olin's lmsort.scm file.
|
||||||
|
;;;
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;;
|
||||||
|
;;; Start of code extracted from Olin's vmsort.scm file.
|
||||||
|
;;;
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;; The sort package -- stable vector merge & merge sort -*- Scheme -*-
|
||||||
|
;;; Copyright (c) 1998 by Olin Shivers.
|
||||||
|
;;; This code is open-source; see the end of the file for porting and
|
||||||
|
;;; more copyright information.
|
||||||
|
;;; Olin Shivers 10/98.
|
||||||
|
|
||||||
|
;;; Exports:
|
||||||
|
;;; (vector-merge < v1 v2 [start1 end1 start2 end2]) -> vector
|
||||||
|
;;; (vector-merge! < v v1 v2 [start0 start1 end1 start2 end2]) -> unspecific
|
||||||
|
;;;
|
||||||
|
;;; (vector-merge-sort < v [start end temp]) -> vector
|
||||||
|
;;; (vector-merge-sort! < v [start end temp]) -> unspecific
|
||||||
|
|
||||||
|
|
||||||
|
;;; Merge
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; (vector-merge < v1 v2 [start1 end1 start2 end2]) -> vector
|
||||||
|
;;; (vector-merge! < v v1 v2 [start start1 end1 start2 end2]) -> unspecific
|
||||||
|
;;;
|
||||||
|
;;; Stable vector merge -- V1's elements come out ahead of equal V2 elements.
|
||||||
|
|
||||||
|
(define (vector-merge < v1 v2 . maybe-starts+ends)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (vectors-start+end-2 v1 v2 maybe-starts+ends))
|
||||||
|
(lambda (start1 end1 start2 end2)
|
||||||
|
(let ((ans (make-vector (+ (- end1 start1) (- end2 start2)))))
|
||||||
|
(%vector-merge! < ans v1 v2 0 start1 end1 start2 end2)
|
||||||
|
ans))))
|
||||||
|
|
||||||
|
(define (vector-merge! < v v1 v2 . maybe-starts+ends)
|
||||||
|
(call-with-values
|
||||||
|
(lambda ()
|
||||||
|
(if (pair? maybe-starts+ends)
|
||||||
|
(values (car maybe-starts+ends)
|
||||||
|
(cdr maybe-starts+ends))
|
||||||
|
(values 0
|
||||||
|
'())))
|
||||||
|
(lambda (start rest)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (vectors-start+end-2 v1 v2 rest))
|
||||||
|
(lambda (start1 end1 start2 end2)
|
||||||
|
(%vector-merge! < v v1 v2 start start1 end1 start2 end2))))))
|
||||||
|
|
||||||
|
|
||||||
|
;;; This routine is not exported. The code is tightly bummed.
|
||||||
|
;;;
|
||||||
|
;;; If these preconditions hold, the routine can be bummed to run with
|
||||||
|
;;; unsafe vector-indexing and fixnum arithmetic ops:
|
||||||
|
;;; - V V1 V2 are vectors.
|
||||||
|
;;; - START START1 END1 START2 END2 are fixnums.
|
||||||
|
;;; - (<= 0 START END0 (vector-length V),
|
||||||
|
;;; where end0 = start + (end1 - start1) + (end2 - start2)
|
||||||
|
;;; - (<= 0 START1 END1 (vector-length V1))
|
||||||
|
;;; - (<= 0 START2 END2 (vector-length V2))
|
||||||
|
;;; If you put these error checks in the two client procedures above, you can
|
||||||
|
;;; safely convert this procedure to use unsafe ops -- which is why it isn't
|
||||||
|
;;; exported. This will provide *huge* speedup.
|
||||||
|
|
||||||
|
(define (%vector-merge! elt< v v1 v2 start start1 end1 start2 end2)
|
||||||
|
(letrec ((vblit (lambda (fromv j i end) ; Blit FROMV[J,END) to V[I,?].
|
||||||
|
(let lp ((j j) (i i))
|
||||||
|
(vector-set! v i (vector-ref fromv j))
|
||||||
|
(let ((j (+ j 1)))
|
||||||
|
(if (< j end) (lp j (+ i 1))))))))
|
||||||
|
|
||||||
|
(cond ((<= end1 start1) (if (< start2 end2) (vblit v2 start2 start end2)))
|
||||||
|
((<= end2 start2) (vblit v1 start1 start end1))
|
||||||
|
|
||||||
|
;; Invariants: I is next index of V to write; X = V1[J]; Y = V2[K].
|
||||||
|
(else (let lp ((i start)
|
||||||
|
(j start1) (x (vector-ref v1 start1))
|
||||||
|
(k start2) (y (vector-ref v2 start2)))
|
||||||
|
(let ((i1 (+ i 1))) ; "i+1" is a complex number in R4RS!
|
||||||
|
(if (elt< y x)
|
||||||
|
(let ((k (+ k 1)))
|
||||||
|
(vector-set! v i y)
|
||||||
|
(if (< k end2)
|
||||||
|
(lp i1 j x k (vector-ref v2 k))
|
||||||
|
(vblit v1 j i1 end1)))
|
||||||
|
(let ((j (+ j 1)))
|
||||||
|
(vector-set! v i x)
|
||||||
|
(if (< j end1)
|
||||||
|
(lp i1 j (vector-ref v1 j) k y)
|
||||||
|
(vblit v2 k i1 end2))))))))))
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;;
|
||||||
|
;;; End of code extracted from Olin's vmsort.scm file.
|
||||||
|
;;;
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
266
srfi/sorting/select.scm
Normal file
266
srfi/sorting/select.scm
Normal file
|
@ -0,0 +1,266 @@
|
||||||
|
;;; Linear-time (average case) algorithms for:
|
||||||
|
;;;
|
||||||
|
;;; Selecting the kth smallest element from an unsorted vector.
|
||||||
|
;;; Selecting the kth and (k+1)st smallest elements from an unsorted vector.
|
||||||
|
;;; Selecting the median from an unsorted vector.
|
||||||
|
|
||||||
|
;;; These procedures are part of SRFI 132 but are missing from
|
||||||
|
;;; its reference implementation as of 10 March 2016.
|
||||||
|
|
||||||
|
;;; SRFI 132 says this procedure runs in O(n) time.
|
||||||
|
;;; As implemented, however, the worst-case time is O(n^2) because
|
||||||
|
;;; vector-select is implemented using randomized quickselect.
|
||||||
|
;;; The average time is O(n), and you'd have to be unlucky
|
||||||
|
;;; to approach the worst case.
|
||||||
|
|
||||||
|
(define (vector-find-median < v knil . rest)
|
||||||
|
(let* ((mean (if (null? rest)
|
||||||
|
(lambda (a b) (/ (+ a b) 2))
|
||||||
|
(car rest)))
|
||||||
|
(n (vector-length v)))
|
||||||
|
(cond ((zero? n)
|
||||||
|
knil)
|
||||||
|
((odd? n)
|
||||||
|
(%vector-select < v (quotient n 2) 0 n))
|
||||||
|
(else
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (%vector-select2 < v (- (quotient n 2) 1) 0 n))
|
||||||
|
(lambda (a b)
|
||||||
|
(mean a b)))))))
|
||||||
|
|
||||||
|
;;; For this procedure, the SRFI 132 specification
|
||||||
|
;;; demands the vector be sorted (by side effect).
|
||||||
|
|
||||||
|
(define (vector-find-median! < v knil . rest)
|
||||||
|
(let* ((mean (if (null? rest)
|
||||||
|
(lambda (a b) (/ (+ a b) 2))
|
||||||
|
(car rest)))
|
||||||
|
(n (vector-length v)))
|
||||||
|
(vector-sort! < v)
|
||||||
|
(cond ((zero? n)
|
||||||
|
knil)
|
||||||
|
((odd? n)
|
||||||
|
(vector-ref v (quotient n 2)))
|
||||||
|
(else
|
||||||
|
(mean (vector-ref v (- (quotient n 2) 1))
|
||||||
|
(vector-ref v (quotient n 2)))))))
|
||||||
|
|
||||||
|
;;; SRFI 132 says this procedure runs in O(n) time.
|
||||||
|
;;; As implemented, however, the worst-case time is O(n^2).
|
||||||
|
;;; The average time is O(n), and you'd have to be unlucky
|
||||||
|
;;; to approach the worst case.
|
||||||
|
;;;
|
||||||
|
;;; After rest argument processing, calls the private version defined below.
|
||||||
|
|
||||||
|
(define (vector-select < v k . rest)
|
||||||
|
(let* ((start (if (null? rest)
|
||||||
|
0
|
||||||
|
(car rest)))
|
||||||
|
(end (if (and (pair? rest)
|
||||||
|
(pair? (cdr rest)))
|
||||||
|
(car (cdr rest))
|
||||||
|
(vector-length v))))
|
||||||
|
(%vector-select < v k start end)))
|
||||||
|
|
||||||
|
;;; The vector-select procedure is needed internally to implement
|
||||||
|
;;; vector-find-median, but SRFI 132 has been changed (for no good
|
||||||
|
;;; reason) to export vector-select! instead of vector-select.
|
||||||
|
;;; Fortunately, vector-select! is not required to have side effects.
|
||||||
|
|
||||||
|
(define vector-select! vector-select)
|
||||||
|
|
||||||
|
;;; This could be made slightly more efficient, but who cares?
|
||||||
|
|
||||||
|
(define (vector-separate! < v k . rest)
|
||||||
|
(let* ((start (if (null? rest)
|
||||||
|
0
|
||||||
|
(car rest)))
|
||||||
|
(end (if (and (pair? rest)
|
||||||
|
(pair? (cdr rest)))
|
||||||
|
(car (cdr rest))
|
||||||
|
(vector-length v))))
|
||||||
|
(if (and (> k 0)
|
||||||
|
(> end start))
|
||||||
|
(let ((pivot (vector-select < v (- k 1) start end)))
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (count-smaller < pivot v start end 0 0))
|
||||||
|
(lambda (count count2)
|
||||||
|
(let* ((v2 (make-vector count))
|
||||||
|
(v3 (make-vector (- end start count count2))))
|
||||||
|
(copy-smaller! < pivot v2 0 v start end)
|
||||||
|
(copy-bigger! < pivot v3 0 v start end)
|
||||||
|
(r7rs-vector-copy! v start v2)
|
||||||
|
(r7rs-vector-fill! v pivot (+ start count) (+ start count count2))
|
||||||
|
(r7rs-vector-copy! v (+ start count count2) v3))))))))
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;; For small ranges, sorting may be the fastest way to find the kth element.
|
||||||
|
;;; This threshold is not at all critical, and may not even be worthwhile.
|
||||||
|
|
||||||
|
(define just-sort-it-threshold 50)
|
||||||
|
|
||||||
|
;;; Given
|
||||||
|
;;; an irreflexive total order <?
|
||||||
|
;;; a vector v
|
||||||
|
;;; an index k
|
||||||
|
;;; an index start
|
||||||
|
;;; an index end
|
||||||
|
;;; with
|
||||||
|
;;; 0 <= k < (- end start)
|
||||||
|
;;; 0 <= start < end <= (vector-length v)
|
||||||
|
;;; returns
|
||||||
|
;;; (vector-ref (vector-sort <? (vector-copy v start end)) (+ start k))
|
||||||
|
;;; but is usually faster than that.
|
||||||
|
|
||||||
|
(define (%vector-select <? v k start end)
|
||||||
|
(assert (and 'vector-select
|
||||||
|
(procedure? <?)
|
||||||
|
(vector? v)
|
||||||
|
(exact-integer? k)
|
||||||
|
(exact-integer? start)
|
||||||
|
(exact-integer? end)
|
||||||
|
(<= 0 k (- end start 1))
|
||||||
|
(<= 0 start end (vector-length v))))
|
||||||
|
(%%vector-select <? v k start end))
|
||||||
|
|
||||||
|
;;; Given
|
||||||
|
;;; an irreflexive total order <?
|
||||||
|
;;; a vector v
|
||||||
|
;;; an index k
|
||||||
|
;;; an index start
|
||||||
|
;;; an index end
|
||||||
|
;;; with
|
||||||
|
;;; 0 <= k < (- end start 1)
|
||||||
|
;;; 0 <= start < end <= (vector-length v)
|
||||||
|
;;; returns two values:
|
||||||
|
;;; (vector-ref (vector-sort <? (vector-copy v start end)) (+ start k))
|
||||||
|
;;; (vector-ref (vector-sort <? (vector-copy v start end)) (+ start k 1))
|
||||||
|
;;; but is usually faster than that.
|
||||||
|
|
||||||
|
(define (%vector-select2 <? v k start end)
|
||||||
|
(assert (and 'vector-select
|
||||||
|
(procedure? <?)
|
||||||
|
(vector? v)
|
||||||
|
(exact-integer? k)
|
||||||
|
(exact-integer? start)
|
||||||
|
(exact-integer? end)
|
||||||
|
(<= 0 k (- end start 1 1))
|
||||||
|
(<= 0 start end (vector-length v))))
|
||||||
|
(%%vector-select2 <? v k start end))
|
||||||
|
|
||||||
|
;;; Like %vector-select, but its preconditions have been checked.
|
||||||
|
|
||||||
|
(define (%%vector-select <? v k start end)
|
||||||
|
(let ((size (- end start)))
|
||||||
|
(cond ((= 1 size)
|
||||||
|
(vector-ref v (+ k start)))
|
||||||
|
((= 2 size)
|
||||||
|
(cond ((<? (vector-ref v start)
|
||||||
|
(vector-ref v (+ start 1)))
|
||||||
|
(vector-ref v (+ k start)))
|
||||||
|
(else
|
||||||
|
(vector-ref v (+ (- 1 k) start)))))
|
||||||
|
((< size just-sort-it-threshold)
|
||||||
|
(vector-ref (vector-sort <? (r7rs-vector-copy v start end)) k))
|
||||||
|
(else
|
||||||
|
(let* ((ip (random-integer size))
|
||||||
|
(pivot (vector-ref v (+ start ip))))
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (count-smaller <? pivot v start end 0 0))
|
||||||
|
(lambda (count count2)
|
||||||
|
(cond ((< k count)
|
||||||
|
(let* ((n count)
|
||||||
|
(v2 (make-vector n)))
|
||||||
|
(copy-smaller! <? pivot v2 0 v start end)
|
||||||
|
(%%vector-select <? v2 k 0 n)))
|
||||||
|
((< k (+ count count2))
|
||||||
|
pivot)
|
||||||
|
(else
|
||||||
|
(let* ((n (- size count count2))
|
||||||
|
(v2 (make-vector n))
|
||||||
|
(k2 (- k count count2)))
|
||||||
|
(copy-bigger! <? pivot v2 0 v start end)
|
||||||
|
(%%vector-select <? v2 k2 0 n)))))))))))
|
||||||
|
|
||||||
|
;;; Like %%vector-select, but returns two values:
|
||||||
|
;;;
|
||||||
|
;;; (vector-ref (vector-sort <? (vector-copy v start end)) (+ start k))
|
||||||
|
;;; (vector-ref (vector-sort <? (vector-copy v start end)) (+ start k 1))
|
||||||
|
;;;
|
||||||
|
;;; Returning two values is useful when finding the median of an even
|
||||||
|
;;; number of things.
|
||||||
|
|
||||||
|
(define (%%vector-select2 <? v k start end)
|
||||||
|
(let ((size (- end start)))
|
||||||
|
(cond ((= 2 size)
|
||||||
|
(let ((a (vector-ref v start))
|
||||||
|
(b (vector-ref v (+ start 1))))
|
||||||
|
(cond ((<? a b)
|
||||||
|
(values a b))
|
||||||
|
(else
|
||||||
|
(values b a)))))
|
||||||
|
((< size just-sort-it-threshold)
|
||||||
|
(let ((v2 (vector-sort <? (r7rs-vector-copy v start end))))
|
||||||
|
(values (vector-ref v2 k)
|
||||||
|
(vector-ref v2 (+ k 1)))))
|
||||||
|
(else
|
||||||
|
(let* ((ip (random-integer size))
|
||||||
|
(pivot (vector-ref v (+ start ip))))
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (count-smaller <? pivot v start end 0 0))
|
||||||
|
(lambda (count count2)
|
||||||
|
(cond ((= (+ k 1) count)
|
||||||
|
(values (%%vector-select <? v k start end)
|
||||||
|
pivot))
|
||||||
|
((< k count)
|
||||||
|
(let* ((n count)
|
||||||
|
(v2 (make-vector n)))
|
||||||
|
(copy-smaller! <? pivot v2 0 v start end)
|
||||||
|
(%%vector-select2 <? v2 k 0 n)))
|
||||||
|
((< k (+ count count2))
|
||||||
|
(values pivot
|
||||||
|
(if (< (+ k 1) (+ count count2))
|
||||||
|
pivot
|
||||||
|
(%%vector-select <? v (+ k 1) start end))))
|
||||||
|
(else
|
||||||
|
(let* ((n (- size count count2))
|
||||||
|
(v2 (make-vector n))
|
||||||
|
(k2 (- k count count2)))
|
||||||
|
(copy-bigger! <? pivot v2 0 v start end)
|
||||||
|
(%%vector-select2 <? v2 k2 0 n)))))))))))
|
||||||
|
|
||||||
|
;;; Counts how many elements within the range are less than the pivot
|
||||||
|
;;; and how many are equal to the pivot, returning both of those counts.
|
||||||
|
|
||||||
|
(define (count-smaller <? pivot v i end count count2)
|
||||||
|
(cond ((= i end)
|
||||||
|
(values count count2))
|
||||||
|
((<? (vector-ref v i) pivot)
|
||||||
|
(count-smaller <? pivot v (+ i 1) end (+ count 1) count2))
|
||||||
|
((<? pivot (vector-ref v i))
|
||||||
|
(count-smaller <? pivot v (+ i 1) end count count2))
|
||||||
|
(else
|
||||||
|
(count-smaller <? pivot v (+ i 1) end count (+ count2 1)))))
|
||||||
|
|
||||||
|
;;; Like vector-copy! but copies an element only if it is less than the pivot.
|
||||||
|
;;; The destination vector must be large enough.
|
||||||
|
|
||||||
|
(define (copy-smaller! <? pivot dst at src start end)
|
||||||
|
(cond ((= start end) dst)
|
||||||
|
((<? (vector-ref src start) pivot)
|
||||||
|
(vector-set! dst at (vector-ref src start))
|
||||||
|
(copy-smaller! <? pivot dst (+ at 1) src (+ start 1) end))
|
||||||
|
(else
|
||||||
|
(copy-smaller! <? pivot dst at src (+ start 1) end))))
|
||||||
|
|
||||||
|
;;; Like copy-smaller! but copies only elements that are greater than the pivot.
|
||||||
|
|
||||||
|
(define (copy-bigger! <? pivot dst at src start end)
|
||||||
|
(cond ((= start end) dst)
|
||||||
|
((<? pivot (vector-ref src start))
|
||||||
|
(vector-set! dst at (vector-ref src start))
|
||||||
|
(copy-bigger! <? pivot dst (+ at 1) src (+ start 1) end))
|
||||||
|
(else
|
||||||
|
(copy-bigger! <? pivot dst at src (+ start 1) end))))
|
||||||
|
|
26
srfi/sorting/sort.scm
Normal file
26
srfi/sorting/sort.scm
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
;;; The sort package -- general sort & merge procedures
|
||||||
|
;;;
|
||||||
|
;;; Copyright (c) 1998 by Olin Shivers.
|
||||||
|
;;; You may do as you please with this code, as long as you do not delete this
|
||||||
|
;;; notice or hold me responsible for any outcome related to its use.
|
||||||
|
;;; Olin Shivers 10/98.
|
||||||
|
|
||||||
|
;;; This file just defines the general sort API in terms of some
|
||||||
|
;;; algorithm-specific calls.
|
||||||
|
|
||||||
|
(define (list-sort < l) ; Sort lists by converting to
|
||||||
|
(let ((v (list->vector l))) ; a vector and sorting that.
|
||||||
|
(vector-heap-sort! < v)
|
||||||
|
(vector->list v)))
|
||||||
|
|
||||||
|
(define list-sort! list-merge-sort!)
|
||||||
|
|
||||||
|
(define list-stable-sort list-merge-sort)
|
||||||
|
(define list-stable-sort! list-merge-sort!)
|
||||||
|
|
||||||
|
(define vector-sort vector-quick-sort)
|
||||||
|
(define vector-sort! vector-quick-sort!)
|
||||||
|
|
||||||
|
(define vector-stable-sort vector-merge-sort)
|
||||||
|
(define vector-stable-sort! vector-merge-sort!)
|
||||||
|
|
49
srfi/sorting/sortfaster.scm
Normal file
49
srfi/sorting/sortfaster.scm
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
;;; SRFI 132 specifies these eight procedures.
|
||||||
|
;;;
|
||||||
|
;;; Benchmarking has shown that the (rnrs sorting) procedures
|
||||||
|
;;; are faster than the sorting procedures defined by SRFI 132's
|
||||||
|
;;; reference implementation, so the R6RS procedures are used here.
|
||||||
|
;;;
|
||||||
|
;;; This file is a plug-and-play alternative to sort.scm in the
|
||||||
|
;;; same directory.
|
||||||
|
|
||||||
|
(define list-sort r6rs-list-sort)
|
||||||
|
(define list-sort! r6rs-list-sort)
|
||||||
|
(define list-stable-sort r6rs-list-sort)
|
||||||
|
(define list-stable-sort! r6rs-list-sort)
|
||||||
|
|
||||||
|
(define (vector-sort < v . rest)
|
||||||
|
(cond ((null? rest)
|
||||||
|
(r6rs-vector-sort < v))
|
||||||
|
((null? (cdr rest))
|
||||||
|
(r6rs-vector-sort < (r7rs-vector-copy v (car rest))))
|
||||||
|
((null? (cddr rest))
|
||||||
|
(r6rs-vector-sort < (r7rs-vector-copy v (car rest) (cadr rest))))
|
||||||
|
(else
|
||||||
|
(error 'vector-sort
|
||||||
|
"too many arguments"
|
||||||
|
(cons < (cons v rest))))))
|
||||||
|
|
||||||
|
(define vector-stable-sort vector-sort)
|
||||||
|
|
||||||
|
(define (vector-sort! < v . rest)
|
||||||
|
(cond ((null? rest)
|
||||||
|
(r6rs-vector-sort! < v))
|
||||||
|
((null? (cdr rest))
|
||||||
|
(let* ((start (car rest))
|
||||||
|
(v2 (r7rs-vector-copy v start)))
|
||||||
|
(r6rs-vector-sort! < v2)
|
||||||
|
(r7rs-vector-copy! v start v2 0)))
|
||||||
|
((null? (cddr rest))
|
||||||
|
(let* ((start (car rest))
|
||||||
|
(end (cadr rest))
|
||||||
|
(v2 (r7rs-vector-copy v start end)))
|
||||||
|
(r6rs-vector-sort! < v2)
|
||||||
|
(r7rs-vector-copy! v start v2 0)))
|
||||||
|
(else
|
||||||
|
(error 'vector-sort!
|
||||||
|
"too many arguments"
|
||||||
|
(cons < (cons v rest))))))
|
||||||
|
|
||||||
|
(define vector-stable-sort! vector-sort!)
|
||||||
|
|
83
srfi/sorting/sorting-test.scm
Normal file
83
srfi/sorting/sorting-test.scm
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
;;; Little test harness, 'cause I'm paraoid about tricky code.
|
||||||
|
|
||||||
|
;;; This code is
|
||||||
|
;;; Copyright (c) 1998 by Olin Shivers.
|
||||||
|
;;; The terms are: You may do as you please with this code, as long as
|
||||||
|
;;; you do not delete this notice or hold me responsible for any outcome
|
||||||
|
;;; related to its use.
|
||||||
|
;;;
|
||||||
|
;;; Blah blah blah. Don't you think source files should contain more lines
|
||||||
|
;;; of code than copyright notice?
|
||||||
|
|
||||||
|
(define-test-suite sort-tests)
|
||||||
|
|
||||||
|
;; Three-way comparison for numbers
|
||||||
|
(define (my-c x y)
|
||||||
|
(cond ((= x y) 0)
|
||||||
|
((< x y) -1)
|
||||||
|
(else 1)))
|
||||||
|
|
||||||
|
;;; For testing stable sort -- 3 & -3 compare the same.
|
||||||
|
(define (my< x y) (< (abs x) (abs y)))
|
||||||
|
|
||||||
|
(define (unstable-sort-test v) ; quick & heap vs simple insert
|
||||||
|
(let ((v1 (vector-copy v))
|
||||||
|
(v2 (vector-copy v))
|
||||||
|
(v3 (vector-copy v))
|
||||||
|
(v4 (vector-copy v)))
|
||||||
|
(vector-heap-sort! < v1)
|
||||||
|
(vector-insert-sort! < v2)
|
||||||
|
(vector-quick-sort! < v3)
|
||||||
|
(vector-quick-sort3! my-c v4)
|
||||||
|
(check-that v2 (is v1))
|
||||||
|
(check-that v3 (is v1))
|
||||||
|
(check-that v4 (is v1))
|
||||||
|
(check-that v1 (is (lambda (v) (vector-sorted? < v))))))
|
||||||
|
|
||||||
|
(define (stable-sort-test v) ; insert, list & vector merge sorts
|
||||||
|
(let ((v1 (vector-copy v))
|
||||||
|
(v2 (vector-copy v))
|
||||||
|
(v3 (list->vector (list-merge-sort! my< (vector->list v))))
|
||||||
|
(v4 (list->vector (list-merge-sort my< (vector->list v)))))
|
||||||
|
(vector-merge-sort! my< v1)
|
||||||
|
(vector-insert-sort! my< v2)
|
||||||
|
(check-that v1 (is (lambda (v) (vector-sorted? my< v))))
|
||||||
|
(check-that v2 (is v1))
|
||||||
|
(check-that v3 (is v1))
|
||||||
|
(check-that v4 (is v1))))
|
||||||
|
|
||||||
|
(define (run-sort-test sort-test count max-size)
|
||||||
|
(let loop ((i 0))
|
||||||
|
(if (< i count)
|
||||||
|
(begin
|
||||||
|
(sort-test (random-vector (random-integer max-size)))
|
||||||
|
(loop (+ 1 i))))))
|
||||||
|
|
||||||
|
(define-test-case stable-sort sort-tests
|
||||||
|
(run-sort-test stable-sort-test 10 4096))
|
||||||
|
|
||||||
|
(define-test-case unstable-sort sort-tests
|
||||||
|
(run-sort-test unstable-sort-test 10 4096))
|
||||||
|
|
||||||
|
(define (random-vector size)
|
||||||
|
(let ((v (make-vector size)))
|
||||||
|
(fill-vector-randomly! v (* 10 size))
|
||||||
|
v))
|
||||||
|
|
||||||
|
(define (fill-vector-randomly! v range)
|
||||||
|
(let ((half (quotient range 2)))
|
||||||
|
(do ((i (- (vector-length v) 1) (- i 1)))
|
||||||
|
((< i 0))
|
||||||
|
(vector-set! v i (- (random-integer range) half)))))
|
||||||
|
|
||||||
|
(define (vector-portion-copy vec start end)
|
||||||
|
(let* ((len (vector-length vec))
|
||||||
|
(new-len (- end start))
|
||||||
|
(new (make-vector new-len)))
|
||||||
|
(do ((i start (+ i 1))
|
||||||
|
(j 0 (+ j 1)))
|
||||||
|
((= i end) new)
|
||||||
|
(vector-set! new j (vector-ref vec i)))))
|
||||||
|
|
||||||
|
(define (vector-copy vec)
|
||||||
|
(vector-portion-copy vec 0 (vector-length vec)))
|
35
srfi/sorting/sortp.scm
Normal file
35
srfi/sorting/sortp.scm
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
;;; The sort package -- sorted predicates
|
||||||
|
;;; Olin Shivers 10/98.
|
||||||
|
;;;
|
||||||
|
;;; (list-sorted? < lis) -> boolean
|
||||||
|
;;; (vector-sorted? < v [start end]) -> boolean
|
||||||
|
|
||||||
|
(define (list-sorted? < list)
|
||||||
|
(or (not (pair? list))
|
||||||
|
(let lp ((prev (car list)) (tail (cdr list)))
|
||||||
|
(or (not (pair? tail))
|
||||||
|
(let ((next (car tail)))
|
||||||
|
(and (not (< next prev))
|
||||||
|
(lp next (cdr tail))))))))
|
||||||
|
|
||||||
|
(define (vector-sorted? elt< v . maybe-start+end)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (vector-start+end v maybe-start+end))
|
||||||
|
(lambda (start end)
|
||||||
|
(or (>= start end) ; Empty range
|
||||||
|
(let lp ((i (+ start 1)) (vi-1 (vector-ref v start)))
|
||||||
|
(or (>= i end)
|
||||||
|
(let ((vi (vector-ref v i)))
|
||||||
|
(and (not (elt< vi vi-1))
|
||||||
|
(lp (+ i 1) vi)))))))))
|
||||||
|
|
||||||
|
;;; Copyright and porting non-notices
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; Give me a break. It's fifteen lines of code. I place this code in the
|
||||||
|
;;; public domain; help yourself.
|
||||||
|
;;;
|
||||||
|
;;; If your Scheme has a faster mechanism for handling optional arguments
|
||||||
|
;;; (e.g., Chez), you should definitely port over to it. Note that argument
|
||||||
|
;;; defaulting and error-checking are interleaved -- you don't have to
|
||||||
|
;;; error-check defaulted START/END args to see if they are fixnums that are
|
||||||
|
;;; legal vector indices for the corresponding vector, etc.
|
1684
srfi/sorting/srfi-132-test.sps
Normal file
1684
srfi/sorting/srfi-132-test.sps
Normal file
File diff suppressed because it is too large
Load diff
19
srfi/sorting/srfi-132.sld
Normal file
19
srfi/sorting/srfi-132.sld
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
(define-library (srfi-132)
|
||||||
|
(import (scheme base))
|
||||||
|
(import (scheme cxr))
|
||||||
|
(export list-sorted? vector-sorted? list-merge vector-merge list-sort vector-sort
|
||||||
|
list-stable-sort vector-stable-sort list-merge! vector-merge! list-sort! vector-sort!
|
||||||
|
list-stable-sort! vector-stable-sort!
|
||||||
|
list-delete-neighbor-dups vector-delete-neighbor-dups
|
||||||
|
list-delete-neighbor-dups! vector-delete-neighbor-dups!)
|
||||||
|
(include "delndups.scm")
|
||||||
|
(include "lmsort.scm")
|
||||||
|
(include "sortp.scm")
|
||||||
|
(include "vector-util.scm")
|
||||||
|
(include "vhsort.scm")
|
||||||
|
(include "visort.scm")
|
||||||
|
(include "vmsort.scm")
|
||||||
|
(include "vqsort2.scm")
|
||||||
|
(include "vqsort3.scm")
|
||||||
|
(include "sort.scm") ; must be last
|
||||||
|
)
|
34
srfi/sorting/vbinsearch.scm
Normal file
34
srfi/sorting/vbinsearch.scm
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
;;; The sort package -- binary search -*- Scheme -*-
|
||||||
|
;;; Copyright (c) 1998 by Olin Shivers.
|
||||||
|
;;; This code is in the public domain.
|
||||||
|
;;; Olin Shivers 98/11
|
||||||
|
|
||||||
|
;;; Returns the index of the matching element.
|
||||||
|
;;; (vector-binary-search < car 4 '#((1 . one) (3 . three)
|
||||||
|
;;; (4 . four) (25 . twenty-five)))
|
||||||
|
;;; => 2
|
||||||
|
|
||||||
|
(define (vector-binary-search key< elt->key key v . maybe-start+end)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (vector-start+end v maybe-start+end))
|
||||||
|
(lambda (start end)
|
||||||
|
(let lp ((left start) (right end)) ; Search V[left,right).
|
||||||
|
(and (< left right)
|
||||||
|
(let* ((m (quotient (+ left right) 2))
|
||||||
|
(elt (vector-ref v m))
|
||||||
|
(elt-key (elt->key elt)))
|
||||||
|
(cond ((key< key elt-key) (lp left m))
|
||||||
|
((key< elt-key key) (lp (+ m 1) right))
|
||||||
|
(else m))))))))
|
||||||
|
|
||||||
|
(define (vector-binary-search3 compare v . maybe-start+end)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (vector-start+end v maybe-start+end))
|
||||||
|
(lambda (start end)
|
||||||
|
(let lp ((left start) (right end)) ; Search V[left,right).
|
||||||
|
(and (< left right)
|
||||||
|
(let* ((m (quotient (+ left right) 2))
|
||||||
|
(sign (compare (vector-ref v m))))
|
||||||
|
(cond ((> sign 0) (lp left m))
|
||||||
|
((< sign 0) (lp (+ m 1) right))
|
||||||
|
(else m))))))))
|
65
srfi/sorting/vector-util.scm
Normal file
65
srfi/sorting/vector-util.scm
Normal file
|
@ -0,0 +1,65 @@
|
||||||
|
;;; This code is
|
||||||
|
;;; Copyright (c) 1998 by Olin Shivers.
|
||||||
|
;;; The terms are: You may do as you please with this code, as long as
|
||||||
|
;;; you do not delete this notice or hold me responsible for any outcome
|
||||||
|
;;; related to its use.
|
||||||
|
;;;
|
||||||
|
;;; Blah blah blah. Don't you think source files should contain more lines
|
||||||
|
;;; of code than copyright notice?
|
||||||
|
|
||||||
|
(define (vector-portion-copy vec start end)
|
||||||
|
(let* ((len (vector-length vec))
|
||||||
|
(new-len (- end start))
|
||||||
|
(new (make-vector new-len)))
|
||||||
|
(do ((i start (+ i 1))
|
||||||
|
(j 0 (+ j 1)))
|
||||||
|
((= i end) new)
|
||||||
|
(vector-set! new j (vector-ref vec i)))))
|
||||||
|
|
||||||
|
(define (vector-copy vec)
|
||||||
|
(vector-portion-copy vec 0 (vector-length vec)))
|
||||||
|
|
||||||
|
(define (vector-portion-copy! target src start end)
|
||||||
|
(let ((len (- end start)))
|
||||||
|
(do ((i (- len 1) (- i 1))
|
||||||
|
(j (- end 1) (- j 1)))
|
||||||
|
((< i 0))
|
||||||
|
(vector-set! target i (vector-ref src j)))))
|
||||||
|
|
||||||
|
(define (has-element list index)
|
||||||
|
(cond
|
||||||
|
((zero? index)
|
||||||
|
(if (pair? list)
|
||||||
|
(values #t (car list))
|
||||||
|
(values #f #f)))
|
||||||
|
((null? list)
|
||||||
|
(values #f #f))
|
||||||
|
(else
|
||||||
|
(has-element (cdr list) (- index 1)))))
|
||||||
|
|
||||||
|
(define (list-ref-or-default list index default)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (has-element list index))
|
||||||
|
(lambda (has? maybe)
|
||||||
|
(if has?
|
||||||
|
maybe
|
||||||
|
default))))
|
||||||
|
|
||||||
|
(define (vector-start+end vector maybe-start+end)
|
||||||
|
(let ((start (list-ref-or-default maybe-start+end
|
||||||
|
0 0))
|
||||||
|
(end (list-ref-or-default maybe-start+end
|
||||||
|
1 (vector-length vector))))
|
||||||
|
(values start end)))
|
||||||
|
|
||||||
|
(define (vectors-start+end-2 vector-1 vector-2 maybe-start+end)
|
||||||
|
(let ((start-1 (list-ref-or-default maybe-start+end
|
||||||
|
0 0))
|
||||||
|
(end-1 (list-ref-or-default maybe-start+end
|
||||||
|
1 (vector-length vector-1)))
|
||||||
|
(start-2 (list-ref-or-default maybe-start+end
|
||||||
|
2 0))
|
||||||
|
(end-2 (list-ref-or-default maybe-start+end
|
||||||
|
3 (vector-length vector-2))))
|
||||||
|
(values start-1 end-1
|
||||||
|
start-2 end-2)))
|
119
srfi/sorting/vhsort.scm
Normal file
119
srfi/sorting/vhsort.scm
Normal file
|
@ -0,0 +1,119 @@
|
||||||
|
;;; The sort package -- vector heap sort -*- Scheme -*-
|
||||||
|
;;; Copyright (c) 2002 by Olin Shivers.
|
||||||
|
;;; This code is open-source; see the end of the file for porting and
|
||||||
|
;;; more copyright information.
|
||||||
|
;;; Olin Shivers 10/98.
|
||||||
|
|
||||||
|
;;; Exports:
|
||||||
|
;;; (vector-heap-sort! elt< v [start end]) -> unspecified
|
||||||
|
;;; (vector-heap-sort elt< v [start end]) -> vector
|
||||||
|
|
||||||
|
;;; Two key facts
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; If a heap structure is embedded into a vector at indices [start,end), then:
|
||||||
|
;;; 1. The two children of index k are start + 2*(k-start) + 1 = k*2-start+1
|
||||||
|
;;; and start + 2*(k-start) + 2 = k*2-start+2.
|
||||||
|
;;;
|
||||||
|
;;; 2. The first index of a leaf node in the range [start,end) is
|
||||||
|
;;; first-leaf = floor[(start+end)/2]
|
||||||
|
;;; (You can deduce this from fact #1 above.)
|
||||||
|
;;; Any index before FIRST-LEAF is an internal node.
|
||||||
|
|
||||||
|
(define (really-vector-heap-sort! elt< v start end)
|
||||||
|
;; Vector V contains a heap at indices [START,END). The heap is in heap
|
||||||
|
;; order in the range (I,END) -- i.e., every element in this range is >=
|
||||||
|
;; its children. Bubble HEAP[I] down into the heap to impose heap order on
|
||||||
|
;; the range [I,END).
|
||||||
|
(define (restore-heap! end i)
|
||||||
|
(let* ((vi (vector-ref v i))
|
||||||
|
(first-leaf (quotient (+ start end) 2)) ; Can fixnum overflow.
|
||||||
|
(final-k (let lp ((k i))
|
||||||
|
(if (>= k first-leaf)
|
||||||
|
k ; Leaf, so done.
|
||||||
|
(let* ((k*2-start (+ k (- k start))) ; Don't overflow.
|
||||||
|
(child1 (+ 1 k*2-start))
|
||||||
|
(child2 (+ 2 k*2-start))
|
||||||
|
(child1-val (vector-ref v child1)))
|
||||||
|
(call-with-values
|
||||||
|
(lambda ()
|
||||||
|
(if (< child2 end)
|
||||||
|
(let ((child2-val (vector-ref v child2)))
|
||||||
|
(if (elt< child2-val child1-val)
|
||||||
|
(values child1 child1-val)
|
||||||
|
(values child2 child2-val)))
|
||||||
|
(values child1 child1-val)))
|
||||||
|
(lambda (max-child max-child-val)
|
||||||
|
(cond ((elt< vi max-child-val)
|
||||||
|
(vector-set! v k max-child-val)
|
||||||
|
(lp max-child))
|
||||||
|
(else k))))))))) ; Done.
|
||||||
|
(vector-set! v final-k vi)))
|
||||||
|
|
||||||
|
;; Put the unsorted subvector V[start,end) into heap order.
|
||||||
|
(let ((first-leaf (quotient (+ start end) 2))) ; Can fixnum overflow.
|
||||||
|
(do ((i (- first-leaf 1) (- i 1)))
|
||||||
|
((< i start))
|
||||||
|
(restore-heap! end i)))
|
||||||
|
|
||||||
|
(do ((i (- end 1) (- i 1)))
|
||||||
|
((<= i start))
|
||||||
|
(let ((top (vector-ref v start)))
|
||||||
|
(vector-set! v start (vector-ref v i))
|
||||||
|
(vector-set! v i top)
|
||||||
|
(restore-heap! i start))))
|
||||||
|
|
||||||
|
;;; Here are the two exported interfaces.
|
||||||
|
|
||||||
|
(define (vector-heap-sort! elt< v . maybe-start+end)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (vector-start+end v maybe-start+end))
|
||||||
|
(lambda (start end)
|
||||||
|
(really-vector-heap-sort! elt< v start end))))
|
||||||
|
|
||||||
|
(define (vector-heap-sort elt< v . maybe-start+end)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (vector-start+end v maybe-start+end))
|
||||||
|
(lambda (start end)
|
||||||
|
(let ((ans (vector-portion-copy v start end)))
|
||||||
|
(really-vector-heap-sort! elt< ans 0 (- end start))
|
||||||
|
ans))))
|
||||||
|
|
||||||
|
;;; Notes on porting
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;;
|
||||||
|
;;; Bumming the code for speed
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; If you can use a module system to lock up the internal function
|
||||||
|
;;; REALLY-VECTOR-HEAP-SORT! so that it can only be called from VECTOR-HEAP-SORT and
|
||||||
|
;;; VECTOR-HEAP-SORT!, then you can hack the internal functions to run with no safety
|
||||||
|
;;; checks. The safety checks performed by the exported functions VECTOR-HEAP-SORT &
|
||||||
|
;;; VECTOR-HEAP-SORT! guarantee that there will be no type errors or array-indexing
|
||||||
|
;;; errors. In addition, with the exception of the two computations of
|
||||||
|
;;; FIRST-LEAF, all arithmetic will be fixnum arithmetic that never overflows
|
||||||
|
;;; into bignums, assuming your Scheme provides that you can't allocate an
|
||||||
|
;;; array so large you might need a bignum to index an element, which is
|
||||||
|
;;; definitely the case for every implementation with which I am familiar.
|
||||||
|
;;;
|
||||||
|
;;; If you want to code up the first-leaf = (quotient (+ s e) 2) computation
|
||||||
|
;;; so that it will never fixnum overflow when S & E are fixnums, you can do
|
||||||
|
;;; it this way:
|
||||||
|
;;; - compute floor(e/2), which throws away e's low-order bit.
|
||||||
|
;;; - add e's low-order bit to s, and divide that by two:
|
||||||
|
;;; floor[(s + e mod 2) / 2]
|
||||||
|
;;; - add these two parts together.
|
||||||
|
;;; giving you
|
||||||
|
;;; (+ (quotient e 2)
|
||||||
|
;;; (quotient (+ s (modulo e 2)) 2))
|
||||||
|
;;; If we know that e & s are fixnums, and that 0 <= s <= e, then this
|
||||||
|
;;; can only fixnum-overflow when s = e = max-fixnum. Note that the
|
||||||
|
;;; two divides and one modulo op can be done very quickly with two
|
||||||
|
;;; right-shifts and a bitwise and.
|
||||||
|
;;;
|
||||||
|
;;; I suspect there has never been a heapsort written in the history of
|
||||||
|
;;; the world in C that got this detail right.
|
||||||
|
;;;
|
||||||
|
;;; If your Scheme has a faster mechanism for handling optional arguments
|
||||||
|
;;; (e.g., Chez), you should definitely port over to it. Note that argument
|
||||||
|
;;; defaulting and error-checking are interleaved -- you don't have to
|
||||||
|
;;; error-check defaulted START/END args to see if they are fixnums that are
|
||||||
|
;;; legal vector indices for the corresponding vector, etc.
|
78
srfi/sorting/visort.scm
Normal file
78
srfi/sorting/visort.scm
Normal file
|
@ -0,0 +1,78 @@
|
||||||
|
;;; The sort package -- stable vector insertion sort -*- Scheme -*-
|
||||||
|
;;; Copyright (c) 1998 by Olin Shivers.
|
||||||
|
;;; This code is open-source; see the end of the file for porting and
|
||||||
|
;;; more copyright information.
|
||||||
|
;;; Olin Shivers 10/98.
|
||||||
|
|
||||||
|
;;; Exports:
|
||||||
|
;;; vector-insert-sort < v [start end] -> vector
|
||||||
|
;;; vector-insert-sort! < v [start end] -> unspecific
|
||||||
|
;;;
|
||||||
|
;;; %vector-insert-sort! is also called from vqsort.scm's quick-sort function.
|
||||||
|
|
||||||
|
(define (vector-insert-sort elt< v . maybe-start+end)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (vector-start+end v maybe-start+end))
|
||||||
|
(lambda (start end)
|
||||||
|
(let ((ans (vector-portion-copy v start end)))
|
||||||
|
(%vector-insert-sort! elt< ans 0 (- end start))
|
||||||
|
ans))))
|
||||||
|
|
||||||
|
(define (vector-insert-sort! < v . maybe-start+end)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (vector-start+end v maybe-start+end))
|
||||||
|
(lambda (start end)
|
||||||
|
(%vector-insert-sort! < v start end))))
|
||||||
|
|
||||||
|
(define (%vector-insert-sort! elt< v start end)
|
||||||
|
(do ((i (+ 1 start) (+ i 1))) ; Invariant: [start,i) is sorted.
|
||||||
|
((>= i end))
|
||||||
|
(let ((val (vector-ref v i)))
|
||||||
|
(vector-set! v (let lp ((j i)) ; J is the location of the
|
||||||
|
(if (<= j start)
|
||||||
|
start ; "hole" as it bubbles down.
|
||||||
|
(let* ((j-1 (- j 1))
|
||||||
|
(vj-1 (vector-ref v j-1)))
|
||||||
|
(cond ((elt< val vj-1)
|
||||||
|
(vector-set! v j vj-1)
|
||||||
|
(lp j-1))
|
||||||
|
(else j)))))
|
||||||
|
val))))
|
||||||
|
|
||||||
|
;;; Copyright
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; This code is
|
||||||
|
;;; Copyright (c) 1998 by Olin Shivers.
|
||||||
|
;;; The terms are: You may do as you please with this code, as long as
|
||||||
|
;;; you do not delete this notice or hold me responsible for any outcome
|
||||||
|
;;; related to its use.
|
||||||
|
;;;
|
||||||
|
;;; Blah blah blah. Don't you think source files should contain more lines
|
||||||
|
;;; of code than copyright notice?
|
||||||
|
|
||||||
|
|
||||||
|
;;; Code tuning & porting
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;;
|
||||||
|
;;; This code is tightly bummed as far as I can go in portable Scheme.
|
||||||
|
;;;
|
||||||
|
;;; The code can be converted to use unsafe vector-indexing and
|
||||||
|
;;; fixnum-specific arithmetic ops -- the safety checks done on entry
|
||||||
|
;;; to VECTOR-INSERT-SORT and VECTOR-INSERT-SORT! are sufficient to
|
||||||
|
;;; guarantee nothing bad will happen. However, note that if you alter
|
||||||
|
;;; %VECTOR-INSERT-SORT! to use dangerous primitives, you must ensure
|
||||||
|
;;; it is only called from clients that guarantee to observe its
|
||||||
|
;;; preconditions. In the implementation, %VECTOR-INSERT-SORT! is only
|
||||||
|
;;; called from VECTOR-INSERT-SORT! and the quick-sort code in
|
||||||
|
;;; vqsort.scm, and the preconditions are guaranteed for these two
|
||||||
|
;;; clients. This should provide *big* speedups. In fact, all the
|
||||||
|
;;; code bumming I've done pretty much disappears in the noise unless
|
||||||
|
;;; you have a good compiler and also can dump the vector-index checks
|
||||||
|
;;; and generic arithmetic -- so I've really just set things up for
|
||||||
|
;;; you to exploit.
|
||||||
|
;;;
|
||||||
|
;;; If your Scheme has a faster mechanism for handling optional arguments
|
||||||
|
;;; (e.g., Chez), you should definitely port over to it. Note that argument
|
||||||
|
;;; defaulting and error-checking are interleaved -- you don't have to
|
||||||
|
;;; error-check defaulted START/END args to see if they are fixnums that are
|
||||||
|
;;; legal vector indices for the corresponding vector, etc.
|
246
srfi/sorting/vmsort.scm
Normal file
246
srfi/sorting/vmsort.scm
Normal file
|
@ -0,0 +1,246 @@
|
||||||
|
;;; The sort package -- stable vector merge & merge sort -*- Scheme -*-
|
||||||
|
;;; Copyright (c) 1998 by Olin Shivers.
|
||||||
|
;;; This code is open-source; see the end of the file for porting and
|
||||||
|
;;; more copyright information.
|
||||||
|
;;; Olin Shivers 10/98.
|
||||||
|
|
||||||
|
;;; Exports:
|
||||||
|
;;; (vector-merge < v1 v2 [start1 end1 start2 end2]) -> vector
|
||||||
|
;;; (vector-merge! < v v1 v2 [start0 start1 end1 start2 end2]) -> unspecific
|
||||||
|
;;;
|
||||||
|
;;; (vector-merge-sort < v [start end temp]) -> vector
|
||||||
|
;;; (vector-merge-sort! < v [start end temp]) -> unspecific
|
||||||
|
|
||||||
|
|
||||||
|
;;; Merge
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; (vector-merge < v1 v2 [start1 end1 start2 end2]) -> vector
|
||||||
|
;;; (vector-merge! < v v1 v2 [start start1 end1 start2 end2]) -> unspecific
|
||||||
|
;;;
|
||||||
|
;;; Stable vector merge -- V1's elements come out ahead of equal V2 elements.
|
||||||
|
|
||||||
|
(define (vector-merge < v1 v2 . maybe-starts+ends)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (vectors-start+end-2 v1 v2 maybe-starts+ends))
|
||||||
|
(lambda (start1 end1 start2 end2)
|
||||||
|
(let ((ans (make-vector (+ (- end1 start1) (- end2 start2)))))
|
||||||
|
(%vector-merge! < ans v1 v2 0 start1 end1 start2 end2)
|
||||||
|
ans))))
|
||||||
|
|
||||||
|
(define (vector-merge! < v v1 v2 . maybe-starts+ends)
|
||||||
|
(call-with-values
|
||||||
|
(lambda ()
|
||||||
|
(if (pair? maybe-starts+ends)
|
||||||
|
(values (car maybe-starts+ends)
|
||||||
|
(cdr maybe-starts+ends))
|
||||||
|
(values 0
|
||||||
|
'())))
|
||||||
|
(lambda (start rest)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (vectors-start+end-2 v1 v2 rest))
|
||||||
|
(lambda (start1 end1 start2 end2)
|
||||||
|
(%vector-merge! < v v1 v2 start start1 end1 start2 end2))))))
|
||||||
|
|
||||||
|
|
||||||
|
;;; This routine is not exported. The code is tightly bummed.
|
||||||
|
;;;
|
||||||
|
;;; If these preconditions hold, the routine can be bummed to run with
|
||||||
|
;;; unsafe vector-indexing and fixnum arithmetic ops:
|
||||||
|
;;; - V V1 V2 are vectors.
|
||||||
|
;;; - START START1 END1 START2 END2 are fixnums.
|
||||||
|
;;; - (<= 0 START END0 (vector-length V),
|
||||||
|
;;; where end0 = start + (end1 - start1) + (end2 - start2)
|
||||||
|
;;; - (<= 0 START1 END1 (vector-length V1))
|
||||||
|
;;; - (<= 0 START2 END2 (vector-length V2))
|
||||||
|
;;; If you put these error checks in the two client procedures above, you can
|
||||||
|
;;; safely convert this procedure to use unsafe ops -- which is why it isn't
|
||||||
|
;;; exported. This will provide *huge* speedup.
|
||||||
|
|
||||||
|
(define (%vector-merge! elt< v v1 v2 start start1 end1 start2 end2)
|
||||||
|
(letrec ((vblit (lambda (fromv j i end) ; Blit FROMV[J,END) to V[I,?].
|
||||||
|
(let lp ((j j) (i i))
|
||||||
|
(vector-set! v i (vector-ref fromv j))
|
||||||
|
(let ((j (+ j 1)))
|
||||||
|
(if (< j end) (lp j (+ i 1))))))))
|
||||||
|
|
||||||
|
(cond ((<= end1 start1) (if (< start2 end2) (vblit v2 start2 start end2)))
|
||||||
|
((<= end2 start2) (vblit v1 start1 start end1))
|
||||||
|
|
||||||
|
;; Invariants: I is next index of V to write; X = V1[J]; Y = V2[K].
|
||||||
|
(else (let lp ((i start)
|
||||||
|
(j start1) (x (vector-ref v1 start1))
|
||||||
|
(k start2) (y (vector-ref v2 start2)))
|
||||||
|
(let ((i1 (+ i 1))) ; "i+1" is a complex number in R4RS!
|
||||||
|
(if (elt< y x)
|
||||||
|
(let ((k (+ k 1)))
|
||||||
|
(vector-set! v i y)
|
||||||
|
(if (< k end2)
|
||||||
|
(lp i1 j x k (vector-ref v2 k))
|
||||||
|
(vblit v1 j i1 end1)))
|
||||||
|
(let ((j (+ j 1)))
|
||||||
|
(vector-set! v i x)
|
||||||
|
(if (< j end1)
|
||||||
|
(lp i1 j (vector-ref v1 j) k y)
|
||||||
|
(vblit v2 k i1 end2))))))))))
|
||||||
|
|
||||||
|
|
||||||
|
;;; (vector-merge-sort < v [start end temp]) -> vector
|
||||||
|
;;; (vector-merge-sort! < v [start end temp]) -> unspecific
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; Stable natural vector merge sort
|
||||||
|
|
||||||
|
(define (vector-merge-sort! < v . maybe-args)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (vector-start+end v maybe-args))
|
||||||
|
(lambda (start end)
|
||||||
|
(let ((temp (if (and (pair? maybe-args) ; kludge
|
||||||
|
(pair? (cdr maybe-args))
|
||||||
|
(pair? (cddr maybe-args)))
|
||||||
|
(caddr maybe-args)
|
||||||
|
(vector-copy v))))
|
||||||
|
(%vector-merge-sort! < v start end temp)))))
|
||||||
|
|
||||||
|
(define (vector-merge-sort < v . maybe-args)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (vector-start+end v maybe-args))
|
||||||
|
(lambda (start end)
|
||||||
|
(let ((ans (vector-copy v start end)))
|
||||||
|
(vector-merge-sort! < ans)
|
||||||
|
ans))))
|
||||||
|
|
||||||
|
|
||||||
|
;;; %VECTOR-MERGE-SORT! is not exported.
|
||||||
|
;;; Preconditions:
|
||||||
|
;;; V TEMP vectors
|
||||||
|
;;; START END fixnums
|
||||||
|
;;; START END legal indices for V and TEMP
|
||||||
|
;;; If these preconditions are ensured by the cover functions, you
|
||||||
|
;;; can safely change this code to use unsafe fixnum arithmetic and vector
|
||||||
|
;;; indexing ops, for *huge* speedup.
|
||||||
|
|
||||||
|
;;; This merge sort is "opportunistic" -- the leaves of the merge tree are
|
||||||
|
;;; contiguous runs of already sorted elements in the vector. In the best
|
||||||
|
;;; case -- an already sorted vector -- it runs in linear time. Worst case
|
||||||
|
;;; is still O(n lg n) time.
|
||||||
|
|
||||||
|
(define (%vector-merge-sort! elt< v0 l r temp0)
|
||||||
|
(define (xor a b) (not (eq? a b)))
|
||||||
|
|
||||||
|
;; Merge v1[l,l+len1) and v2[l+len1,l+len1+len2) into target[l,l+len1+len2)
|
||||||
|
;; Merge left-to-right, so that TEMP may be either V1 or V2
|
||||||
|
;; (that this is OK takes a little bit of thought).
|
||||||
|
;; V2=TARGET? is true if V2 and TARGET are the same, which allows
|
||||||
|
;; merge to punt the final blit half of the time.
|
||||||
|
|
||||||
|
(define (merge target v1 v2 l len1 len2 v2=target?)
|
||||||
|
(letrec ((vblit (lambda (fromv j i end) ; Blit FROMV[J,END) to TARGET[I,?]
|
||||||
|
(let lp ((j j) (i i)) ; J < END. The final copy.
|
||||||
|
(vector-set! target i (vector-ref fromv j))
|
||||||
|
(let ((j (+ j 1)))
|
||||||
|
(if (< j end) (lp j (+ i 1))))))))
|
||||||
|
|
||||||
|
(let* ((r1 (+ l len1))
|
||||||
|
(r2 (+ r1 len2)))
|
||||||
|
; Invariants:
|
||||||
|
(let lp ((n l) ; N is next index of
|
||||||
|
(j l) (x (vector-ref v1 l)) ; TARGET to write.
|
||||||
|
(k r1) (y (vector-ref v2 r1))) ; X = V1[J]
|
||||||
|
(let ((n+1 (+ n 1))) ; Y = V2[K]
|
||||||
|
(if (elt< y x)
|
||||||
|
(let ((k (+ k 1)))
|
||||||
|
(vector-set! target n y)
|
||||||
|
(if (< k r2)
|
||||||
|
(lp n+1 j x k (vector-ref v2 k))
|
||||||
|
(vblit v1 j n+1 r1)))
|
||||||
|
(let ((j (+ j 1)))
|
||||||
|
(vector-set! target n x)
|
||||||
|
(if (< j r1)
|
||||||
|
(lp n+1 j (vector-ref v1 j) k y)
|
||||||
|
(if (not v2=target?) (vblit v2 k n+1 r2))))))))))
|
||||||
|
|
||||||
|
|
||||||
|
;; Might hack GETRUN so that if the run is short it pads it out to length
|
||||||
|
;; 10 with insert sort...
|
||||||
|
|
||||||
|
;; Precondition: l < r.
|
||||||
|
(define (getrun v l r)
|
||||||
|
(let lp ((i (+ l 1)) (x (vector-ref v l)))
|
||||||
|
(if (>= i r)
|
||||||
|
(- i l)
|
||||||
|
(let ((y (vector-ref v i)))
|
||||||
|
(if (elt< y x)
|
||||||
|
(- i l)
|
||||||
|
(lp (+ i 1) y))))))
|
||||||
|
|
||||||
|
;; RECUR: Sort V0[L,L+LEN) for some LEN where 0 < WANT <= LEN <= (R-L).
|
||||||
|
;; That is, sort *at least* WANT elements in V0 starting at index L.
|
||||||
|
;; May put the result into either V0[L,L+LEN) or TEMP0[L,L+LEN).
|
||||||
|
;; Must not alter either vector outside this range.
|
||||||
|
;; Return:
|
||||||
|
;; - LEN -- the number of values we sorted
|
||||||
|
;; - ANSVEC -- the vector holding the value
|
||||||
|
;; - ANS=V0? -- tells if ANSVEC is V0 or TEMP
|
||||||
|
;;
|
||||||
|
;; LP: V[L,L+PFXLEN) holds a sorted prefix of V0.
|
||||||
|
;; TEMP = if V = V0 then TEMP0 else V0. (I.e., TEMP is the other vec.)
|
||||||
|
;; PFXLEN2 is a power of 2 <= PFXLEN.
|
||||||
|
;; Solve RECUR's problem.
|
||||||
|
(if (< l r) ; Don't try to sort an empty range.
|
||||||
|
(call-with-values
|
||||||
|
(lambda ()
|
||||||
|
(let recur ((l l) (want (- r l)))
|
||||||
|
(let ((len (- r l)))
|
||||||
|
(let lp ((pfxlen (getrun v0 l r)) (pfxlen2 1)
|
||||||
|
(v v0) (temp temp0)
|
||||||
|
(v=v0? #t))
|
||||||
|
(if (or (>= pfxlen want) (= pfxlen len))
|
||||||
|
(values pfxlen v v=v0?)
|
||||||
|
(let ((pfxlen2 (let lp ((j pfxlen2))
|
||||||
|
(let ((j*2 (+ j j)))
|
||||||
|
(if (<= j pfxlen) (lp j*2) j))))
|
||||||
|
(tail-len (- len pfxlen)))
|
||||||
|
;; PFXLEN2 is now the largest power of 2 <= PFXLEN.
|
||||||
|
;; (Just think of it as being roughly PFXLEN.)
|
||||||
|
(call-with-values
|
||||||
|
(lambda ()
|
||||||
|
(recur (+ pfxlen l) pfxlen2))
|
||||||
|
(lambda (nr-len nr-vec nrvec=v0?)
|
||||||
|
(merge temp v nr-vec l pfxlen nr-len
|
||||||
|
(xor nrvec=v0? v=v0?))
|
||||||
|
(lp (+ pfxlen nr-len) (+ pfxlen2 pfxlen2)
|
||||||
|
temp v (not v=v0?))))))))))
|
||||||
|
(lambda (ignored-len ignored-ansvec ansvec=v0?)
|
||||||
|
(if (not ansvec=v0?)
|
||||||
|
(vector-copy! v0 l temp0 l r))))))
|
||||||
|
|
||||||
|
|
||||||
|
;;; Copyright
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; This code is
|
||||||
|
;;; Copyright (c) 1998 by Olin Shivers.
|
||||||
|
;;; The terms are: You may do as you please with this code, as long as
|
||||||
|
;;; you do not delete this notice or hold me responsible for any outcome
|
||||||
|
;;; related to its use.
|
||||||
|
;;;
|
||||||
|
;;; Blah blah blah. Don't you think source files should contain more lines
|
||||||
|
;;; of code than copyright notice?
|
||||||
|
|
||||||
|
|
||||||
|
;;; Code tuning & porting
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; This code is *tightly* bummed as far as I can go in portable Scheme.
|
||||||
|
;;;
|
||||||
|
;;; The two internal primitives that do the real work can be converted to
|
||||||
|
;;; use unsafe vector-indexing and fixnum-specific arithmetic ops *if* you
|
||||||
|
;;; alter the four small cover functions to enforce the invariants. This should
|
||||||
|
;;; provide *big* speedups. In fact, all the code bumming I've done pretty
|
||||||
|
;;; much disappears in the noise unless you have a good compiler and also
|
||||||
|
;;; can dump the vector-index checks and generic arithmetic -- so I've really
|
||||||
|
;;; just set things up for you to exploit.
|
||||||
|
;;;
|
||||||
|
;;; The optional-arg parsing, defaulting, and error checking is done with a
|
||||||
|
;;; portable R4RS macro. But if your Scheme has a faster mechanism (e.g.,
|
||||||
|
;;; Chez), you should definitely port over to it. Note that argument defaulting
|
||||||
|
;;; and error-checking are interleaved -- you don't have to error-check
|
||||||
|
;;; defaulted START/END args to see if they are fixnums that are legal vector
|
||||||
|
;;; indices for the corresponding vector, etc.
|
191
srfi/sorting/vqsort2.scm
Normal file
191
srfi/sorting/vqsort2.scm
Normal file
|
@ -0,0 +1,191 @@
|
||||||
|
;;; The SRFI-32 sort package -- quick sort -*- Scheme -*-
|
||||||
|
;;; Copyright (c) 2002 by Olin Shivers.
|
||||||
|
;;; This code is open-source; see the end of the file for porting and
|
||||||
|
;;; more copyright information.
|
||||||
|
;;; Olin Shivers 2002/7.
|
||||||
|
|
||||||
|
;;; (quick-sort < v [start end]) -> vector
|
||||||
|
;;; (quick-sort! < v [start end]) -> unspecific
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; The algorithm is a standard quicksort, but the partition loop is fancier,
|
||||||
|
;;; arranging the vector into a left part that is <, a middle region that is
|
||||||
|
;;; =, and a right part that is > the pivot. Here's how it is done:
|
||||||
|
;;; The partition loop divides the range being partitioned into five
|
||||||
|
;;; subranges:
|
||||||
|
;;; =======<<<<<<<<<?????????>>>>>>>=======
|
||||||
|
;;; where = marks a value that is equal the pivot, < marks a value that
|
||||||
|
;;; is less than the pivot, ? marks a value that hasn't been scanned, and
|
||||||
|
;;; > marks a value that is greater than the pivot. Let's consider the
|
||||||
|
;;; left-to-right scan. If it checks a ? value that is <, it keeps scanning.
|
||||||
|
;;; If the ? value is >, we stop the scan -- we are ready to start the
|
||||||
|
;;; right-to-left scan and then do a swap. But if the rightward scan checks
|
||||||
|
;;; a ? value that is =, we swap it *down* to the end of the initial chunk
|
||||||
|
;;; of ====='s -- we exchange it with the leftmost < value -- and then
|
||||||
|
;;; continue our rightward scan. The leftwards scan works in a similar
|
||||||
|
;;; fashion, scanning past > elements, stopping on a < element, and swapping
|
||||||
|
;;; up = elements. When we are done, we have a picture like this
|
||||||
|
;;; ========<<<<<<<<<<<<>>>>>>>>>>=========
|
||||||
|
;;; Then swap the = elements up into the middle of the vector to get
|
||||||
|
;;; this:
|
||||||
|
;;; <<<<<<<<<<<<=================>>>>>>>>>>
|
||||||
|
;;; Then recurse on the <'s and >'s. Work out all the tricky little
|
||||||
|
;;; boundary cases, and you're done.
|
||||||
|
;;;
|
||||||
|
;;; Other tricks:
|
||||||
|
;;; - This quicksort also makes some effort to pick the pivot well -- it uses
|
||||||
|
;;; the median of three elements as the partition pivot, so pathological n^2
|
||||||
|
;;; run time is much rarer (but not eliminated completely). If you really
|
||||||
|
;;; wanted to get fancy, you could use a random number generator to choose
|
||||||
|
;;; pivots. The key to this trick is that you only need to pick one random
|
||||||
|
;;; number for each *level* of recursion -- i.e. you only need (lg n) random
|
||||||
|
;;; numbers.
|
||||||
|
;;; - After the partition, we *recurse* on the smaller of the two pending
|
||||||
|
;;; regions, then *tail-recurse* (iterate) on the larger one. This guarantees
|
||||||
|
;;; we use no more than lg(n) stack frames, worst case.
|
||||||
|
;;; - There are two ways to finish off the sort.
|
||||||
|
;;; A Recurse down to regions of size 10, then sort each such region using
|
||||||
|
;;; insertion sort.
|
||||||
|
;;; B Recurse down to regions of size 10, then sort *the entire vector*
|
||||||
|
;;; using insertion sort.
|
||||||
|
;;; We do A. Each choice has a cost. Choice A has more overhead to invoke
|
||||||
|
;;; all the separate insertion sorts -- choice B only calls insertion sort
|
||||||
|
;;; once. But choice B will call the comparison function *more times* --
|
||||||
|
;;; it will unnecessarily compare elt 9 of one segment to elt 0 of the
|
||||||
|
;;; following segment. The overhead of choice A is linear in the length
|
||||||
|
;;; of the vector, but *otherwise independent of the algorithm's parameters*.
|
||||||
|
;;; I.e., it's a *fixed*, *small* constant factor. The cost of the extra
|
||||||
|
;;; comparisons made by choice B, however, is dependent on an externality:
|
||||||
|
;;; the comparison function passed in by the client. This can be made
|
||||||
|
;;; arbitrarily bad -- that is, the constant factor *isn't* fixed by the
|
||||||
|
;;; sort algorithm; instead, it's determined by the comparison function.
|
||||||
|
;;; If your comparison function is very, very slow, you want to eliminate
|
||||||
|
;;; every single one that you can. Choice A limits the potential badness,
|
||||||
|
;;; so that is what we do.
|
||||||
|
|
||||||
|
(define (vector-quick-sort! < v . maybe-start+end)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (vector-start+end v maybe-start+end))
|
||||||
|
(lambda (start end)
|
||||||
|
(%quick-sort! < v start end))))
|
||||||
|
|
||||||
|
(define (vector-quick-sort < v . maybe-start+end)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (vector-start+end v maybe-start+end))
|
||||||
|
(lambda (start end)
|
||||||
|
(let ((ans (make-vector (- end start))))
|
||||||
|
(vector-portion-copy! ans v start end)
|
||||||
|
(%quick-sort! < ans 0 (- end start))
|
||||||
|
ans))))
|
||||||
|
|
||||||
|
;;; %QUICK-SORT is not exported.
|
||||||
|
;;; Preconditions:
|
||||||
|
;;; V vector
|
||||||
|
;;; START END fixnums
|
||||||
|
;;; 0 <= START, END <= (vector-length V)
|
||||||
|
;;; If these preconditions are ensured by the cover functions, you
|
||||||
|
;;; can safely change this code to use unsafe fixnum arithmetic and vector
|
||||||
|
;;; indexing ops, for *huge* speedup.
|
||||||
|
;;;
|
||||||
|
;;; We bail out to insertion sort for small ranges; feel free to tune the
|
||||||
|
;;; crossover -- it's just a random guess. If you don't have the insertion
|
||||||
|
;;; sort routine, just kill that branch of the IF and change the recursion
|
||||||
|
;;; test to (< 1 (- r l)) -- the code is set up to work that way.
|
||||||
|
|
||||||
|
(define (%quick-sort! elt< v start end)
|
||||||
|
;; Swap the N outer pairs of the range [l,r).
|
||||||
|
(define (swap l r n)
|
||||||
|
(if (> n 0)
|
||||||
|
(let ((x (vector-ref v l))
|
||||||
|
(r-1 (- r 1)))
|
||||||
|
(vector-set! v l (vector-ref v r-1))
|
||||||
|
(vector-set! v r-1 x)
|
||||||
|
(swap (+ l 1) r-1 (- n 1)))))
|
||||||
|
|
||||||
|
;; Choose the median of V[l], V[r], and V[middle] for the pivot.
|
||||||
|
(define (median v1 v2 v3)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (if (elt< v1 v2) (values v1 v2) (values v2 v1)))
|
||||||
|
(lambda (little big)
|
||||||
|
(if (elt< big v3)
|
||||||
|
big
|
||||||
|
(if (elt< little v3) v3 little)))))
|
||||||
|
|
||||||
|
(let recur ((l start) (r end)) ; Sort the range [l,r).
|
||||||
|
(if (< 10 (- r l)) ; Ten: the gospel according to Sedgewick.
|
||||||
|
|
||||||
|
(let ((pivot (median (vector-ref v l)
|
||||||
|
(vector-ref v (quotient (+ l r) 2))
|
||||||
|
(vector-ref v (- r 1)))))
|
||||||
|
|
||||||
|
;; Everything in these loops is driven by the invariants expressed
|
||||||
|
;; in the little pictures & the corresponding l,i,j,k,m,r indices
|
||||||
|
;; and the associated ranges.
|
||||||
|
|
||||||
|
;; =======<<<<<<<<<?????????>>>>>>>=======
|
||||||
|
;; l i j k m r
|
||||||
|
;; [l,i) [i,j) [j,k] (k,m] (m,r)
|
||||||
|
(letrec ((lscan (lambda (i j k m) ; left-to-right scan
|
||||||
|
(let lp ((i i) (j j))
|
||||||
|
(if (> j k)
|
||||||
|
(done i j m)
|
||||||
|
(let ((x (vector-ref v j)))
|
||||||
|
(cond ((elt< x pivot) (lp i (+ j 1)))
|
||||||
|
|
||||||
|
((elt< pivot x) (rscan i j k m))
|
||||||
|
|
||||||
|
(else ; Equal
|
||||||
|
(if (< i j)
|
||||||
|
(begin (vector-set! v j (vector-ref v i))
|
||||||
|
(vector-set! v i x)))
|
||||||
|
(lp (+ i 1) (+ j 1)))))))))
|
||||||
|
|
||||||
|
;; =======<<<<<<<<<>????????>>>>>>>=======
|
||||||
|
;; l i j k m r
|
||||||
|
;; [l,i) [i,j) j (j,k] (k,m] (m,r)
|
||||||
|
(rscan (lambda (i j k m) ; right-to-left scan
|
||||||
|
(let lp ((k k) (m m))
|
||||||
|
(if (<= k j)
|
||||||
|
(done i j m)
|
||||||
|
(let* ((x (vector-ref v k)))
|
||||||
|
(cond ((elt< pivot x) (lp (- k 1) m))
|
||||||
|
|
||||||
|
((elt< x pivot) ; Swap j & k & lscan.
|
||||||
|
(vector-set! v k (vector-ref v j))
|
||||||
|
(vector-set! v j x)
|
||||||
|
(lscan i (+ j 1) (- k 1) m))
|
||||||
|
|
||||||
|
(else ; x=pivot
|
||||||
|
(if (< k m)
|
||||||
|
(begin (vector-set! v k (vector-ref v m))
|
||||||
|
(vector-set! v m x)))
|
||||||
|
(lp (- k 1) (- m 1)))))))))
|
||||||
|
|
||||||
|
|
||||||
|
;; =======<<<<<<<<<<<<<>>>>>>>>>>>=======
|
||||||
|
;; l i j m r
|
||||||
|
;; [l,i) [i,j) [j,m] (m,r)
|
||||||
|
(done (lambda (i j m)
|
||||||
|
(let ((num< (- j i))
|
||||||
|
(num> (+ 1 (- m j)))
|
||||||
|
(num=l (- i l))
|
||||||
|
(num=r (- (- r m) 1)))
|
||||||
|
(swap l j (min num< num=l)) ; Swap ='s into
|
||||||
|
(swap j r (min num> num=r)) ; the middle.
|
||||||
|
;; Recur on the <'s and >'s. Recurring on the
|
||||||
|
;; smaller range and iterating on the bigger
|
||||||
|
;; range ensures O(lg n) stack frames, worst case.
|
||||||
|
(cond ((<= num< num>)
|
||||||
|
(recur l (+ l num<))
|
||||||
|
(recur (- r num>) r))
|
||||||
|
(else
|
||||||
|
(recur (- r num>) r)
|
||||||
|
(recur l (+ l num<))))))))
|
||||||
|
|
||||||
|
(let ((r-1 (- r 1)))
|
||||||
|
(lscan l l r-1 r-1))))
|
||||||
|
|
||||||
|
;; Small segment => punt to insert sort.
|
||||||
|
;; Use the dangerous subprimitive.
|
||||||
|
(%vector-insert-sort! elt< v l r))))
|
||||||
|
|
||||||
|
|
261
srfi/sorting/vqsort3.scm
Normal file
261
srfi/sorting/vqsort3.scm
Normal file
|
@ -0,0 +1,261 @@
|
||||||
|
;;; The SRFI-32 sort package -- three-way quick sort -*- Scheme -*-
|
||||||
|
;;; Copyright (c) 2002 by Olin Shivers.
|
||||||
|
;;; This code is open-source; see the end of the file for porting and
|
||||||
|
;;; more copyright information.
|
||||||
|
;;; Olin Shivers 2002/7.
|
||||||
|
|
||||||
|
;;; (quick-sort3! c v [start end]) -> unspecific
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; Sort vector V[start,end) using three-way comparison function C:
|
||||||
|
;;; (c x y) < 0 => x<y
|
||||||
|
;;; (c x y) = 0 => x=y
|
||||||
|
;;; (c x y) > 0 => x>y
|
||||||
|
;;; That is, C acts as a sort of "subtraction" procedure; using - for the
|
||||||
|
;;; comparison function will cause numbers to be sorted into increasing order.
|
||||||
|
;;;
|
||||||
|
;;; This algorithm is more efficient than standard, two-way quicksort if there
|
||||||
|
;;; are many duplicate items in the data set and the comparison function is
|
||||||
|
;;; relatively expensive (e.g., comparing large strings). It is due to Jon
|
||||||
|
;;; Bentley & Doug McIlroy; I learned it from Bentley.
|
||||||
|
;;;
|
||||||
|
;;; The algorithm is a standard quicksort, but the partition loop is fancier,
|
||||||
|
;;; arranging the vector into a left part that is <, a middle region that is
|
||||||
|
;;; =, and a right part that is > the pivot. Here's how it is done:
|
||||||
|
;;; The partition loop divides the range being partitioned into five
|
||||||
|
;;; subranges:
|
||||||
|
;;; =======<<<<<<<<<?????????>>>>>>>=======
|
||||||
|
;;; where = marks a value that is equal the pivot, < marks a value that
|
||||||
|
;;; is less than the pivot, ? marks a value that hasn't been scanned, and
|
||||||
|
;;; > marks a value that is greater than the pivot. Let's consider the
|
||||||
|
;;; left-to-right scan. If it checks a ? value that is <, it keeps scanning.
|
||||||
|
;;; If the ? value is >, we stop the scan -- we are ready to start the
|
||||||
|
;;; right-to-left scan and then do a swap. But if the rightward scan checks
|
||||||
|
;;; a ? value that is =, we swap it *down* to the end of the initial chunk
|
||||||
|
;;; of ====='s -- we exchange it with the leftmost < value -- and then
|
||||||
|
;;; continue our rightward scan. The leftwards scan works in a similar
|
||||||
|
;;; fashion, scanning past > elements, stopping on a < element, and swapping
|
||||||
|
;;; up = elements. When we are done, we have a picture like this
|
||||||
|
;;; ========<<<<<<<<<<<<>>>>>>>>>>=========
|
||||||
|
;;; Then swap the = elements up into the middle of the vector to get
|
||||||
|
;;; this:
|
||||||
|
;;; <<<<<<<<<<<<=================>>>>>>>>>>
|
||||||
|
;;; Then recurse on the <'s and >'s. Work out all the tricky little
|
||||||
|
;;; boundary cases, and you're done.
|
||||||
|
;;;
|
||||||
|
;;; Other tricks that make this implementation industrial strength:
|
||||||
|
;;; - This quicksort makes some effort to pick the pivot well -- it uses the
|
||||||
|
;;; median of three elements as the partition pivot, so pathological n^2
|
||||||
|
;;; run time is much rarer (but not eliminated completely). If you really
|
||||||
|
;;; wanted to get fancy, you could use a random number generator to choose
|
||||||
|
;;; pivots. The key to this trick is that you only need to pick one random
|
||||||
|
;;; number for each *level* of recursion -- i.e. you only need (lg n) random
|
||||||
|
;;; numbers.
|
||||||
|
;;;
|
||||||
|
;;; - After the partition, we *recurse* on the smaller of the two pending
|
||||||
|
;;; regions, then *tail-recurse* (iterate) on the larger one. This guarantees
|
||||||
|
;;; we use no more than lg(n) stack frames, worst case.
|
||||||
|
;;;
|
||||||
|
;;; - There are two ways to finish off the sort.
|
||||||
|
;;; A. Recurse down to regions of size 10, then sort each such region using
|
||||||
|
;;; insertion sort.
|
||||||
|
;;; B. Recurse down to regions of size 10, then sort *the entire vector*
|
||||||
|
;;; using insertion sort.
|
||||||
|
;;; We do A. Each choice has a cost. Choice A has more overhead to invoke
|
||||||
|
;;; all the separate insertion sorts -- choice B only calls insertion sort
|
||||||
|
;;; once. But choice B will call the comparison function *more times* --
|
||||||
|
;;; it will unnecessarily compare elt 9 of one segment to elt 0 of the
|
||||||
|
;;; following segment. The overhead of choice A is linear in the length
|
||||||
|
;;; of the vector, but *otherwise independent of the algorithm's parameters*.
|
||||||
|
;;; I.e., it's a *fixed*, *small* constant factor. The cost of the extra
|
||||||
|
;;; comparisons made by choice B, however, is dependent on an externality:
|
||||||
|
;;; the comparison function passed in by the client. This can be made
|
||||||
|
;;; arbitrarily bad -- that is, the constant factor *isn't* fixed by the
|
||||||
|
;;; sort algorithm; instead, it's determined by the comparison function.
|
||||||
|
;;; If your comparison function is very, very slow, you want to eliminate
|
||||||
|
;;; every single one that you can. Choice A limits the potential badness,
|
||||||
|
;;; so that is what we do.
|
||||||
|
|
||||||
|
(define (vector-quick-sort3! c v . maybe-start+end)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (vector-start+end v maybe-start+end))
|
||||||
|
(lambda (start end)
|
||||||
|
(%quick-sort3! c v start end))))
|
||||||
|
|
||||||
|
(define (vector-quick-sort3 c v . maybe-start+end)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (vector-start+end v maybe-start+end))
|
||||||
|
(lambda (start end)
|
||||||
|
(let ((ans (make-vector (- end start))))
|
||||||
|
(vector-portion-copy! ans v start end)
|
||||||
|
(%quick-sort3! c ans 0 (- end start))
|
||||||
|
ans))))
|
||||||
|
|
||||||
|
;;; %QUICK-SORT3! is not exported.
|
||||||
|
;;; Preconditions:
|
||||||
|
;;; V vector
|
||||||
|
;;; START END fixnums
|
||||||
|
;;; 0 <= START, END <= (vector-length V)
|
||||||
|
;;; If these preconditions are ensured by the cover functions, you
|
||||||
|
;;; can safely change this code to use unsafe fixnum arithmetic and vector
|
||||||
|
;;; indexing ops, for *huge* speedup.
|
||||||
|
;;;
|
||||||
|
;;; We bail out to insertion sort for small ranges; feel free to tune the
|
||||||
|
;;; crossover -- it's just a random guess. If you don't have the insertion
|
||||||
|
;;; sort routine, just kill that branch of the IF and change the recursion
|
||||||
|
;;; test to (< 1 (- r l)) -- the code is set up to work that way.
|
||||||
|
|
||||||
|
(define (%quick-sort3! c v start end)
|
||||||
|
(define (swap l r n) ; Little utility -- swap the N
|
||||||
|
(if (> n 0)
|
||||||
|
(let ((x (vector-ref v l)) ; outer pairs of the range [l,r).
|
||||||
|
(r-1 (- r 1)))
|
||||||
|
(vector-set! v l (vector-ref v r-1))
|
||||||
|
(vector-set! v r-1 x)
|
||||||
|
(swap (+ l 1) r-1 (- n 1)))))
|
||||||
|
|
||||||
|
(define (sort3 v1 v2 v3)
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (if (< (c v1 v2) 0) (values v1 v2) (values v2 v1)))
|
||||||
|
(lambda (little big)
|
||||||
|
(if (< (c big v3) 0)
|
||||||
|
(values little big v3)
|
||||||
|
(if (< (c little v3) 0)
|
||||||
|
(values little v3 big)
|
||||||
|
(values v3 little big))))))
|
||||||
|
|
||||||
|
(define (elt< v1 v2)
|
||||||
|
(negative? (c v1 v2)))
|
||||||
|
|
||||||
|
(let recur ((l start) (r end)) ; Sort the range [l,r).
|
||||||
|
(if (< 10 (- r l)) ; 10: the gospel according to Sedgewick.
|
||||||
|
|
||||||
|
;; Choose the median of V[l], V[r-1], and V[middle] for the pivot.
|
||||||
|
;; We do this by sorting these three elts; call the results LO, PIVOT
|
||||||
|
;; & HI. Put LO, PIVOT & HI where they should go in the vector. We
|
||||||
|
;; will kick off the partition step with one elt (PIVOT) in the left=
|
||||||
|
;; range, one elt (LO) in the < range, one elt (HI) in in the > range
|
||||||
|
;; & no elts in the right= range.
|
||||||
|
(let* ((r-1 (- r 1)) ; Three handy
|
||||||
|
(mid (quotient (+ l r) 2)) ; common
|
||||||
|
(l+1 (+ l 1)) ; subexpressions
|
||||||
|
(pivot (call-with-values
|
||||||
|
(lambda ()
|
||||||
|
(sort3 (vector-ref v l)
|
||||||
|
(vector-ref v mid)
|
||||||
|
(vector-ref v r-1)))
|
||||||
|
(lambda (lo piv hi)
|
||||||
|
(let ((tmp (vector-ref v l+1))) ; Put LO, PIV & HI
|
||||||
|
(vector-set! v l piv) ; back into V
|
||||||
|
(vector-set! v r-1 hi) ; where they belong,
|
||||||
|
(vector-set! v l+1 lo)
|
||||||
|
(vector-set! v mid tmp)
|
||||||
|
piv))))) ; and return PIV as pivot.
|
||||||
|
|
||||||
|
|
||||||
|
;; Everything in these loops is driven by the invariants expressed
|
||||||
|
;; in the little pictures, the corresponding l,i,j,k,m,r indices,
|
||||||
|
;; & the associated ranges.
|
||||||
|
|
||||||
|
;; =======<<<<<<<<<?????????>>>>>>>======= (picture)
|
||||||
|
;; l i j k m r (indices)
|
||||||
|
;; [l,i) [i,j) [j,k] (k,m] (m,r) (ranges )
|
||||||
|
(letrec ((lscan (lambda (i j k m) ; left-to-right scan
|
||||||
|
(let lp ((i i) (j j))
|
||||||
|
(if (> j k)
|
||||||
|
(done i j m)
|
||||||
|
(let* ((x (vector-ref v j))
|
||||||
|
(sign (c x pivot)))
|
||||||
|
(cond ((< sign 0) (lp i (+ j 1)))
|
||||||
|
|
||||||
|
((= sign 0)
|
||||||
|
(if (< i j)
|
||||||
|
(begin (vector-set! v j (vector-ref v i))
|
||||||
|
(vector-set! v i x)))
|
||||||
|
(lp (+ i 1) (+ j 1)))
|
||||||
|
|
||||||
|
((> sign 0) (rscan i j k m))))))))
|
||||||
|
|
||||||
|
;; =======<<<<<<<<<>????????>>>>>>>=======
|
||||||
|
;; l i j k m r
|
||||||
|
;; [l,i) [i,j) j (j,k] (k,m] (m,r)
|
||||||
|
(rscan (lambda (i j k m) ; right-to-left scan
|
||||||
|
(let lp ((k k) (m m))
|
||||||
|
(if (<= k j)
|
||||||
|
(done i j m)
|
||||||
|
(let* ((x (vector-ref v k))
|
||||||
|
(sign (c x pivot)))
|
||||||
|
(cond ((> sign 0) (lp (- k 1) m))
|
||||||
|
|
||||||
|
((= sign 0)
|
||||||
|
(if (< k m)
|
||||||
|
(begin (vector-set! v k (vector-ref v m))
|
||||||
|
(vector-set! v m x)))
|
||||||
|
(lp (- k 1) (- m 1)))
|
||||||
|
|
||||||
|
((< sign 0) ; Swap j & k & lscan.
|
||||||
|
(vector-set! v k (vector-ref v j))
|
||||||
|
(vector-set! v j x)
|
||||||
|
(lscan i (+ j 1) (- k 1) m))))))))
|
||||||
|
|
||||||
|
;; =======<<<<<<<<<<<<<>>>>>>>>>>>=======
|
||||||
|
;; l i j m r
|
||||||
|
;; [l,i) [i,j) [j,m] (m,r)
|
||||||
|
(done (lambda (i j m)
|
||||||
|
(let ((num< (- j i))
|
||||||
|
(num> (+ 1 (- m j)))
|
||||||
|
(num=l (- i l))
|
||||||
|
(num=r (- (- r m) 1)))
|
||||||
|
(swap l j (min num< num=l)) ; Swap ='s into
|
||||||
|
(swap j r (min num> num=r)) ; the middle.
|
||||||
|
;; Recur on the <'s and >'s. Recurring on the
|
||||||
|
;; smaller range and iterating on the bigger
|
||||||
|
;; range ensures O(lg n) stack frames, worst case.
|
||||||
|
(cond ((<= num< num>)
|
||||||
|
(recur l (+ l num<))
|
||||||
|
(recur (- r num>) r))
|
||||||
|
(else
|
||||||
|
(recur (- r num>) r)
|
||||||
|
(recur l (+ l num<))))))))
|
||||||
|
|
||||||
|
;; To repeat: We kick off the partition step with one elt (PIVOT)
|
||||||
|
;; in the left= range, one elt (LO) in the < range, one elt (HI)
|
||||||
|
;; in the > range & no elts in the right= range.
|
||||||
|
(lscan l+1 (+ l 2) (- r 2) r-1)))
|
||||||
|
|
||||||
|
;; Small segment => punt to insert sort.
|
||||||
|
;; Use the dangerous subprimitive.
|
||||||
|
(%vector-insert-sort! elt< v l r))))
|
||||||
|
|
||||||
|
;;; Copyright
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; This code is
|
||||||
|
;;; Copyright (c) 1998 by Olin Shivers.
|
||||||
|
;;; The terms are: You may do as you please with this code, as long as
|
||||||
|
;;; you do not delete this notice or hold me responsible for any outcome
|
||||||
|
;;; related to its use.
|
||||||
|
;;;
|
||||||
|
;;; Blah blah blah.
|
||||||
|
|
||||||
|
|
||||||
|
;;; Code tuning & porting
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; - The quicksort recursion bottoms out in a call to an insertion sort
|
||||||
|
;;; routine, %INSERT-SORT!. But you could even punt this and go with pure
|
||||||
|
;;; recursion in a pinch.
|
||||||
|
;;;
|
||||||
|
;;; This code is *tightly* bummed as far as I can go in portable Scheme.
|
||||||
|
;;;
|
||||||
|
;;; The internal primitive %QUICK-SORT! that does the real work can be
|
||||||
|
;;; converted to use unsafe vector-indexing and fixnum-specific arithmetic ops
|
||||||
|
;;; *if* you alter the two small cover functions to enforce the invariants.
|
||||||
|
;;; This should provide *big* speedups. In fact, all the code bumming I've
|
||||||
|
;;; done pretty much disappears in the noise unless you have a good compiler
|
||||||
|
;;; and also can dump the vector-index checks and generic arithmetic -- so
|
||||||
|
;;; I've really just set things up for you to exploit.
|
||||||
|
;;;
|
||||||
|
;;; The optional-arg parsing, defaulting, and error checking is done with a
|
||||||
|
;;; portable R4RS macro. But if your Scheme has a faster mechanism (e.g.,
|
||||||
|
;;; Chez), you should definitely port over to it. Note that argument defaulting
|
||||||
|
;;; and error-checking are interleaved -- you don't have to error-check
|
||||||
|
;;; defaulted START/END args to see if they are fixnums that are legal vector
|
||||||
|
;;; indices for the corresponding vector, etc.
|
Loading…
Add table
Reference in a new issue