From 75cf2d7331c6c538ba3840dee2df148d3d7a5240 Mon Sep 17 00:00:00 2001 From: Alex Shinn Date: Sun, 1 Sep 2013 23:08:33 +0900 Subject: [PATCH] Adding memoization library. --- lib/chibi/memoize.scm | 297 ++++++++++++++++++++++++++++++++++++++++ lib/chibi/memoize.sld | 17 +++ tests/memoize-tests.scm | 49 +++++++ 3 files changed, 363 insertions(+) create mode 100644 lib/chibi/memoize.scm create mode 100644 lib/chibi/memoize.sld create mode 100644 tests/memoize-tests.scm diff --git a/lib/chibi/memoize.scm b/lib/chibi/memoize.scm new file mode 100644 index 00000000..fc37d960 --- /dev/null +++ b/lib/chibi/memoize.scm @@ -0,0 +1,297 @@ +;; memoize.scm -- caching and memoization utilities +;; Copyright (c) 2003-2013 Alex Shinn. All rights reserved. +;; BSD-style license: http://synthcode.com/license.txt + +;; Memory and persistent caching with various levels of control, based +;; on a combination of lru-cache from Hato and an older memoization +;; library for Gauche. + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; types + +(define-record-type Lru-Cache + (%make-lru-cache table front back size size-limit compute-size) + lru-cache? + (table lru-table) + (front lru-front lru-front-set!) + (back lru-back lru-back-set!) + (size lru-size lru-size-set!) + (size-limit lru-size-limit) + (compute-size lru-compute-size)) + +(define-record-type Lru-Entry + (make-lru-entry key value size prev) + lru-entry? + (key lru-entry-key) + (value lru-entry-value lru-entry-value-set!) + (size lru-entry-size lru-entry-size-set!) + (prev lru-entry-prev lru-entry-prev-set!)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; high-level interface + +;;> Analagous to the procedure form of \scheme{define} but +;;> automatically memoizes the function. Uses \scheme{equal?} for +;;> equality comparisons and reasonable defaults - for finer grained +;;> control use \scheme{memoize}. + +(define-syntax define-memoized + (syntax-rules () + ((define-memoized (proc x ...) . body) + (define proc + (make-memoizer (lambda (x ...) . body) (length '(x ...)) (make-lru-cache)))) + ((define-memoized (proc . x) . body) + (define proc + (make-memoizer (lambda x . body) #f (make-lru-cache )))))) + +(define (make-memoizer proc arity cache) + (let ((ref! (if (lru-cache? cache) lru-ref! hash-table-ref!))) + (case arity + ((0) + proc) + ((1) + (lambda (x) (ref! cache x proc))) + ((2) + (lambda (x y) + (ref! cache (cons x y) (lambda (xy) (proc (car xy) (cdr xy)))))) + (else + (lambda args + (ref! cache args (lambda (args) (apply proc args)))))))) + +;;> Returns a memoized version of the procedure \var{proc}. By +;;> default uses a least-recently-used (LRU) cache, which can be tuned +;;> with the following keyword arguments: +;;> +;;> \items[ +;;> \item{cache: an explicit pre-existing cache (LRU or hash-table)} +;;> \item{equal: an equality predicate defaulting to \scheme{equal?}} +;;> \item{hash: a hash function to match the equality predicate, defaulting to \scheme{hash} from \scheme{(srfi 69)}} +;;> \item{init-size: a hint for the initial size of the backing hash table} +;;> \item{size-limit: the maximum size of the cache} +;;> \item{compute-size: compute the size of a cache entry} +;;> ] +;;> +;;> \var{compute-size} is a procedure of two arguments, the key and +;;> value to be stored, and defaults to a constant 1 per entry. After +;;> every insertion the oldest elements will be removed until the size +;;> is under \var{size-limit}. You may find +;;> +;;> \scheme{(lambda (k v) (+ (object-size k) (object-size v)))} +;;> +;;> using \scheme{object-size} from \scheme{(chibi ast)} to be a +;;> useful \var{compute-size}. +;;> +;;> If \var{size-limit} is \scheme{#f} then the cache is unlimited, +;;> and a simple hash-table will be used in place of an LRU cache. + +(define (memoize proc . o) + (let-keywords* o + ((equal equal: equal?) + (hash hash: hash) + (arity arity: (and (not (procedure-variadic? proc)) + (procedure-arity proc))) + (init-size init-size: 31) + (limit size-limit: 1000) + (compute-size compute-size: (lambda (k v) 1)) + (cache cache: (if limit + (make-lru-cache 'equal: equal + 'hash: hash + 'init-size: init-size + 'size-limit: limit + 'compute-size: compute-size) + (make-hash-table equal hash)))) + (make-memoizer proc arity cache))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; persistent memoization + +(define (get-memo-directory proc-name) + (let ((uid (current-user-id))) + (if (zero? uid) + (make-path "/var/run/memo.d" proc-name) + (make-path (user-home (user-information uid)) ".memo.d" proc-name)))) + +(define (encode-file-name str) + (define (file-name-safe-char? ch) + (or (char-alphabetic? ch) (char-numeric? ch) + (memv ch '(#\_ #\- #\+ #\. #\,)))) + (define (encode-char ch) + (let* ((i (char->integer ch)) + (hex (number->string i 16))) + (if (< i 16) + (string-append "%0" hex) + (string-append "%" hex)))) + (define (collect str from to res) + (if (>= from to) + res + (cons (substring-cursor str from to) res))) + (let ((end (string-cursor-end str))) + (let lp ((from 0) (to 0) (res '())) + (if (string-cursor>=? to end) + (if (zero? from) + str + (string-concatenate (reverse (collect str from to res)))) + (let* ((ch (string-cursor-ref str to)) + (next (string-cursor-next str to))) + (if (file-name-safe-char? ch) + (lp from next res) + (lp next next (cons (encode-char ch) + (collect str from to res))))))))) + +(define (default-args-encoder args) + (encode-file-name + (string-append (call-with-output-string (lambda (out) (write/ss args out))) + ".memo"))) + +(define (memoize-to-file proc . o) + (let-keywords* o + ((args-encoder args-encoder: default-args-encoder) + (proc-name proc-name: (or (procedure-name proc) "lambda")) + (memo-dir memo-dir: (get-memo-directory proc-name)) + (file-validator file-validator: (lambda args #t)) + (validator validator: (lambda args #t)) + (read read: read/ss) + (write write: write/ss)) + (lambda args + (let ((file (make-path memo-dir (apply args-encoder args)))) + (define (compute) + (let ((res (apply proc args))) + (create-directory* (path-directory file)) + (call-with-output-file file + (lambda (out) (write res out))) + res)) + (if (and (file-exists? file) + (apply file-validator file args)) + (let ((res (call-with-input-file file read))) + (if (validator res) + res + (compute))) + (compute)))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; low-level utilities + +;;> Creates a new empty LRU object. The same keyword arguments as in +;;> \scheme{memoize} are available, except of course for \var{cache}. + +(define (make-lru-cache . o) + (let-keywords* o ((equal equal: equal?) + (hash hash: hash) + (init-size init-size: 31) + (compute-size compute-size: (lambda (k v) 1)) + (size-limit size-limit: 1000)) + (let ((tab (make-hash-table equal hash init-size))) + (%make-lru-cache tab '() '() 0 size-limit compute-size)))) + +;; add entry to the back of the queue +(define (lru-enq! lru entry) + (let ((cell (list entry))) + (if (null? (lru-front lru)) ; empty + (lru-front-set! lru cell) + (set-cdr! (lru-back lru) cell)) + (lru-back-set! lru cell))) + +;; pop the front of the queue +(define (lru-deq! lru) + (let ((cell (lru-front lru))) + (if (null? cell) + (error "lru queue is empty") + (let ((rest (cdr cell))) + (lru-front-set! lru rest) + (if (null? rest) + (lru-back-set! lru '())) + (car cell))))) + +;; shift the given entry, anywhere in the queue, to the end +(define (lru-shift-to-last! lru entry) + (let ((prev (lru-entry-prev entry)) + (last-pair (lru-back lru))) + (cond + ((null? prev) + ;; first entry, just pop and re-queue it, and update prev pointers + (lru-enq! lru (lru-deq! lru)) + (lru-entry-prev-set! entry last-pair) + (lru-entry-prev-set! (car (lru-front lru)) '())) + ((eq? (cdr prev) last-pair) + ;; already at the end, nothing to do + ) + (else + ;; a middle element, splice it out and re-queue + (let ((cell (cdr prev))) + (set-cdr! prev (cdr cell)) ; splice out + (if (pair? (cdr cell)) + (lru-entry-prev-set! (cadr cell) prev)) + (lru-enq! lru entry) ; reinsert at end + (lru-entry-prev-set! entry last-pair)))))) + +(define (lru-shrink! lru) + (let ((size-limit (lru-size-limit lru)) + (size (lru-size lru))) + (if (> size size-limit) + (let lp ((size size)) + (if (> size size-limit) + (let ((x (lru-deq! lru))) + (let ((next (lru-front lru))) + (if (pair? next) + (lru-entry-prev-set! (car next) '()))) + (hash-table-delete! (lru-table lru) (lru-entry-key x)) + (lp (- size (lru-entry-size x)))) + (lru-size-set! lru size)))))) + +;;> Looks up \var{key} in the cache LRU. If not found returns #f, +;;> unless \var{compute} is given in which case \var{compute} is +;;> applied to \var{key} to determine the return value. This does not +;;> update the cache. + +(define (lru-ref lru key . o) + (let ((entry (hash-table-ref/default (lru-table lru) key #f))) + (cond (entry + (lru-shift-to-last! lru entry) + (lru-entry-value entry)) + ((pair? o) + ((car o) key)) + (else + (error "no lru entry for" key))))) + +;;> Identical to lru-ref except that it updates the cache on a miss. + +(define (lru-ref! lru key compute) + (cond ((hash-table-ref/default (lru-table lru) key #f) + => (lambda (entry) + (lru-shift-to-last! lru entry) + (lru-entry-value entry))) + (else + (let ((value (compute key))) + (lru-add! lru key value) + value)))) + +(define (lru-add! lru key value) + (let* ((size ((lru-compute-size lru) key value)) + (last-pair (lru-back lru)) + (entry (make-lru-entry key value size last-pair))) + (hash-table-set! (lru-table lru) key entry) + (lru-enq! lru entry) + (lru-size-set! lru (+ size (lru-size lru))) + (lru-shrink! lru))) + +;;> Directly set a value in the cache. + +(define (lru-set! lru key value) + (let ((entry (hash-table-ref/default (lru-table lru) key #f))) + (cond (entry + (lru-shift-to-last! lru entry) + (lru-entry-value-set! entry value) + (let ((prev-size (lru-entry-size entry)) + (size ((lru-compute-size lru) key value))) + (lru-entry-size-set! entry size) + (lru-size-set! lru (+ (lru-size lru) (- size prev-size))))) + (else + (lru-add! lru key value))) + (lru-shrink! lru))) + +(define (hash-table-ref! table key proc) + (hash-table-ref table key + (lambda () + (let ((res (proc key))) + (hash-table-set! table key res) + res)))) diff --git a/lib/chibi/memoize.sld b/lib/chibi/memoize.sld new file mode 100644 index 00000000..09802d98 --- /dev/null +++ b/lib/chibi/memoize.sld @@ -0,0 +1,17 @@ + +(define-library (chibi memoize) + (import (chibi) (chibi optional) (chibi time) (chibi io) + (chibi pathname) (chibi filesystem) (chibi system) + (srfi 9) (srfi 38) (srfi 69)) + (cond-expand + (chibi + (import (chibi ast))) + (else + (begin + (define (procedure-name x) #f) + (define (procedure-arity x) #f) + (define (procedure-variadic? x) #f)))) + (export define-memoized memoize memoize-to-file + make-lru-cache lru-cache? lru-ref lru-ref! lru-set! + hash-table-ref!) + (include "memoize.scm")) diff --git a/tests/memoize-tests.scm b/tests/memoize-tests.scm new file mode 100644 index 00000000..21504139 --- /dev/null +++ b/tests/memoize-tests.scm @@ -0,0 +1,49 @@ + +(import (chibi) (chibi memoize) (chibi filesystem) (chibi test)) + +(test-begin "memoize") + +(define-memoized (fib n) + (if (<= n 1) + 1 + (+ (fib (- n 1)) (fib (- n 2))))) + +(test 1 (fib 1)) +(test 573147844013817084101 (fib 100)) + +(define-memoized (ack m n) + (cond + ((= m 0) (+ n 1)) + ((= n 0) (ack (- m 1) 1)) + (else (ack (- m 1) (ack m (- n 1)))))) + +(test 29 (ack 3 2)) +(test 61 (ack 3 3)) + +(let ((n 0)) + (let ((f (memoize (lambda (x) (set! n (+ n 1)) (* x x))))) + (test 0 n) + (test 9 (f 3)) + (test 1 n) + (test 9 (f 3)) + (test 1 n))) + +(let ((n 0)) + (let ((f (memoize (lambda (x) (set! n (+ n 1)) (* x x)) + 'size-limit: #f))) + (test 0 n) + (test 9 (f 3)) + (test 1 n) + (test 9 (f 3)) + (test 1 n))) + +(letrec ((fib (lambda (n) + (if (<= n 1) + 1 + (+ (fib (- n 1)) (fib (- n 2))))))) + (let ((f (memoize-to-file fib 'memo-dir: "/tmp/memo.d/"))) + (test 89 (f 10)) + (test-assert (file-exists? "/tmp/memo.d/10.memo")) + (test 89 (f 10)))) + +(test-end)