chibi-scheme/lib/chibi/memoize.scm

356 lines
14 KiB
Scheme

;; memoize.scm -- caching and memoization utilities
;; Copyright (c) 2003-2013 Alex Shinn. All rights reserved.
;; BSD-style license: http://synthcode.com/license.txt
;;> Memory and persistent caching with various levels of control, based
;;> on a combination of lru-cache from Hato and an older memoization
;;> library for Gauche.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; types
(define-record-type Lru-Cache
(%make-lru-cache table front back size size-limit compute-size)
lru-cache?
(table lru-table)
(front lru-front lru-front-set!)
(back lru-back lru-back-set!)
(size lru-size lru-size-set!)
(size-limit lru-size-limit)
(compute-size lru-compute-size))
(define-record-type Lru-Entry
(make-lru-entry key value size prev)
lru-entry?
(key lru-entry-key)
(value lru-entry-value lru-entry-value-set!)
(size lru-entry-size lru-entry-size-set!)
(prev lru-entry-prev lru-entry-prev-set!))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; high-level interface
;;> Analagous to the procedure form of \scheme{define} but
;;> automatically memoizes the function. Uses \scheme{equal?} for
;;> equality comparisons and reasonable defaults - for finer grained
;;> control use \scheme{memoize}.
(define-syntax define-memoized
(syntax-rules ()
((define-memoized (proc x ...) . body)
(define proc
(make-memoizer (lambda (x ...) . body) (length '(x ...)) (make-lru-cache))))
((define-memoized (proc . x) . body)
(define proc
(make-memoizer (lambda x . body) #f (make-lru-cache ))))))
(define (make-memoizer proc arity cache)
(let ((ref! (if (lru-cache? cache) lru-ref! hash-table-ref!)))
(case arity
((0)
proc)
((1)
(lambda (x) (ref! cache x proc)))
((2)
(lambda (x y)
(ref! cache (cons x y) (lambda (xy) (proc (car xy) (cdr xy))))))
(else
(lambda args
(ref! cache args (lambda (args) (apply proc args))))))))
;;> Returns a memoized version of the procedure \var{proc}. By
;;> default uses a least-recently-used (LRU) cache, which can be tuned
;;> with the following keyword arguments:
;;>
;;> \items[
;;> \item{cache: an explicit pre-existing cache (LRU or hash-table)}
;;> \item{equal: an equality predicate defaulting to \scheme{equal?}}
;;> \item{hash: a hash function to match the equality predicate, defaulting to \scheme{hash} from \scheme{(srfi 69)}}
;;> \item{init-size: a hint for the initial size of the backing hash table}
;;> \item{size-limit: the maximum size of the cache}
;;> \item{compute-size: compute the size of a cache entry}
;;> ]
;;>
;;> \var{compute-size} is a procedure of two arguments, the key and
;;> value to be stored, and defaults to a constant 1 per entry. After
;;> every insertion the oldest elements will be removed until the size
;;> is under \var{size-limit}. You may find
;;>
;;> \scheme{(lambda (k v) (+ (object-size k) (object-size v)))}
;;>
;;> using \scheme{object-size} from \scheme{(chibi ast)} to be a
;;> useful \var{compute-size}.
;;>
;;> If \var{size-limit} is \scheme{#f} then the cache is unlimited,
;;> and a simple hash-table will be used in place of an LRU cache.
(define (memoize proc . o)
(let-keywords* o
((equal equal: equal?)
(hash hash: hash)
(arity arity: (and (not (procedure-variadic? proc))
(procedure-arity proc)))
(init-size init-size: 31)
(limit size-limit: 1000)
(compute-size compute-size: (lambda (k v) 1))
(cache-init cache: '()))
(let ((cache (cond ((lru-cache? cache-init)
cache-init)
(limit
(make-lru-cache 'equal: equal
'hash: hash
'init-size: init-size
'size-limit: limit
'compute-size: compute-size))
(else
(make-hash-table equal hash)))))
;; allow an alist initializer for the cache
(if (pair? cache-init)
(for-each (lambda (x) (lru-add! cache (car x) (cdr x)))
cache-init))
(make-memoizer proc arity cache))))
;;> Equivalent to memoize except that the procedure's first argument
;;> must be a pathname. If the corresponding file has been modified
;;> since the memoized value, the value is recomputed. Useful to
;;> automatically reflect external changes to a file-backed resource.
;;> The additional keyword argument \scheme{reloader?:}, if true,
;;> indicates that the result of loading is itself a procedure which
;;> should check for updates on each call.
(define (memoize-file-loader proc . o)
(let* ((f (lambda (file . rest)
(let ((mtime (file-modification-time/safe file)))
(cons mtime (apply proc file rest)))))
(g (apply memoize f o))
(reloader? (cond ((memq 'reloader?: o) => cdr) (else #f))))
(lambda (file . rest)
(let ((cell (apply g file rest)))
(let-syntax ((update!
(syntax-rules ()
((update! default)
(let ((mtime (file-modification-time/safe file)))
(if (and mtime
(or (not (car cell))
(> mtime (car cell))))
(let ((res (apply proc file rest)))
(set-car! cell mtime)
(set-cdr! cell res)
res)
default))))))
(update! (if (and reloader? (procedure? (cdr cell)))
(lambda args (apply (update! (cdr cell)) args))
(cdr cell))))))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; persistent memoization
(define (get-memo-directory proc-name)
(or (get-environment-variable "MEMOIZE_DIR")
(if (i-am-root?)
(make-path "/var/run/memo.d" proc-name)
(make-path (or (get-environment-variable "HOME") ".")
".memo.d" proc-name))))
(define (encode-file-name str)
(define (file-name-safe-char? ch)
(or (char-alphabetic? ch) (char-numeric? ch)
(memv ch '(#\_ #\- #\+ #\. #\,))))
(define (encode-char ch)
(let* ((i (char->integer ch))
(hex (number->string i 16)))
(if (< i 16)
(string-append "%0" hex)
(string-append "%" hex))))
(define (collect str from to res)
(if (string-cursor>=? from to)
res
(cons (substring-cursor str from to) res)))
(let ((start (string-cursor-start str))
(end (string-cursor-end str)))
(let lp ((from start) (to start) (res '()))
(if (string-cursor>=? to end)
(if (string-cursor=? from start)
str
(string-concatenate (reverse (collect str from to res))))
(let* ((ch (string-cursor-ref str to))
(next (string-cursor-next str to)))
(if (file-name-safe-char? ch)
(lp from next res)
(lp next next (cons (encode-char ch)
(collect str from to res)))))))))
(define (default-args-encoder args)
(let ((out (open-output-string)))
(write/ss args out)
(encode-file-name (string-append (get-output-string out) ".memo"))))
;;> Returns a memoized version of the procedure \var{proc} which
;;> stores the memoized results persistently in a file. Garbage
;;> collection of the files is left as an external task for monitoring
;;> tools or cron jobs.
;;>
;;> Accepts the following keyword arguments:
;;>
;;> \items[
;;> \item{args-encoder: procedure which takes the arguments as a single list, and returns a string representation suitable for use as a (base) file name}
;;> \item{proc-name: the name of the procedure, to use a a subdir of memo-dir to distinguish from other memoized procedures}
;;> \item{memo-dir: the directory to store results in, defaulting to ~/.memo/}
;;> \item{file-validator: validator to run on the existing file - if it returns false, the file is considered bad and the result recomputed}
;;> \item{validator: validator to run on the result of reading the file}
;;> \item{read: the read procedure to extract the result from the file}
;;> \item{write: the write procedure to write the result to the file}
;;> ]
(define (memoize-to-file proc . o)
(let-keywords* o
((args-encoder args-encoder: default-args-encoder)
(proc-name proc-name: (or (procedure-name proc) "lambda"))
(memo-dir memo-dir: (get-memo-directory proc-name))
(file-validator file-validator: (lambda args #t))
(validator validator: (lambda args #t))
(read read: read/ss)
(write write: write/ss))
(lambda args
(let ((file (make-path memo-dir (args-encoder args))))
(define (compute)
(let ((res (apply proc args)))
(create-directory* (path-directory file))
(call-with-output-file file
(lambda (out) (write res out)))
res))
(if (and (file-exists? file)
(apply file-validator file args))
(let ((res (call-with-input-file file read)))
(if (validator res)
res
(compute)))
(compute))))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; low-level utilities
;;> Creates a new empty LRU object. The same keyword arguments as in
;;> \scheme{memoize} are available, except of course for \var{cache}.
(define (make-lru-cache . o)
(let-keywords* o ((equal equal: equal?)
(hash hash: hash)
(init-size init-size: 31)
(compute-size compute-size: (lambda (k v) 1))
(size-limit size-limit: 1000))
(let ((tab (make-hash-table equal hash))) ; init-size
(%make-lru-cache tab '() '() 0 size-limit compute-size))))
;; add entry to the back of the queue
(define (lru-enq! lru entry)
(let ((cell (list entry)))
(if (null? (lru-front lru)) ; empty
(lru-front-set! lru cell)
(set-cdr! (lru-back lru) cell))
(lru-back-set! lru cell)))
;; pop the front of the queue
(define (lru-deq! lru)
(let ((cell (lru-front lru)))
(if (null? cell)
(error "lru queue is empty")
(let ((rest (cdr cell)))
(lru-front-set! lru rest)
(if (null? rest)
(lru-back-set! lru '()))
(car cell)))))
;; shift the given entry, anywhere in the queue, to the end
(define (lru-shift-to-last! lru entry)
(let ((prev (lru-entry-prev entry))
(last-pair (lru-back lru)))
(cond
((null? prev)
;; first entry, just pop and re-queue it, and update prev pointers
(lru-enq! lru (lru-deq! lru))
(lru-entry-prev-set! entry last-pair)
(lru-entry-prev-set! (car (lru-front lru)) '()))
((eq? (cdr prev) last-pair)
;; already at the end, nothing to do
)
(else
;; a middle element, splice it out and re-queue
(let ((cell (cdr prev)))
(set-cdr! prev (cdr cell)) ; splice out
(if (pair? (cdr cell))
(lru-entry-prev-set! (cadr cell) prev))
(lru-enq! lru entry) ; reinsert at end
(lru-entry-prev-set! entry last-pair))))))
(define (lru-shrink! lru)
(let ((size-limit (lru-size-limit lru))
(size (lru-size lru)))
(if (> size size-limit)
(let lp ((size size))
(if (> size size-limit)
(let ((x (lru-deq! lru)))
(let ((next (lru-front lru)))
(if (pair? next)
(lru-entry-prev-set! (car next) '())))
(hash-table-delete! (lru-table lru) (lru-entry-key x))
(lp (- size (lru-entry-size x))))
(lru-size-set! lru size))))))
;;> Looks up \var{key} in the cache LRU. If not found returns #f,
;;> unless \var{compute} is given in which case \var{compute} is
;;> applied to \var{key} to determine the return value. This does not
;;> update the cache.
(define (lru-ref lru key . o)
(let ((entry (hash-table-ref/default (lru-table lru) key #f)))
(cond (entry
(lru-shift-to-last! lru entry)
(lru-entry-value entry))
((pair? o)
((car o) key))
(else
(error "no lru entry for" key)))))
;;> Identical to lru-ref except that it updates the cache on a miss.
(define (lru-ref! lru key compute)
(cond ((hash-table-ref/default (lru-table lru) key #f)
=> (lambda (entry)
(lru-shift-to-last! lru entry)
(lru-entry-value entry)))
(else
(let ((value (compute key)))
(lru-add! lru key value)
value))))
(define (lru-add! lru key value)
(let* ((size ((lru-compute-size lru) key value))
(last-pair (lru-back lru))
(entry (make-lru-entry key value size last-pair)))
(hash-table-set! (lru-table lru) key entry)
(lru-enq! lru entry)
(lru-size-set! lru (+ size (lru-size lru)))
(lru-shrink! lru)))
;;> Directly set a value in the cache.
(define (lru-set! lru key value)
(let ((entry (hash-table-ref/default (lru-table lru) key #f)))
(cond (entry
(lru-shift-to-last! lru entry)
(lru-entry-value-set! entry value)
(let ((prev-size (lru-entry-size entry))
(size ((lru-compute-size lru) key value)))
(lru-entry-size-set! entry size)
(lru-size-set! lru (+ (lru-size lru) (- size prev-size)))))
(else
(lru-add! lru key value)))
(lru-shrink! lru)))
(define (hash-table-ref! table key proc)
(hash-table-ref table key
(lambda ()
(let ((res (proc key)))
(hash-table-set! table key res)
res))))