mirror of
https://github.com/ashinn/chibi-scheme.git
synced 2025-05-18 21:29:19 +02:00
356 lines
14 KiB
Scheme
356 lines
14 KiB
Scheme
;; memoize.scm -- caching and memoization utilities
|
|
;; Copyright (c) 2003-2013 Alex Shinn. All rights reserved.
|
|
;; BSD-style license: http://synthcode.com/license.txt
|
|
|
|
;;> Memory and persistent caching with various levels of control, based
|
|
;;> on a combination of lru-cache from Hato and an older memoization
|
|
;;> library for Gauche.
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;; types
|
|
|
|
(define-record-type Lru-Cache
|
|
(%make-lru-cache table front back size size-limit compute-size)
|
|
lru-cache?
|
|
(table lru-table)
|
|
(front lru-front lru-front-set!)
|
|
(back lru-back lru-back-set!)
|
|
(size lru-size lru-size-set!)
|
|
(size-limit lru-size-limit)
|
|
(compute-size lru-compute-size))
|
|
|
|
(define-record-type Lru-Entry
|
|
(make-lru-entry key value size prev)
|
|
lru-entry?
|
|
(key lru-entry-key)
|
|
(value lru-entry-value lru-entry-value-set!)
|
|
(size lru-entry-size lru-entry-size-set!)
|
|
(prev lru-entry-prev lru-entry-prev-set!))
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;; high-level interface
|
|
|
|
;;> Analagous to the procedure form of \scheme{define} but
|
|
;;> automatically memoizes the function. Uses \scheme{equal?} for
|
|
;;> equality comparisons and reasonable defaults - for finer grained
|
|
;;> control use \scheme{memoize}.
|
|
|
|
(define-syntax define-memoized
|
|
(syntax-rules ()
|
|
((define-memoized (proc x ...) . body)
|
|
(define proc
|
|
(make-memoizer (lambda (x ...) . body) (length '(x ...)) (make-lru-cache))))
|
|
((define-memoized (proc . x) . body)
|
|
(define proc
|
|
(make-memoizer (lambda x . body) #f (make-lru-cache ))))))
|
|
|
|
(define (make-memoizer proc arity cache)
|
|
(let ((ref! (if (lru-cache? cache) lru-ref! hash-table-ref!)))
|
|
(case arity
|
|
((0)
|
|
proc)
|
|
((1)
|
|
(lambda (x) (ref! cache x proc)))
|
|
((2)
|
|
(lambda (x y)
|
|
(ref! cache (cons x y) (lambda (xy) (proc (car xy) (cdr xy))))))
|
|
(else
|
|
(lambda args
|
|
(ref! cache args (lambda (args) (apply proc args))))))))
|
|
|
|
;;> Returns a memoized version of the procedure \var{proc}. By
|
|
;;> default uses a least-recently-used (LRU) cache, which can be tuned
|
|
;;> with the following keyword arguments:
|
|
;;>
|
|
;;> \items[
|
|
;;> \item{cache: an explicit pre-existing cache (LRU or hash-table)}
|
|
;;> \item{equal: an equality predicate defaulting to \scheme{equal?}}
|
|
;;> \item{hash: a hash function to match the equality predicate, defaulting to \scheme{hash} from \scheme{(srfi 69)}}
|
|
;;> \item{init-size: a hint for the initial size of the backing hash table}
|
|
;;> \item{size-limit: the maximum size of the cache}
|
|
;;> \item{compute-size: compute the size of a cache entry}
|
|
;;> ]
|
|
;;>
|
|
;;> \var{compute-size} is a procedure of two arguments, the key and
|
|
;;> value to be stored, and defaults to a constant 1 per entry. After
|
|
;;> every insertion the oldest elements will be removed until the size
|
|
;;> is under \var{size-limit}. You may find
|
|
;;>
|
|
;;> \scheme{(lambda (k v) (+ (object-size k) (object-size v)))}
|
|
;;>
|
|
;;> using \scheme{object-size} from \scheme{(chibi ast)} to be a
|
|
;;> useful \var{compute-size}.
|
|
;;>
|
|
;;> If \var{size-limit} is \scheme{#f} then the cache is unlimited,
|
|
;;> and a simple hash-table will be used in place of an LRU cache.
|
|
|
|
(define (memoize proc . o)
|
|
(let-keywords* o
|
|
((equal equal: equal?)
|
|
(hash hash: hash)
|
|
(arity arity: (and (not (procedure-variadic? proc))
|
|
(procedure-arity proc)))
|
|
(init-size init-size: 31)
|
|
(limit size-limit: 1000)
|
|
(compute-size compute-size: (lambda (k v) 1))
|
|
(cache-init cache: '()))
|
|
(let ((cache (cond ((lru-cache? cache-init)
|
|
cache-init)
|
|
(limit
|
|
(make-lru-cache 'equal: equal
|
|
'hash: hash
|
|
'init-size: init-size
|
|
'size-limit: limit
|
|
'compute-size: compute-size))
|
|
(else
|
|
(make-hash-table equal hash)))))
|
|
;; allow an alist initializer for the cache
|
|
(if (pair? cache-init)
|
|
(for-each (lambda (x) (lru-add! cache (car x) (cdr x)))
|
|
cache-init))
|
|
(make-memoizer proc arity cache))))
|
|
|
|
;;> Equivalent to memoize except that the procedure's first argument
|
|
;;> must be a pathname. If the corresponding file has been modified
|
|
;;> since the memoized value, the value is recomputed. Useful to
|
|
;;> automatically reflect external changes to a file-backed resource.
|
|
;;> The additional keyword argument \scheme{reloader?:}, if true,
|
|
;;> indicates that the result of loading is itself a procedure which
|
|
;;> should check for updates on each call.
|
|
|
|
(define (memoize-file-loader proc . o)
|
|
(let* ((f (lambda (file . rest)
|
|
(let ((mtime (file-modification-time/safe file)))
|
|
(cons mtime (apply proc file rest)))))
|
|
(g (apply memoize f o))
|
|
(reloader? (cond ((memq 'reloader?: o) => cdr) (else #f))))
|
|
(lambda (file . rest)
|
|
(let ((cell (apply g file rest)))
|
|
(let-syntax ((update!
|
|
(syntax-rules ()
|
|
((update! default)
|
|
(let ((mtime (file-modification-time/safe file)))
|
|
(if (and mtime
|
|
(or (not (car cell))
|
|
(> mtime (car cell))))
|
|
(let ((res (apply proc file rest)))
|
|
(set-car! cell mtime)
|
|
(set-cdr! cell res)
|
|
res)
|
|
default))))))
|
|
(update! (if (and reloader? (procedure? (cdr cell)))
|
|
(lambda args (apply (update! (cdr cell)) args))
|
|
(cdr cell))))))))
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;; persistent memoization
|
|
|
|
(define (get-memo-directory proc-name)
|
|
(or (get-environment-variable "MEMOIZE_DIR")
|
|
(if (i-am-root?)
|
|
(make-path "/var/run/memo.d" proc-name)
|
|
(make-path (or (get-environment-variable "HOME") ".")
|
|
".memo.d" proc-name))))
|
|
|
|
(define (encode-file-name str)
|
|
(define (file-name-safe-char? ch)
|
|
(or (char-alphabetic? ch) (char-numeric? ch)
|
|
(memv ch '(#\_ #\- #\+ #\. #\,))))
|
|
(define (encode-char ch)
|
|
(let* ((i (char->integer ch))
|
|
(hex (number->string i 16)))
|
|
(if (< i 16)
|
|
(string-append "%0" hex)
|
|
(string-append "%" hex))))
|
|
(define (collect str from to res)
|
|
(if (string-cursor>=? from to)
|
|
res
|
|
(cons (substring-cursor str from to) res)))
|
|
(let ((start (string-cursor-start str))
|
|
(end (string-cursor-end str)))
|
|
(let lp ((from start) (to start) (res '()))
|
|
(if (string-cursor>=? to end)
|
|
(if (string-cursor=? from start)
|
|
str
|
|
(string-concatenate (reverse (collect str from to res))))
|
|
(let* ((ch (string-cursor-ref str to))
|
|
(next (string-cursor-next str to)))
|
|
(if (file-name-safe-char? ch)
|
|
(lp from next res)
|
|
(lp next next (cons (encode-char ch)
|
|
(collect str from to res)))))))))
|
|
|
|
(define (default-args-encoder args)
|
|
(let ((out (open-output-string)))
|
|
(write/ss args out)
|
|
(encode-file-name (string-append (get-output-string out) ".memo"))))
|
|
|
|
;;> Returns a memoized version of the procedure \var{proc} which
|
|
;;> stores the memoized results persistently in a file. Garbage
|
|
;;> collection of the files is left as an external task for monitoring
|
|
;;> tools or cron jobs.
|
|
;;>
|
|
;;> Accepts the following keyword arguments:
|
|
;;>
|
|
;;> \items[
|
|
;;> \item{args-encoder: procedure which takes the arguments as a single list, and returns a string representation suitable for use as a (base) file name}
|
|
;;> \item{proc-name: the name of the procedure, to use a a subdir of memo-dir to distinguish from other memoized procedures}
|
|
;;> \item{memo-dir: the directory to store results in, defaulting to ~/.memo/}
|
|
;;> \item{file-validator: validator to run on the existing file - if it returns false, the file is considered bad and the result recomputed}
|
|
;;> \item{validator: validator to run on the result of reading the file}
|
|
;;> \item{read: the read procedure to extract the result from the file}
|
|
;;> \item{write: the write procedure to write the result to the file}
|
|
;;> ]
|
|
|
|
(define (memoize-to-file proc . o)
|
|
(let-keywords* o
|
|
((args-encoder args-encoder: default-args-encoder)
|
|
(proc-name proc-name: (or (procedure-name proc) "lambda"))
|
|
(memo-dir memo-dir: (get-memo-directory proc-name))
|
|
(file-validator file-validator: (lambda args #t))
|
|
(validator validator: (lambda args #t))
|
|
(read read: read/ss)
|
|
(write write: write/ss))
|
|
(lambda args
|
|
(let ((file (make-path memo-dir (args-encoder args))))
|
|
(define (compute)
|
|
(let ((res (apply proc args)))
|
|
(create-directory* (path-directory file))
|
|
(call-with-output-file file
|
|
(lambda (out) (write res out)))
|
|
res))
|
|
(if (and (file-exists? file)
|
|
(apply file-validator file args))
|
|
(let ((res (call-with-input-file file read)))
|
|
(if (validator res)
|
|
res
|
|
(compute)))
|
|
(compute))))))
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;; low-level utilities
|
|
|
|
;;> Creates a new empty LRU object. The same keyword arguments as in
|
|
;;> \scheme{memoize} are available, except of course for \var{cache}.
|
|
|
|
(define (make-lru-cache . o)
|
|
(let-keywords* o ((equal equal: equal?)
|
|
(hash hash: hash)
|
|
(init-size init-size: 31)
|
|
(compute-size compute-size: (lambda (k v) 1))
|
|
(size-limit size-limit: 1000))
|
|
(let ((tab (make-hash-table equal hash))) ; init-size
|
|
(%make-lru-cache tab '() '() 0 size-limit compute-size))))
|
|
|
|
;; add entry to the back of the queue
|
|
(define (lru-enq! lru entry)
|
|
(let ((cell (list entry)))
|
|
(if (null? (lru-front lru)) ; empty
|
|
(lru-front-set! lru cell)
|
|
(set-cdr! (lru-back lru) cell))
|
|
(lru-back-set! lru cell)))
|
|
|
|
;; pop the front of the queue
|
|
(define (lru-deq! lru)
|
|
(let ((cell (lru-front lru)))
|
|
(if (null? cell)
|
|
(error "lru queue is empty")
|
|
(let ((rest (cdr cell)))
|
|
(lru-front-set! lru rest)
|
|
(if (null? rest)
|
|
(lru-back-set! lru '()))
|
|
(car cell)))))
|
|
|
|
;; shift the given entry, anywhere in the queue, to the end
|
|
(define (lru-shift-to-last! lru entry)
|
|
(let ((prev (lru-entry-prev entry))
|
|
(last-pair (lru-back lru)))
|
|
(cond
|
|
((null? prev)
|
|
;; first entry, just pop and re-queue it, and update prev pointers
|
|
(lru-enq! lru (lru-deq! lru))
|
|
(lru-entry-prev-set! entry last-pair)
|
|
(lru-entry-prev-set! (car (lru-front lru)) '()))
|
|
((eq? (cdr prev) last-pair)
|
|
;; already at the end, nothing to do
|
|
)
|
|
(else
|
|
;; a middle element, splice it out and re-queue
|
|
(let ((cell (cdr prev)))
|
|
(set-cdr! prev (cdr cell)) ; splice out
|
|
(if (pair? (cdr cell))
|
|
(lru-entry-prev-set! (cadr cell) prev))
|
|
(lru-enq! lru entry) ; reinsert at end
|
|
(lru-entry-prev-set! entry last-pair))))))
|
|
|
|
(define (lru-shrink! lru)
|
|
(let ((size-limit (lru-size-limit lru))
|
|
(size (lru-size lru)))
|
|
(if (> size size-limit)
|
|
(let lp ((size size))
|
|
(if (> size size-limit)
|
|
(let ((x (lru-deq! lru)))
|
|
(let ((next (lru-front lru)))
|
|
(if (pair? next)
|
|
(lru-entry-prev-set! (car next) '())))
|
|
(hash-table-delete! (lru-table lru) (lru-entry-key x))
|
|
(lp (- size (lru-entry-size x))))
|
|
(lru-size-set! lru size))))))
|
|
|
|
;;> Looks up \var{key} in the cache LRU. If not found returns #f,
|
|
;;> unless \var{compute} is given in which case \var{compute} is
|
|
;;> applied to \var{key} to determine the return value. This does not
|
|
;;> update the cache.
|
|
|
|
(define (lru-ref lru key . o)
|
|
(let ((entry (hash-table-ref/default (lru-table lru) key #f)))
|
|
(cond (entry
|
|
(lru-shift-to-last! lru entry)
|
|
(lru-entry-value entry))
|
|
((pair? o)
|
|
((car o) key))
|
|
(else
|
|
(error "no lru entry for" key)))))
|
|
|
|
;;> Identical to lru-ref except that it updates the cache on a miss.
|
|
|
|
(define (lru-ref! lru key compute)
|
|
(cond ((hash-table-ref/default (lru-table lru) key #f)
|
|
=> (lambda (entry)
|
|
(lru-shift-to-last! lru entry)
|
|
(lru-entry-value entry)))
|
|
(else
|
|
(let ((value (compute key)))
|
|
(lru-add! lru key value)
|
|
value))))
|
|
|
|
(define (lru-add! lru key value)
|
|
(let* ((size ((lru-compute-size lru) key value))
|
|
(last-pair (lru-back lru))
|
|
(entry (make-lru-entry key value size last-pair)))
|
|
(hash-table-set! (lru-table lru) key entry)
|
|
(lru-enq! lru entry)
|
|
(lru-size-set! lru (+ size (lru-size lru)))
|
|
(lru-shrink! lru)))
|
|
|
|
;;> Directly set a value in the cache.
|
|
|
|
(define (lru-set! lru key value)
|
|
(let ((entry (hash-table-ref/default (lru-table lru) key #f)))
|
|
(cond (entry
|
|
(lru-shift-to-last! lru entry)
|
|
(lru-entry-value-set! entry value)
|
|
(let ((prev-size (lru-entry-size entry))
|
|
(size ((lru-compute-size lru) key value)))
|
|
(lru-entry-size-set! entry size)
|
|
(lru-size-set! lru (+ (lru-size lru) (- size prev-size)))))
|
|
(else
|
|
(lru-add! lru key value)))
|
|
(lru-shrink! lru)))
|
|
|
|
(define (hash-table-ref! table key proc)
|
|
(hash-table-ref table key
|
|
(lambda ()
|
|
(let ((res (proc key)))
|
|
(hash-table-set! table key res)
|
|
res))))
|