;; memoize.scm -- caching and memoization utilities
;; Copyright (c) 2003-2013 Alex Shinn.  All rights reserved.
;; BSD-style license: http://synthcode.com/license.txt

;;> Memory and persistent caching with various levels of control, based
;;> on a combination of lru-cache from Hato and an older memoization
;;> library for Gauche.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; types

(define-record-type Lru-Cache
  (%make-lru-cache table front back size size-limit compute-size)
  lru-cache?
  (table lru-table)
  (front lru-front lru-front-set!)
  (back lru-back lru-back-set!)
  (size lru-size lru-size-set!)
  (size-limit lru-size-limit)
  (compute-size lru-compute-size))

(define-record-type Lru-Entry
  (make-lru-entry key value size prev)
  lru-entry?
  (key lru-entry-key)
  (value lru-entry-value lru-entry-value-set!)
  (size lru-entry-size lru-entry-size-set!)
  (prev lru-entry-prev lru-entry-prev-set!))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; high-level interface

;;> Analagous to the procedure form of \scheme{define} but
;;> automatically memoizes the function.  Uses \scheme{equal?} for
;;> equality comparisons and reasonable defaults - for finer grained
;;> control use \scheme{memoize}.

(define-syntax define-memoized
  (syntax-rules ()
    ((define-memoized (proc x ...) . body)
     (define proc
       (make-memoizer (lambda (x ...) . body) (length '(x ...)) (make-lru-cache))))
    ((define-memoized (proc . x) . body)
     (define proc
       (make-memoizer (lambda x . body) #f (make-lru-cache ))))))

(define (make-memoizer proc arity cache)
  (let ((ref! (if (lru-cache? cache) lru-ref! hash-table-ref!)))
    (case arity
      ((0)
       proc)
      ((1)
       (lambda (x) (ref! cache x proc)))
      ((2)
       (lambda (x y)
         (ref! cache (cons x y) (lambda (xy) (proc (car xy) (cdr xy))))))
      (else
       (lambda args
         (ref! cache args (lambda (args) (apply proc args))))))))

;;> Returns a memoized version of the procedure \var{proc}.  By
;;> default uses a least-recently-used (LRU) cache, which can be tuned
;;> with the following keyword arguments:
;;>
;;> \items[
;;> \item{cache: an explicit pre-existing cache (LRU or hash-table)}
;;> \item{equal: an equality predicate defaulting to \scheme{equal?}}
;;> \item{hash: a hash function to match the equality predicate, defaulting to \scheme{hash} from \scheme{(srfi 69)}}
;;> \item{init-size: a hint for the initial size of the backing hash table}
;;> \item{size-limit: the maximum size of the cache}
;;> \item{compute-size: compute the size of a cache entry}
;;> ]
;;>
;;> \var{compute-size} is a procedure of two arguments, the key and
;;> value to be stored, and defaults to a constant 1 per entry.  After
;;> every insertion the oldest elements will be removed until the size
;;> is under \var{size-limit}.  You may find
;;>
;;>   \scheme{(lambda (k v) (+ (object-size k) (object-size v)))}
;;>
;;> using \scheme{object-size} from \scheme{(chibi ast)} to be a
;;> useful \var{compute-size}.
;;>
;;> If \var{size-limit} is \scheme{#f} then the cache is unlimited,
;;> and a simple hash-table will be used in place of an LRU cache.

(define (memoize proc . o)
  (let-keywords* o
      ((equal equal: equal?)
       (hash hash: hash)
       (arity arity: (and (not (procedure-variadic? proc))
                          (procedure-arity proc)))
       (init-size init-size: 31)
       (limit size-limit: 1000)
       (compute-size compute-size: (lambda (k v) 1))
       (cache-init cache: '()))
    (let ((cache (cond ((lru-cache? cache-init)
                        cache-init)
                       (limit
                        (make-lru-cache 'equal: equal
                                        'hash: hash
                                        'init-size: init-size
                                        'size-limit: limit
                                        'compute-size: compute-size))
                       (else
                        (make-hash-table equal hash)))))
      ;; allow an alist initializer for the cache
      (if (pair? cache-init)
          (for-each (lambda (x) (lru-add! cache (car x) (cdr x)))
                    cache-init))
      (make-memoizer proc arity cache))))

;;> Equivalent to memoize except that the procedure's first argument
;;> must be a pathname.  If the corresponding file has been modified
;;> since the memoized value, the value is recomputed.  Useful to
;;> automatically reflect external changes to a file-backed resource.
;;> The additional keyword argument \scheme{reloader?:}, if true,
;;> indicates that the result of loading is itself a procedure which
;;> should check for updates on each call.

(define (memoize-file-loader proc . o)
  (let* ((f (lambda (file . rest)
              (let ((mtime (file-modification-time/safe file)))
                (cons mtime (apply proc file rest)))))
         (g (apply memoize f o))
         (reloader? (cond ((memq 'reloader?: o) => cdr) (else #f))))
    (lambda (file . rest)
      (let ((cell (apply g file rest)))
        (let-syntax ((update!
                      (syntax-rules ()
                        ((update! default)
                         (let ((mtime (file-modification-time/safe file)))
                           (if (and mtime
                                    (or (not (car cell))
                                        (> mtime (car cell))))
                               (let ((res (apply proc file rest)))
                                 (set-car! cell mtime)
                                 (set-cdr! cell res)
                                 res)
                               default))))))
          (update! (if (and reloader? (procedure? (cdr cell)))
                       (lambda args (apply (update! (cdr cell)) args))
                       (cdr cell))))))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; persistent memoization

(define (get-memo-directory proc-name)
  (or (get-environment-variable "MEMOIZE_DIR")
      (if (i-am-root?)
          (make-path "/var/run/memo.d" proc-name)
          (make-path (or (get-environment-variable "HOME") ".")
                     ".memo.d" proc-name))))

(define (encode-file-name str)
  (define (file-name-safe-char? ch)
    (or (char-alphabetic? ch) (char-numeric? ch)
        (memv ch '(#\_ #\- #\+ #\. #\,))))
  (define (encode-char ch)
    (let* ((i (char->integer ch))
           (hex (number->string i 16)))
      (if (< i 16)
          (string-append "%0" hex)
          (string-append "%" hex))))
  (define (collect str from to res)
    (if (string-cursor>=? from to)
        res
        (cons (substring-cursor str from to) res)))
  (let ((start (string-cursor-start str))
        (end (string-cursor-end str)))
    (let lp ((from start) (to start) (res '()))
      (if (string-cursor>=? to end)
          (if (string-cursor=? from start)
              str
              (string-concatenate (reverse (collect str from to res))))
          (let* ((ch (string-cursor-ref str to))
                 (next (string-cursor-next str to)))
            (if (file-name-safe-char? ch)
                (lp from next res)
                (lp next next (cons (encode-char ch)
                                    (collect str from to res)))))))))

(define (default-args-encoder args)
  (let ((out (open-output-string)))
    (write/ss args out)
    (encode-file-name (string-append (get-output-string out) ".memo"))))

;;> Returns a memoized version of the procedure \var{proc} which
;;> stores the memoized results persistently in a file.  Garbage
;;> collection of the files is left as an external task for monitoring
;;> tools or cron jobs.
;;>
;;> Accepts the following keyword arguments:
;;>
;;> \items[
;;> \item{args-encoder: procedure which takes the arguments as a single list, and returns a string representation suitable for use as a (base) file name}
;;> \item{proc-name: the name of the procedure, to use a a subdir of memo-dir to distinguish from other memoized procedures}
;;> \item{memo-dir: the directory to store results in, defaulting to ~/.memo/}
;;> \item{file-validator: validator to run on the existing file - if it returns false, the file is considered bad and the result recomputed}
;;> \item{validator: validator to run on the result of reading the file}
;;> \item{read: the read procedure to extract the result from the file}
;;> \item{write: the write procedure to write the result to the file}
;;> ]

(define (memoize-to-file proc . o)
  (let-keywords* o
      ((args-encoder args-encoder: default-args-encoder)
       (proc-name proc-name: (or (procedure-name proc) "lambda"))
       (memo-dir memo-dir: (get-memo-directory proc-name))
       (file-validator file-validator: (lambda args #t))
       (validator validator: (lambda args #t))
       (read read: read/ss)
       (write write: write/ss))
    (lambda args
      (let ((file (make-path memo-dir (args-encoder args))))
        (define (compute)
          (let ((res (apply proc args)))
            (create-directory* (path-directory file))
            (call-with-output-file file
              (lambda (out) (write res out)))
            res))
        (if (and (file-exists? file)
                 (apply file-validator file args))
            (let ((res (call-with-input-file file read)))
              (if (validator res)
                  res
                  (compute)))
            (compute))))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; low-level utilities

;;> Creates a new empty LRU object.  The same keyword arguments as in
;;> \scheme{memoize} are available, except of course for \var{cache}.

(define (make-lru-cache . o)
  (let-keywords* o ((equal equal: equal?)
                    (hash hash: hash)
                    (init-size init-size: 31)
                    (compute-size compute-size: (lambda (k v) 1))
                    (size-limit size-limit: 1000))
    (let ((tab (make-hash-table equal hash))) ; init-size
      (%make-lru-cache tab '() '() 0 size-limit compute-size))))

;; add entry to the back of the queue
(define (lru-enq! lru entry)
  (let ((cell (list entry)))
    (if (null? (lru-front lru))         ; empty
        (lru-front-set! lru cell)
        (set-cdr! (lru-back lru) cell))
    (lru-back-set! lru cell)))

;; pop the front of the queue
(define (lru-deq! lru)
  (let ((cell (lru-front lru)))
    (if (null? cell)
        (error "lru queue is empty")
        (let ((rest (cdr cell)))
          (lru-front-set! lru rest)
          (if (null? rest)
              (lru-back-set! lru '()))
          (car cell)))))

;; shift the given entry, anywhere in the queue, to the end
(define (lru-shift-to-last! lru entry)
  (let ((prev (lru-entry-prev entry))
        (last-pair (lru-back lru)))
    (cond
     ((null? prev)
      ;; first entry, just pop and re-queue it, and update prev pointers
      (lru-enq! lru (lru-deq! lru))
      (lru-entry-prev-set! entry last-pair)
      (lru-entry-prev-set! (car (lru-front lru)) '()))
     ((eq? (cdr prev) last-pair)
      ;; already at the end, nothing to do
      )
     (else
      ;; a middle element, splice it out and re-queue
      (let ((cell (cdr prev)))
        (set-cdr! prev (cdr cell))      ; splice out
        (if (pair? (cdr cell))
            (lru-entry-prev-set! (cadr cell) prev))
        (lru-enq! lru entry)            ; reinsert at end
        (lru-entry-prev-set! entry last-pair))))))

(define (lru-shrink! lru)
  (let ((size-limit (lru-size-limit lru))
        (size (lru-size lru)))
    (if (> size size-limit)
        (let lp ((size size))
          (if (> size size-limit)
              (let ((x (lru-deq! lru)))
                (let ((next (lru-front lru)))
                  (if (pair? next)
                      (lru-entry-prev-set! (car next) '())))
                (hash-table-delete! (lru-table lru) (lru-entry-key x))
                (lp (- size (lru-entry-size x))))
              (lru-size-set! lru size))))))

;;> Looks up \var{key} in the cache LRU.  If not found returns #f,
;;> unless \var{compute} is given in which case \var{compute} is
;;> applied to \var{key} to determine the return value.  This does not
;;> update the cache.

(define (lru-ref lru key . o)
  (let ((entry (hash-table-ref/default (lru-table lru) key #f)))
    (cond (entry
           (lru-shift-to-last! lru entry)
           (lru-entry-value entry))
          ((pair? o)
           ((car o) key))
          (else
           (error "no lru entry for" key)))))

;;> Identical to lru-ref except that it updates the cache on a miss.

(define (lru-ref! lru key compute)
  (cond ((hash-table-ref/default (lru-table lru) key #f)
         => (lambda (entry)
              (lru-shift-to-last! lru entry)
              (lru-entry-value entry)))
        (else
         (let ((value (compute key)))
           (lru-add! lru key value)
           value))))

(define (lru-add! lru key value)
  (let* ((size ((lru-compute-size lru) key value))
         (last-pair (lru-back lru))
         (entry (make-lru-entry key value size last-pair)))
    (hash-table-set! (lru-table lru) key entry)
    (lru-enq! lru entry)
    (lru-size-set! lru (+ size (lru-size lru)))
    (lru-shrink! lru)))

;;> Directly set a value in the cache.

(define (lru-set! lru key value)
  (let ((entry (hash-table-ref/default (lru-table lru) key #f)))
    (cond (entry
           (lru-shift-to-last! lru entry)
           (lru-entry-value-set! entry value)
           (let ((prev-size (lru-entry-size entry))
                 (size ((lru-compute-size lru) key value)))
             (lru-entry-size-set! entry size)
             (lru-size-set! lru (+ (lru-size lru) (- size prev-size)))))
          (else
           (lru-add! lru key value)))
    (lru-shrink! lru)))

(define (hash-table-ref! table key proc)
  (hash-table-ref table key
                  (lambda ()
                    (let ((res (proc key)))
                      (hash-table-set! table key res)
                      res))))