Better error handling for parsers.

This commit is contained in:
Alex Shinn 2013-06-04 04:26:01 +09:00
parent 65ed450d7a
commit a24c76a02d
3 changed files with 129 additions and 54 deletions

View file

@ -16,7 +16,8 @@
parse-beginning-of-line parse-end-of-line parse-beginning-of-line parse-end-of-line
parse-beginning-of-line parse-end-of-line parse-beginning-of-line parse-end-of-line
parse-beginning-of-word parse-end-of-word parse-beginning-of-word parse-end-of-word
parse-word parse-word+) parse-word parse-word+
parse-with-failure-reason)
(import (chibi) (chibi char-set) (srfi 9)) (import (chibi) (chibi char-set) (srfi 9))
(include "parse/parse.scm") (include "parse/parse.scm")
(cond-expand (cond-expand
@ -50,9 +51,9 @@
(,lambda_ (,r ,s ,i ,fk) (,lambda_ (,r ,s ,i ,fk)
(,set!_ ,new-tmp ,r) (,set!_ ,new-tmp ,r)
(,sk ,r ,s ,i ,fk)) (,sk ,r ,s ,i ,fk))
(,lambda_ () (,lambda_ (,s ,i ,r)
(,set!_ ,new-tmp ,save-tmp) (,set!_ ,new-tmp ,save-tmp)
(,fk)))) (,fk ,s ,i ,r))))
,new-tmp)) ,new-tmp))
(cons (list name new-tmp) bindings)))) (cons (list name new-tmp) bindings))))
(append k (list f bindings))))))))) (append k (list f bindings)))))))))
@ -73,6 +74,6 @@
(let ((save-tmp new-tmp)) (let ((save-tmp new-tmp))
(f s i (f s i
(lambda (r s i fk) (set! new-tmp r) (sk r s i fk)) (lambda (r s i fk) (set! new-tmp r) (sk r s i fk))
(lambda () (set! new-tmp save-tmp) (fk))))) (lambda (s i r) (set! new-tmp save-tmp) (fk s i r)))))
((var tmp) ... (name new-tmp))) ((var tmp) ... (name new-tmp)))
(k ... f ((var tmp) ...))))))))))) (k ... f ((var tmp) ...)))))))))))

View file

@ -94,7 +94,7 @@
(let lp ((s source) (i index)) (let lp ((s source) (i index))
(subsequent (subsequent
s i (lambda (r s i fk) (lp s i)) s i (lambda (r s i fk) (lp s i))
(lambda () (lambda (s i r)
(sk0 (string->symbol (parse-stream-substring source0 index0 s i)) (sk0 (string->symbol (parse-stream-substring source0 index0 s i))
s i fk0))))) s i fk0)))))
fk0)))) fk0))))

View file

@ -10,7 +10,8 @@
;; represents a single buffered chunk of text. ;; represents a single buffered chunk of text.
(define-record-type Parse-Stream (define-record-type Parse-Stream
(%make-parse-stream filename port buffer cache offset prev-char tail) (%make-parse-stream
filename port buffer cache offset prev-char line column tail)
parse-stream? parse-stream?
;; The file the data came from, for debugging and error reporting. ;; The file the data came from, for debugging and error reporting.
(filename parse-stream-filename) (filename parse-stream-filename)
@ -32,6 +33,9 @@
;; The previous char before the beginning of this Parse-Stream. ;; The previous char before the beginning of this Parse-Stream.
;; Used for line/word-boundary checks. ;; Used for line/word-boundary checks.
(prev-char parse-stream-prev-char) (prev-char parse-stream-prev-char)
;; The debug info for the start line and column of this chunk.
(line parse-stream-line)
(column parse-stream-column)
;; The successor Parse-Stream chunk, created on demand and filled ;; The successor Parse-Stream chunk, created on demand and filled
;; from the same port. ;; from the same port.
(tail %parse-stream-tail %parse-stream-tail-set!)) (tail %parse-stream-tail %parse-stream-tail-set!))
@ -44,7 +48,7 @@
(let ((port (if (pair? o) (car o) (open-input-file filename))) (let ((port (if (pair? o) (car o) (open-input-file filename)))
(len (if (and (pair? o) (pair? (cdr o))) (cadr o) default-buffer-size))) (len (if (and (pair? o) (pair? (cdr o))) (cadr o) default-buffer-size)))
(%make-parse-stream (%make-parse-stream
filename port (make-vector len #f) (make-vector len '()) 0 #f #f))) filename port (make-vector len #f) (make-vector len '()) 0 #f 0 0 #f)))
(define (file->parse-stream filename) (define (file->parse-stream filename)
(make-parse-stream filename (open-input-file filename))) (make-parse-stream filename (open-input-file filename)))
@ -55,12 +59,19 @@
(define (parse-stream-tail source) (define (parse-stream-tail source)
(or (%parse-stream-tail source) (or (%parse-stream-tail source)
(let* ((len (vector-length (parse-stream-buffer source))) (let* ((len (vector-length (parse-stream-buffer source)))
(line-info (parse-stream-count-lines source))
(line (+ (parse-stream-line source) (car line-info)))
(col (if (zero? (car line-info))
(+ (parse-stream-column source) (cadr line-info))
(cadr line-info)))
(tail (%make-parse-stream (parse-stream-filename source) (tail (%make-parse-stream (parse-stream-filename source)
(parse-stream-port source) (parse-stream-port source)
(make-vector len #f) (make-vector len #f)
(make-vector len '()) (make-vector len '())
0 0
(parse-stream-last-char source) (parse-stream-last-char source)
line
col
#f))) #f)))
(%parse-stream-tail-set! source tail) (%parse-stream-tail-set! source tail)
tail))) tail)))
@ -101,12 +112,55 @@
(define (parse-stream-max-char source) (define (parse-stream-max-char source)
(let ((buf (parse-stream-buffer source))) (let ((buf (parse-stream-buffer source)))
(let lp ((i (min (- (vector-length buf) 1) (parse-stream-offset source)))) (let lp ((i (min (- (vector-length buf) 1)
(parse-stream-offset source))))
(if (or (negative? i) (if (or (negative? i)
(char? (vector-ref buf i))) (char? (vector-ref buf i)))
i i
(lp (- i 1)))))) (lp (- i 1))))))
(define (parse-stream-count-lines source . o)
(let* ((buf (parse-stream-buffer source))
(end (if (pair? o) (car o) (vector-length buf))))
(let lp ((i 0) (from 0) (lines 0))
(if (>= i end)
(list lines (- i from) from)
(let ((ch (vector-ref buf i)))
(cond
((not (char? ch))
(list lines (- i from) from))
((eqv? ch #\newline)
(lp (+ i 1) i (+ lines 1)))
(else
(lp (+ i 1) from lines))))))))
(define (parse-stream-end-of-line source i)
(let* ((buf (parse-stream-buffer source))
(end (vector-length buf)))
(let lp ((i i))
(if (>= i end)
i
(let ((ch (vector-ref buf i)))
(if (or (not (char? ch)) (eqv? ch #\newline))
i
(lp (+ i 1))))))))
(define (parse-stream-debug-info s i)
;; i is the failed parse index, but we want the furthest reached
;; location
(if (%parse-stream-tail s)
(parse-stream-debug-info (%parse-stream-tail s) i)
(let* ((line-info
(parse-stream-count-lines s (parse-stream-max-char s)))
(line (+ (parse-stream-line s) (car line-info)))
(col (if (zero? (car line-info))
(+ (parse-stream-column s) (cadr line-info))
(cadr line-info)))
(from (car (cddr line-info)))
(to (parse-stream-end-of-line s (+ from 1)))
(str (parse-stream-substring s from s to)))
(list line col str))))
(define (parse-stream-next-source source i) (define (parse-stream-next-source source i)
(if (>= (+ i 1) (vector-length (parse-stream-buffer source))) (if (>= (+ i 1) (vector-length (parse-stream-buffer source)))
(parse-stream-tail source) (parse-stream-tail source)
@ -172,42 +226,49 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; the parser interface ;; the parser interface
(define (parse-failure s i reason)
(let ((line+col (parse-stream-debug-info s i)))
(error "incomplete parse at" (append line+col (list reason)))))
(define (call-with-parse f source index sk . o) (define (call-with-parse f source index sk . o)
(let ((p (if (string? source) (string->parse-stream source) source)) (let ((s (if (string? source) (string->parse-stream source) source))
(fk (if (pair? o) (car o) (lambda () #f)))) (fk (if (pair? o) (car o) (lambda (s i reason) #f))))
(f p index sk fk))) (f s index sk fk)))
(define (parse f source . o) (define (parse f source . o)
(let ((index (if (pair? o) (car o) 0))) (let ((index (if (pair? o) (car o) 0)))
(call-with-parse f source index (lambda (r s i fk) r)))) (call-with-parse f source index (lambda (r s i fk) r))))
(define (parse-fully f source . o) (define (parse-fully f source . o)
(let ((p (if (string? source) (string->parse-stream source) source)) (let ((s (if (string? source) (string->parse-stream source) source))
(index (if (pair? o) (car o) 0))) (index (if (pair? o) (car o) 0)))
(call-with-parse (call-with-parse
f p index f s index
(lambda (r s i fk) (if (parse-stream-end? s i) r (fk))) (lambda (r s i fk)
(lambda () (if (parse-stream-end? s i) r (fk s i "incomplete parse")))
(let ((i (parse-stream-max-char p))) parse-failure)))
(error "incomplete parse, max char" i (parse-stream-ref p i)))))))
(define (parse-fold f kons knil source . o) (define (parse-fold f kons knil source . o)
(let lp ((p (if (string? source) (string->parse-stream source) source)) (let lp ((p (if (string? source) (string->parse-stream source) source))
(index (if (pair? o) (car o) 0)) (index (if (pair? o) (car o) 0))
(acc knil)) (acc knil))
(f p index (lambda (r s i fk) (lp s i (kons r acc))) (lambda () acc)))) (f p index (lambda (r s i fk) (lp s i (kons r acc))) (lambda (s i r) acc))))
(define (parse->list f source . o) (define (parse->list f source . o)
(reverse (apply parse-fold cons '() f source o))) (reverse (apply parse-fold cons '() f source o)))
(define (parse-fully->list f source . o) (define (parse-fully->list f source . o)
(let lp ((p (if (string? source) (string->parse-stream source) source)) (let lp ((s (if (string? source) (string->parse-stream source) source))
(index (if (pair? o) (car o) 0)) (index (if (pair? o) (car o) 0))
(acc '())) (acc '()))
(f p index (f s index
(lambda (r s i fk) (lambda (r s i fk)
(if (eof-object? r) (reverse acc) (lp s i (cons r acc)))) (if (eof-object? r) (reverse acc) (lp s i (cons r acc))))
(lambda () (error "incomplete parse"))))) (lambda (s i reason) (error "incomplete parse")))))
(define (parse-with-failure-reason f reason)
(lambda (r s i fk)
(f r s i (lambda (s i r) (fk s i reason)))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; basic parsing combinators ;; basic parsing combinators
@ -219,7 +280,7 @@
(define parse-anything (define parse-anything
(lambda (source index sk fk) (lambda (source index sk fk)
(if (parse-stream-end? source index) (if (parse-stream-end? source index)
(fk) (fk source index "end of input")
(sk (parse-stream-ref source index) (sk (parse-stream-ref source index)
(parse-stream-next-source source index) (parse-stream-next-source source index)
(parse-stream-next-index source index) (parse-stream-next-index source index)
@ -227,14 +288,18 @@
(define parse-nothing (define parse-nothing
(lambda (source index sk fk) (lambda (source index sk fk)
(fk))) (fk source index "nothing")))
(define (parse-or f . o) (define (parse-or f . o)
(if (null? o) (if (null? o)
f f
(let ((g (apply parse-or o))) (let ((g (apply parse-or o)))
(lambda (source index sk fk) (lambda (source index sk fk)
(let ((fk2 (lambda () (g source index sk fk)))) (let ((fk2 (lambda (s i r)
(g source index sk fk
;; (lambda (s2 i2 r2)
;; (fk s2 i2 `(or ,r ,r2)))
))))
(f source index sk fk2)))))) (f source index sk fk2))))))
(define (parse-and f g) (define (parse-and f g)
@ -243,9 +308,10 @@
(define (parse-not f) (define (parse-not f)
(lambda (source index sk fk) (lambda (source index sk fk)
(f source index (lambda (r s i fk) (fk)) (lambda () (sk #t source index fk))))) (f source index (lambda (r s i fk) (fk s i "not"))
(lambda (s i r) (sk #t source index fk)))))
(define (parse-seq . o) (define (parse-seq-list o)
(cond (cond
((null? o) ((null? o)
parse-epsilon) parse-epsilon)
@ -272,14 +338,17 @@
fk)) fk))
fk)))))) fk))))))
(define (maybe-parse-seq f . o) (define (parse-seq . o)
(if (null? o) f (apply parse-seq f o))) (parse-seq-list o))
(define (maybe-parse-seq ls)
(if (null? (cdr ls)) (car ls) (parse-seq-list ls)))
(define (parse-optional f . o) (define (parse-optional f . o)
(if (pair? o) (if (pair? o)
(parse-optional (apply parse-seq f o)) (parse-optional (apply parse-seq f o))
(lambda (source index sk fk) (lambda (source index sk fk)
(f source index sk (lambda () (sk #f source index fk)))))) (f source index sk (lambda (s i r) (sk #f source index fk))))))
(define ignored-value (list 'ignore)) (define ignored-value (list 'ignore))
@ -289,7 +358,7 @@
(lambda (source0 index0 sk fk) (lambda (source0 index0 sk fk)
(let repeat ((source source0) (index index0) (fk fk) (j 0) (res '())) (let repeat ((source source0) (index index0) (fk fk) (j 0) (res '()))
(let ((fk (if (>= j lo) (let ((fk (if (>= j lo)
(lambda () (sk (reverse res) source index fk)) (lambda (s i r) (sk (reverse res) source index fk))
fk))) fk)))
(if (and hi (= j hi)) (if (and hi (= j hi))
(sk (reverse res) source index fk) (sk (reverse res) source index fk)
@ -322,7 +391,7 @@
(f source (f source
index index
(lambda (res s i fk) (lambda (res s i fk)
(if (check? res) (sk res s i fk) (fk))) (if (check? res) (sk res s i fk) (fk s i "assertion failed")))
fk))) fk)))
(define (parse-atomic f) (define (parse-atomic f)
@ -331,7 +400,7 @@
(define (parse-commit f) (define (parse-commit f)
(lambda (source index sk fk) (lambda (source index sk fk)
(f source index (lambda (res s i fk) (sk res s i (lambda () #f))) fk))) (f source index (lambda (res s i fk) (sk res s i (lambda (s i r) #f))) fk)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; boundary checks ;; boundary checks
@ -340,27 +409,27 @@
(lambda (source index sk fk) (lambda (source index sk fk)
(if (parse-stream-start? source index) (if (parse-stream-start? source index)
(sk #t source index fk) (sk #t source index fk)
(fk)))) (fk source index "expected beginning"))))
(define parse-end (define parse-end
(lambda (source index sk fk) (lambda (source index sk fk)
(if (parse-stream-end? source index) (if (parse-stream-end? source index)
(sk #t source index fk) (sk #t source index fk)
(fk)))) (fk source index "expected end"))))
(define parse-beginning-of-line (define parse-beginning-of-line
(lambda (source index sk fk) (lambda (source index sk fk)
(let ((before (parse-stream-char-before source index))) (let ((before (parse-stream-char-before source index)))
(if (or (not before) (eqv? #\newline before)) (if (or (not before) (eqv? #\newline before))
(sk #t source index fk) (sk #t source index fk)
(fk))))) (fk source index "expected beginning of line")))))
(define parse-end-of-line (define parse-end-of-line
(lambda (source index sk fk) (lambda (source index sk fk)
(if (or (parse-stream-end? source index) (if (or (parse-stream-end? source index)
(eqv? #\newline (parse-stream-ref source index))) (eqv? #\newline (parse-stream-ref source index)))
(sk #t source index fk) (sk #t source index fk)
(fk)))) (fk source index "expected end of line"))))
(define (char-word? ch) (define (char-word? ch)
(or (char-alphabetic? ch) (eqv? ch #\_))) (or (char-alphabetic? ch) (eqv? ch #\_)))
@ -372,7 +441,7 @@
(not (parse-stream-end? source index)) (not (parse-stream-end? source index))
(char-word? (parse-stream-ref source index))) (char-word? (parse-stream-ref source index)))
(sk #t source index fk) (sk #t source index fk)
(fk))))) (fk source index "expected beginning of word")))))
(define parse-end-of-word (define parse-end-of-word
(lambda (source index sk fk) (lambda (source index sk fk)
@ -382,7 +451,7 @@
(or (parse-stream-end? source index) (or (parse-stream-end? source index)
(not (char-word? (parse-stream-ref source index))))) (not (char-word? (parse-stream-ref source index)))))
(sk #t source index fk) (sk #t source index fk)
(fk))))) (fk source index "expected end of word")))))
(define (parse-word . o) (define (parse-word . o)
(let ((word (if (pair? o) (car o) (parse-token char-word?)))) (let ((word (if (pair? o) (car o) (parse-token char-word?))))
@ -408,7 +477,7 @@
(parse-stream-next-source source index) (parse-stream-next-source source index)
(parse-stream-next-index source index) (parse-stream-next-index source index)
fk) fk)
(fk))))) (fk source index "failed char pred")))))
(define (x->char-predicate x) (define (x->char-predicate x)
(cond (cond
@ -429,7 +498,9 @@
(parse-char-pred (lambda (ch) (not (pred ch)))))) (parse-char-pred (lambda (ch) (not (pred ch))))))
(define (parse-string x) (define (parse-string x)
(parse-map (apply parse-seq (map parse-char (string->list x))) (parse-map (parse-with-failure-reason
(parse-seq-list (map parse-char (string->list x)))
`(expected ,x))
list->string)) list->string))
(define (parse-token x) (define (parse-token x)
@ -444,9 +515,9 @@
(f source (f source
index index
(lambda (r s i fk) (lp s i)) (lambda (r s i fk) (lp s i))
(lambda () (lambda (s i r)
(if (and (eq? source source0) (eqv? index index0)) (if (and (eq? source source0) (eqv? index index0))
(fk) (fk s i r)
(sk (parse-stream-substring source0 index0 source index) (sk (parse-stream-substring source0 index0 source index)
source index fk)))))))) source index fk))))))))
@ -498,14 +569,14 @@
parse-epsilon) parse-epsilon)
((list? x) ((list? x)
(case (car x) (case (car x)
((: seq) (apply parse-seq (map parse-sre (cdr x)))) ((: seq) (parse-seq-list (map parse-sre (cdr x))))
((or) (apply parse-or (map parse-sre (cdr x)))) ((or) (apply parse-or (map parse-sre (cdr x))))
((and) (apply parse-and (map parse-sre (cdr x)))) ((and) (apply parse-and (map parse-sre (cdr x))))
((not) (apply parse-not (map parse-sre (cdr x)))) ((not) (apply parse-not (map parse-sre (cdr x))))
((*) (parse-repeat (apply maybe-parse-seq (map parse-sre (cdr x))))) ((*) (parse-repeat (maybe-parse-seq (map parse-sre (cdr x)))))
((+) (parse-repeat+ (apply maybe-parse-seq (map parse-sre (cdr x))))) ((+) (parse-repeat+ (maybe-parse-seq (map parse-sre (cdr x)))))
((?) (apply parse-optional (map parse-sre (cdr x)))) ((?) (parse-optional (parse-seq-list (map parse-sre (cdr x)))))
((=> ->) (apply maybe-parse-seq (map parse-sre (cddr x)))) ((=> ->) (maybe-parse-seq (map parse-sre (cddr x))))
((word) (apply parse-word (cdr x))) ((word) (apply parse-word (cdr x)))
((word+) (apply parse-word+ (cdr x))) ((word+) (apply parse-word+ (cdr x)))
((/ ~ & -) (parse-char (sre->char-set x))) ((/ ~ & -) (parse-char (sre->char-set x)))
@ -553,25 +624,28 @@
(define (procedure-name-set! f name) (define (procedure-name-set! f name)
(set! *procedures* (cons (cons f name) *procedures*))) (set! *procedures* (cons (cons f name) *procedures*)))
(define memoized-failure (list 'failure))
(define (parse-memoize name f) (define (parse-memoize name f)
(if (not (procedure-name f)) (procedure-name-set! f name)) ;;(if (not (procedure-name f)) (procedure-name-set! f name))
(lambda (source index sk fk) (lambda (source index sk fk)
(cond (cond
((parse-stream-cache-cell source index f) ((parse-stream-cache-cell source index f)
=> (lambda (cell) => (lambda (cell)
(if (cdr cell) (if (and (pair? (cdr cell)) (eq? memoized-failure (cadr cell)))
(apply sk (append (cdr cell) (list fk))) (fk source index (cddr cell))
(fk)))) (apply sk (append (cdr cell) (list fk))))))
(else (else
(f source (f source
index index
(lambda (res s i fk) (lambda (res s i fk)
(parse-stream-cache-set! source index f (list res s i)) (parse-stream-cache-set! source index f (list res s i))
(sk res s i fk)) (sk res s i fk))
(lambda () (lambda (s i r)
(if (not (pair? (parse-stream-cache-cell source index f))) (if (not (pair? (parse-stream-cache-cell source index f)))
(parse-stream-cache-set! source index f #f)) (parse-stream-cache-set!
(fk))))))) source index f (cons memoized-failure r)))
(fk s i r)))))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; syntactic sugar ;; syntactic sugar