From 017bb1c2a03ff72fe3ecb2232b898701b05b6e4c Mon Sep 17 00:00:00 2001 From: Alex Shinn Date: Fri, 1 Feb 2019 00:31:13 +0800 Subject: [PATCH] adding -Dsafe-string-cursors feature to perform extra checks on string cursors --- lib/chibi/ast.scm | 20 +++- lib/chibi/pathname.scm | 44 ++++---- lib/chibi/regexp.scm | 8 +- lib/chibi/string.scm | 21 ++-- lib/init-7.scm | 221 +++++++++++++++++++++++++++++------------ lib/srfi/130.scm | 16 ++- opcodes.c | 6 +- 7 files changed, 234 insertions(+), 102 deletions(-) diff --git a/lib/chibi/ast.scm b/lib/chibi/ast.scm index 9a959574..a9e0c80b 100644 --- a/lib/chibi/ast.scm +++ b/lib/chibi/ast.scm @@ -353,11 +353,29 @@ ;;> Returns the interpretation of the integer \var{n} as ;;> an immediate object, useful for debugging. -;;> \procedure{(string-contains str pat)} +;;> \procedure{(string-contains str pat [start])} ;;> Returns the first string cursor of \var{pat} in \var{str}, ;;> of \scheme{#f} if it's not found. +(cond-expand + (safe-string-cursors + (define orig-string-contains string-contains) + (set! string-contains + (lambda (str pat . o) + (let ((res + (if (pair? o) + (orig-string-contains str pat (string-cursor-where (car o))) + (orig-string-contains str pat)))) + (and res (make-string-cursor str res (string-size str))))))) + (else + )) + +;;> \procedure{(string-cursor-copy! dst src from start end)} + +;;> Copies the characters from \var{src}[\var{start}..\var{end}] +;;> to \var{dst} starting at \var{from}. + ;;> \procedure{(safe-setenv name value)} ;;> Equivalent to \scheme{setenv} but does nothing and returns diff --git a/lib/chibi/pathname.scm b/lib/chibi/pathname.scm index 39f7f592..fe7ef7be 100644 --- a/lib/chibi/pathname.scm +++ b/lib/chibi/pathname.scm @@ -108,24 +108,32 @@ (let* ((path (path-normalize path)) (path-end (string-cursor-end path)) (dir (path-normalize dir)) - (dir-end (string-cursor-end dir)) - (i (string-mismatch dir path))) - (cond - ((not (string-cursor<=? 1 dir-end i path-end)) - (let ((i2 (string-cursor-next path i))) - (and (string-cursor=? i path-end) - (string-cursor=? i2 dir-end) - (eqv? #\/ (string-cursor-ref dir i)) - "."))) - ((string-cursor=? i path-end) - ".") - ((eqv? #\/ (string-cursor-ref path i)) - (let ((i2 (string-cursor-next path i))) - (if (string-cursor=? i2 path-end) "." (substring-cursor path i2)))) - ((eqv? #\/ (string-cursor-ref path (string-cursor-prev path i))) - (substring-cursor path i)) - (else - #f)))) + (dir-end (string-cursor-end dir))) + (call-with-values + (lambda () (string-mismatch dir path)) + (lambda (i j) + (cond + ( + ;;(not (string-cursor<=? + ;; (string-cursor-next path (string-cursor-start path)) + ;; dir-end i path-end)) + (not (and (string-cursor<=? + (string-cursor-next dir (string-cursor-start dir)) + dir-end i) + (string-cursor<=? j path-end))) + (and (string-cursor=? j path-end) + (string-cursor=? (string-cursor-next dir i) dir-end) + (eqv? #\/ (string-cursor-ref dir i)) + ".")) + ((string-cursor=? j path-end) + ".") + ((eqv? #\/ (string-cursor-ref path j)) + (let ((j2 (string-cursor-next path j))) + (if (string-cursor=? j2 path-end) "." (substring-cursor path j2)))) + ((eqv? #\/ (string-cursor-ref path (string-cursor-prev path j))) + (substring-cursor path j)) + (else + #f)))))) ;;> Resolve \var{path} relative to the given directory. Returns ;;> \var{path} unchanged if already absolute. diff --git a/lib/chibi/regexp.scm b/lib/chibi/regexp.scm index e017406d..1ad20982 100644 --- a/lib/chibi/regexp.scm +++ b/lib/chibi/regexp.scm @@ -564,10 +564,10 @@ (define (match/eog str i ch start end matches) (and (string-cursor>? i start) (or (string-cursor>=? i end) - (let ((m (regexp-search re:grapheme str - (string-cursor->index str i) - (string-cursor->index str end)))) - (and m (string-cursor<=? (regexp-match-submatch-end m 0) i)))))) + (let* ((sci (string-cursor->index str i)) + (sce (string-cursor->index str end)) + (m (regexp-search re:grapheme str sci sce))) + (and m (<= (regexp-match-submatch-end m 0) sci)))))) (define (lookup-char-set name flags) (cond diff --git a/lib/chibi/string.scm b/lib/chibi/string.scm index eddb7358..e74f1f12 100644 --- a/lib/chibi/string.scm +++ b/lib/chibi/string.scm @@ -164,6 +164,9 @@ "" (substring-cursor str left right)))) +;;> Returns two values: the first cursors from the left in +;;> \var{prefix} and in \var{str} where the two strings don't match. + (define (string-mismatch prefix str) (let ((end1 (string-cursor-end prefix)) (end2 (string-cursor-end str))) @@ -172,9 +175,12 @@ (if (or (string-cursor>=? i end1) (string-cursor>=? j end2) (not (eq? (string-cursor-ref prefix i) (string-cursor-ref str j)))) - j + (values i j) (lp (string-cursor-next prefix i) (string-cursor-next str j)))))) +;;> Returns two values: the first cursors from the right in +;;> \var{prefix} and in \var{str} where the two strings don't match. + (define (string-mismatch-right suffix str) (let ((end1 (string-cursor-start suffix)) (end2 (string-cursor-start str))) @@ -183,17 +189,14 @@ (if (or (string-cursor Returns true iff \var{prefix} is a prefix of \var{str}. (define (string-prefix? prefix str) - (string-cursor=? (string-cursor-end prefix) (string-mismatch prefix str))) + (call-with-values (lambda () (string-mismatch prefix str)) + (lambda (i j) (string-cursor=? (string-cursor-end prefix) i)))) ;;> Returns true iff \var{suffix} is a suffix of \var{str}. @@ -204,7 +207,9 @@ (string-cursor-start suffix)) (string-cursor-back str - (string-mismatch-right suffix str) + (call-with-values + (lambda () (string-mismatch-right suffix str)) + (lambda (i j) j)) diff))))) ;;> The fundamental string iterator. Calls \var{kons} on each diff --git a/lib/init-7.scm b/lib/init-7.scm index a3bc89b9..1c44145f 100644 --- a/lib/init-7.scm +++ b/lib/init-7.scm @@ -1,5 +1,5 @@ ;; init-7.scm -- core library procedures for R7RS -;; Copyright (c) 2009-2012 Alex Shinn. All rights reserved. +;; Copyright (c) 2009-2019 Alex Shinn. All rights reserved. ;; BSD-style license: http://synthcode.com/license.txt (define (caar x) (car (car x))) @@ -15,6 +15,10 @@ (cons kar kdr) (strip-syntactic-closures source))) +(define (not x) (if x #f #t)) +(define (boolean? x) (if (eq? x #t) #t (eq? x #f))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; basic utils (define (procedure? x) (if (closure? x) #t (opcode? x))) @@ -378,14 +382,162 @@ (define-auxiliary-syntax unquote) (define-auxiliary-syntax unquote-splicing) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; SRFI-0 + +(define-syntax cond-expand + (er-macro-transformer + (lambda (expr rename compare) + (define (check x) + (if (pair? x) + (case (car x) + ((and) (every check (cdr x))) + ((or) (any check (cdr x))) + ((not) (not (check (cadr x)))) + ((library) (eval `(find-module ',(cadr x)) (%meta-env))) + (else (error "cond-expand: bad feature" x))) + (memq (identifier->symbol x) *features*))) + (let expand ((ls (cdr expr))) + (cond + ((null? ls)) ; (error "cond-expand: no expansions" expr) + ((not (pair? (car ls))) (error "cond-expand: bad clause" (car ls))) + ((eq? 'else (identifier->symbol (caar ls))) + (if (pair? (cdr ls)) + (error "cond-expand: else in non-final position") + `(,(rename 'begin) ,@(cdar ls)))) + ((check (caar ls)) `(,(rename 'begin) ,@(cdar ls))) + (else (expand (cdr ls)))))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; string cursors + +(define (string-copy str . o) + (apply substring str (if (pair? o) o '(0)))) + +(cond-expand + (safe-string-cursors + (define Safe-String-Cursor + (register-simple-type "Safe-String-Cursor" #f '(string where size))) + (define %make-string-cursor + (make-constructor "%make-string-cursor" Safe-String-Cursor)) + (set! string-cursor? + (make-type-predicate "string-cursor?" Safe-String-Cursor)) + (define string-cursor-string + (make-getter "string-cursor-string" Safe-String-Cursor 0)) + (define string-cursor-string-set! + (make-setter "string-cursor-string-set!" Safe-String-Cursor 0)) + (define string-cursor-where + (make-getter "string-cursor-where" Safe-String-Cursor 1)) + (define string-cursor-where-set! + (make-setter "string-cursor-where-set!" Safe-String-Cursor 1)) + (define string-cursor-size + (make-getter "string-cursor-size" Safe-String-Cursor 2)) + (define string-cursor-size-set! + (make-setter "string-cursor-size-set!" Safe-String-Cursor 2)) + (define (make-string-cursor string where size) + (let ((res (%make-string-cursor))) + (string-cursor-string-set! res string) + (string-cursor-where-set! res where) + (string-cursor-size-set! res size) + res)) + (define orig-string-cursor-offset string-cursor-offset) + (define orig-string-cursor->index string-cursor->index) + (define orig-string-index->cursor string-index->cursor) + (define orig-substring-cursor substring-cursor) + (define orig-string-cursor-end string-cursor-end) + (set! string-cursor-offset + (lambda (sc) (orig-string-cursor-offset (string-cursor-where sc)))) + (set! string-cursor->index + (lambda (str sc) (orig-string-cursor->index str (string-cursor-where sc)))) + (set! string-index->cursor + (lambda (str i) + (make-string-cursor str + (orig-string-index->cursor str i) + (string-size str)))) + (set! substring-cursor + (lambda (str start . o) + (if (pair? o) + (orig-substring-cursor str (string-cursor-where start) (string-cursor-where (car o))) + (orig-substring-cursor str (string-cursor-where start))))) + (define (string-cursor=? sc1 sc2 . o) + (and (equal? ((values string-cursor-offset) sc1) ((values string-cursor-offset) sc2)) + (or (null? o) (apply string-cursor=? sc2 o)))) + (define (string-cursor? sc1 sc2 . o) + (and (> ((values string-cursor-offset) sc1) ((values string-cursor-offset) sc2)) + (or (null? o) (apply string-cursor>? sc2 o)))) + (define (string-cursor>=? sc1 sc2 . o) + (and (>= ((values string-cursor-offset) sc1) ((values string-cursor-offset) sc2)) + (or (null? o) (apply string-cursor>=? sc2 o)))) + (define string-cursor-start + (let ((start (string-index->cursor "" 0))) + (lambda (s) (make-string-cursor s start (string-size s))))) + (set! string-cursor-end + (lambda (s) + (let ((end (orig-string-cursor-end s))) + (make-string-cursor s end (string-size s))))) + (define (string-size s) + (orig-string-cursor-offset (orig-string-cursor-end s))) + (define (validate-cursor str sc) + (cond + ((not (eq? str (string-cursor-string sc))) + (error "attempt to use string cursor on different string" str sc)) + ((not (= (string-size str) (string-cursor-size sc))) + (error "string has mutated since cursor was created" str sc)))) + (define orig-string-cursor-ref string-cursor-ref) + (define orig-string-cursor-next string-cursor-next) + (define orig-string-cursor-prev string-cursor-prev) + (set! string-cursor-ref + (lambda (str sc) + (validate-cursor str sc) + (orig-string-cursor-ref str (string-cursor-where sc)))) + (set! string-cursor-next + (lambda (str sc) + (validate-cursor str sc) + (make-string-cursor + str + (orig-string-cursor-next str (string-cursor-where sc)) + (string-cursor-size sc)))) + (set! string-cursor-prev + (lambda (str sc) + (validate-cursor str sc) + (make-string-cursor + str + (orig-string-cursor-prev str (string-cursor-where sc)) + (string-cursor-size sc))))) + (full-unicode + (define string-cursor=? eq?) + (define string-cursor-start + (let ((start (string-index->cursor "" 0))) + (lambda (s) start))) + (define (string-size s) + (string-cursor-offset (string-cursor-end s)))) + (else + (define string-cursor? fixnum?) + (define string-cursor=? eq?) + (define string-cursor? >) + (define string-cursor>=? >=) + (define (string-index->cursor str i) i) + (define (string-cursor->index str off) off) + (define (string-cursor-offset str off) off) + (define string-size string-length) + (define substring-cursor substring) + (define (string-cursor-start s) 0) + (define string-cursor-end string-length) + (define string-cursor-ref string-ref) + (define (string-cursor-next s i) (+ i 1)) + (define (string-cursor-prev s i) (- i 1)))) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; library functions -;; booleans - -(define (not x) (if x #f #t)) -(define (boolean? x) (if (eq? x #t) #t (eq? x #f))) - ;; char utils (define (char-alphabetic? ch) (<= 65 (char->integer (char-upcase ch)) 90)) @@ -631,31 +783,6 @@ (apply consumer (cdr res)) (consumer res)))) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; SRFI-0 - -(define-syntax cond-expand - (er-macro-transformer - (lambda (expr rename compare) - (define (check x) - (if (pair? x) - (case (car x) - ((and) (every check (cdr x))) - ((or) (any check (cdr x))) - ((not) (not (check (cadr x)))) - ((library) (eval `(find-module ',(cadr x)) (%meta-env))) - (else (error "cond-expand: bad feature" x))) - (memq (identifier->symbol x) *features*))) - (let expand ((ls (cdr expr))) - (cond ((null? ls)) ; (error "cond-expand: no expansions" expr) - ((not (pair? (car ls))) (error "cond-expand: bad clause" (car ls))) - ((eq? 'else (identifier->symbol (caar ls))) - (if (pair? (cdr ls)) - (error "cond-expand: else in non-final position") - `(,(rename 'begin) ,@(cdar ls)))) - ((check (caar ls)) `(,(rename 'begin) ,@(cdar ls))) - (else (expand (cdr ls)))))))) - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; dynamic-wind @@ -1346,35 +1473,3 @@ (* (if (eqv? y -0.0) -1 1) (if (eqv? x -0.0) 3.141592653589793 x)) (atan1 (/ y x)))))))) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; string cursors - -(define (string-copy str . o) - (apply substring str (if (pair? o) o '(0)))) - -(define string-cursor=? eq?) - -(cond-expand - (full-unicode - (define string-cursor-start - (let ((start (string-index->cursor "" 0))) - (lambda (s) start))) - (define (string-size s) - (string-cursor-offset (string-cursor-end s)))) - (else - (define string-cursor? fixnum?) - (define string-cursor? >) - (define string-cursor>=? >=) - (define (string-index->cursor str i) i) - (define (string-cursor->index str off) off) - (define (string-cursor-offset str off) off) - (define string-size string-length) - (define substring-cursor substring) - (define (string-cursor-start s) 0) - (define string-cursor-end string-length) - (define string-cursor-ref string-ref) - (define (string-cursor-next s i) (+ i 1)) - (define (string-cursor-prev s i) (- i 1)))) diff --git a/lib/srfi/130.scm b/lib/srfi/130.scm index 96532197..34462bfc 100644 --- a/lib/srfi/130.scm +++ b/lib/srfi/130.scm @@ -131,11 +131,13 @@ (define (string-prefix-length s1 s2 . o) (let ((s1 (string-arg s1 o)) (s2 (if (and (pair? o) (pair? (cdr o))) (string-arg s2 (cddr o)) s2))) - (string-cursor->index s1 (string-mismatch s1 s2)))) + (call-with-values (lambda () (string-mismatch s1 s2)) + (lambda (i j) (string-cursor->index s1 i))))) (define (string-suffix-length s1 s2 . o) (let* ((s1 (string-arg s1 o)) (s2 (if (and (pair? o) (pair? (cdr o))) (string-arg s2 (cddr o)) s2)) - (mismatch (string-mismatch-right s2 s1))) + (mismatch (call-with-values (lambda () (string-mismatch-right s2 s1)) + (lambda (i j) j)))) (string-cursor-diff s1 (string-cursor-next s1 mismatch) (string-cursor-end s1)))) @@ -238,9 +240,13 @@ (apply string-fold (lambda (ch n) (if (pred ch) (+ n 1) n)) 0 str o)) (define (string-replace s1 s2 start1 end1 . o) - (string-append (substring/cursors s1 0 start1) - (string-arg s2 o) - (substring/cursors s1 end1 (string-cursor-end s1)))) + (if (string-cursor? start1) + (string-append (substring/cursors s1 (string-cursor-start s1) start1) + (string-arg s2 o) + (substring/cursors s1 end1 (string-cursor-end s1))) + (string-append (substring s1 0 start1) + (string-arg s2 o) + (substring s1 end1 (string-length s1))))) (define (string-split str delim . o) (let* ((delim-len (string-length delim)) diff --git a/opcodes.c b/opcodes.c index 744bb46d..9add203e 100644 --- a/opcodes.c +++ b/opcodes.c @@ -225,13 +225,13 @@ _FN1(_I(SEXP_NUMBER), _I(SEXP_NUMBER), "ceiling", 0, sexp_ceiling), #endif _FN2(_I(SEXP_NUMBER), _I(SEXP_NUMBER), _I(SEXP_NUMBER), "expt", 0, sexp_expt_op), #if SEXP_USE_UTF8_STRINGS -_FN2(_I(SEXP_FIXNUM), _I(SEXP_STRING), _I(SEXP_FIXNUM), "string-index->cursor", 0, sexp_string_index_to_cursor), -_FN2(_I(SEXP_FIXNUM), _I(SEXP_STRING), _I(SEXP_FIXNUM), "string-cursor->index", 0, sexp_string_cursor_to_index), +_FN2(_I(SEXP_STRING_CURSOR), _I(SEXP_STRING), _I(SEXP_FIXNUM), "string-index->cursor", 0, sexp_string_index_to_cursor), +_FN2(_I(SEXP_FIXNUM), _I(SEXP_STRING), _I(SEXP_STRING_CURSOR), "string-cursor->index", 0, sexp_string_cursor_to_index), _FN2(_I(SEXP_CHAR), _I(SEXP_STRING), _I(SEXP_FIXNUM), "string-ref", 0, sexp_string_utf8_index_ref), #if SEXP_USE_MUTABLE_STRINGS _FN3(SEXP_VOID, _I(SEXP_STRING), _I(SEXP_FIXNUM), _I(SEXP_CHAR), "string-set!", 0, sexp_string_utf8_index_set), #endif -_FN3OPT(_I(SEXP_STRING), _I(SEXP_STRING), _I(SEXP_FIXNUM), _I(SEXP_FIXNUM), "substring-cursor", SEXP_FALSE, sexp_substring_op), +_FN3OPT(_I(SEXP_STRING), _I(SEXP_STRING), _I(SEXP_STRING_CURSOR), _I(SEXP_STRING_CURSOR), "substring-cursor", SEXP_FALSE, sexp_substring_op), _FN3OPT(_I(SEXP_STRING), _I(SEXP_STRING), _I(SEXP_FIXNUM), _I(SEXP_FIXNUM), "substring", SEXP_FALSE, sexp_utf8_substring_op), #else _FN3OPT(_I(SEXP_STRING), _I(SEXP_STRING), _I(SEXP_FIXNUM), _I(SEXP_FIXNUM), "substring", SEXP_FALSE, sexp_substring_op),