diff --git a/lib/chibi/uri.module b/lib/chibi/uri.module new file mode 100644 index 00000000..825ccd45 --- /dev/null +++ b/lib/chibi/uri.module @@ -0,0 +1,10 @@ + +(define-module (chibi uri) + (export uri->string make-uri string->uri + uri-scheme uri-user uri-host uri-path uri-query uri-fragment + uri-with-scheme uri-with-user uri-with-host uri-with-path + uri-with-query uri-with-fragment + uri-encode uri-decode uri-query->alist uri-alist->query) + (import (scheme) + (srfi 9)) + (include "uri.scm")) diff --git a/lib/chibi/uri.scm b/lib/chibi/uri.scm new file mode 100644 index 00000000..4386837a --- /dev/null +++ b/lib/chibi/uri.scm @@ -0,0 +1,303 @@ + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; URI representation + +(define-record-type uri + (%make-uri scheme user host port path query fragment) + uri? + (scheme uri-scheme) + (user uri-user) + (host uri-host) + (port uri-port) + (path uri-path) + (query uri-query) + (fragment uri-fragment)) + +;; (make-uri scheme [user host port path query fragment]) +(define (make-uri scheme . o) + (let* ((user (if (pair? o) (car o) #f)) + (o (if (pair? o) (cdr o) '())) + (host (if (pair? o) (car o) #f)) + (o (if (pair? o) (cdr o) '())) + (port (if (pair? o) (car o) #f)) + (o (if (pair? o) (cdr o) '())) + (path (if (pair? o) (car o) #f)) + (o (if (pair? o) (cdr o) '())) + (query (if (pair? o) (car o) #f)) + (o (if (pair? o) (cdr o) '())) + (fragment (if (and (pair? o) (pair? (cdr o))) (car (cdr o)) #f))) + (%make-uri scheme user host port path query fragment))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; string utils (don't feel like using SRFI-13 and these are more +;; specialised) + +(define (string-scan str ch . o) + (let ((start (if (pair? o) (car o) 0)) + (end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str)))) + (let lp ((i start)) + (and (< i end) + (if (eqv? ch (string-ref str i)) + i + (lp (+ i 1))))))) + +(define (string-scan-right str ch . o) + (let ((start (if (pair? o) (car o) 0)) + (end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str)))) + (let lp ((i (- end 1))) + (and (>= i start) + (if (eqv? ch (string-ref str i)) + i + (lp (- i 1))))))) + +(define (string-index-of str pred . o) + (let ((start (if (pair? o) (car o) 0)) + (end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str)))) + (let lp ((i start)) + (cond ((>= i end) #f) + ((pred (string-ref str i)) i) + (else (lp (+ i 1))))))) + +(define (string-downcase->symbol str) + (let ((len (string-length str))) + (let lp ((i 0)) + (cond + ((= i len) + (string->symbol str)) + ((char-upper-case? (string-ref str i)) + (let ((res (make-string len))) + (do ((j 0 (+ j 1))) + ((= j i)) + (string-set! res j (string-ref str j))) + (string-set! res i (char-downcase (string-ref str i))) + (do ((j (+ i 1) (+ j 1))) + ((= j len)) + (string-set! res j (char-downcase (string-ref str j)))) + (string->symbol res))) + (else + (lp (+ i 1))))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; functional updaters (uses as much shared state as possible) + +(define (uri-with-scheme u scheme) + (%make-uri scheme (uri-user u) (uri-host u) (uri-port u) + (uri-path u) (uri-query u) (uri-fragment u))) + +(define (uri-with-user u user) + (%make-uri (uri-scheme u) user (uri-host u) (uri-port u) + (uri-path u) (uri-query u) (uri-fragment u))) + +(define (uri-with-host u host) + (%make-uri (uri-scheme u) (uri-user u) host (uri-port u) + (uri-path u) (uri-query u) (uri-fragment u))) + +(define (uri-with-port u port) + (%make-uri (uri-scheme u) (uri-user u) (uri-host u) port + (uri-path u) (uri-query u) (uri-fragment u))) + +(define (uri-with-path u path) + (%make-uri (uri-scheme u) (uri-user u) (uri-host u) (uri-port u) + path (uri-query u) (uri-fragment u))) + +(define (uri-with-query u query) + (%make-uri (uri-scheme u) (uri-user u) (uri-host u) (uri-port u) + (uri-path u) query (uri-fragment u))) + +(define (uri-with-fragment u fragment) + (%make-uri (uri-scheme u) (uri-user u) (uri-host u) (uri-port u) + (uri-path u) (uri-query u) fragment)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; parsing - without :// we just split into scheme & path + +(define (char-uri-scheme-unsafe? ch) + (not (or (char-alphabetic? ch) (char-numeric? ch) (memv ch '(#\_ #\-))))) + +(define (string->path-uri scheme str . o) + (define decode? (and (pair? o) (car o))) + (define decode (if decode? uri-decode (lambda (x) x))) + (define decode-query + (if (and (pair? o) (pair? (cdr o)) (cadr o)) + uri-query->alist + decode)) + (if (pair? str) + str + (let* ((len (string-length str)) + (colon0 (string-scan str #\:)) + (colon + (and (not (string-index-of str char-uri-scheme-unsafe? + 0 (or colon0 len))) + colon0))) + (if (or (not colon) (zero? colon)) + (and scheme + (let* ((quest (string-scan str #\? 0)) + (pound (string-scan str #\# (or quest 0)))) + (make-uri scheme #f #f #f + (decode (substring str 0 (or quest pound len))) + (and quest + (decode-query + (substring str (+ quest 1) (or pound len)))) + (and pound + (decode (substring str (+ pound 1) len)))))) + (let ((sc1 (+ colon 1)) + (scheme (string-downcase->symbol (substring str 0 colon)))) + (if (= sc1 len) + (make-uri scheme) + (if (or (>= (+ sc1 1) len) + (not (and (eqv? #\/ (string-ref str sc1)) + (eqv? #\/ (string-ref str (+ sc1 1)))))) + (make-uri scheme #f #f #f (substring str sc1 len)) + (if (>= (+ sc1 2) len) + (make-uri scheme #f "") + (let* ((sc2 (+ sc1 2)) + (slash (string-scan str #\/ sc2)) + (sc3 (or slash len)) + (at (string-scan-right str #\@ sc2 sc3)) + (colon3 (string-scan str #\: (or at sc2) sc3)) + (quest (string-scan str #\? sc3)) + (pound (string-scan str #\# (or quest sc3)))) + (%make-uri + scheme + (and at (decode (substring str sc2 at))) + (decode + (substring str + (if at (+ at 1) sc2) + (or colon3 sc3))) + (and colon3 + (string->number + (substring str (+ colon3 1) sc3))) + (and slash + (decode + (substring str slash (or quest pound len)))) + (and quest + (decode-query + (substring str (+ quest 1) + (or pound len)))) + (and pound + (decode (substring str (+ pound 1) len))) + )))))))))) + +(define (string->uri str . o) + (apply string->path-uri #f str o)) + +(define (uri->string uri . o) + (define encode? (and (pair? o) (car o))) + (define encode (if encode? uri-encode (lambda (x) x))) + (if (string? uri) + uri + (let ((fragment (uri-fragment uri)) + (query (uri-query uri)) + (path (uri-path uri)) + (port (uri-port uri)) + (host (uri-host uri)) + (user (uri-user uri))) + (string-append + (symbol->string (uri-scheme uri)) ":" + (if (or user host port) "//" "") + (if user (encode user) "") (if user "@" "") + (or host "") ; host shouldn't need encoding + (if port ":" "") (if port (number->string port) "") + (if path (encode path) "") + (if query "?" "") + (if (pair? query) (uri-alist->query query) (or query "")) + (if fragment "#" "") (if fragment (encode fragment) ""))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; query encoding and decoding + +(define (uri-safe-char? ch) + (or (char-alphabetic? ch) + (char-numeric? ch) + (case ch + ((#\- #\_ #\. #\! #\~ #\* #\' #\( #\)) #t) + (else #f)))) + +(define (collect str from to res) + (if (>= from to) + res + (cons (substring str from to) res))) + +(define (uri-encode str . o) + (define (encode-1-space ch) + (if (eqv? ch #\space) + "+" + (encode-1-normal ch))) + (define (encode-1-normal ch) + (let* ((i (char->integer ch)) + (hex (number->string i 16))) + (if (< i 16) + (string-append "%0" hex) + (string-append "%" hex)))) + (let ((start 0) + (end (string-length str)) + (encode-1 (if (and (pair? o) (car o)) + encode-1-space + encode-1-normal))) + (let lp ((from start) (to start) (res '())) + (if (>= to end) + (if (zero? from) + str + (string-concatenate (reverse (collect str from to res)))) + (let* ((ch (string-ref str to)) + (next (+ to 1))) + (if (uri-safe-char? ch) + (lp from next res) + (lp next next (cons (encode-1 ch) + (collect str from to res))))))))) + +(define (uri-decode str . o) + (let ((space-as-plus? (and (pair? o) (car o))) + (start 0) + (end (string-length str))) + (let lp ((from start) (to start) (res '())) + (if (>= to end) + (if (zero? from) + str + (string-concatenate (reverse (collect str from to res)))) + (let* ((ch (string-ref str to)) + (next (+ to 1))) + (cond + ((eqv? ch #\%) + (if (>= next end) + (lp next next (collect str from to res)) + (let ((next2 (+ next 1))) + (if (>= next2 end) + (lp next2 next2 (collect str from to res)) + (let* ((next3 (+ next2 1)) + (hex (substring str next next3)) + (i (string->number hex 16))) + (lp next3 next3 (cons (string (integer->char i)) + (collect str from to res)))))))) + ((and space-as-plus? (eqv? ch #\+)) + (lp next next (cons " " (collect str from to res)))) + (else + (lp from next res)))))))) + +(define (uri-query->alist str . o) + (define (split-char? c) (if (eqv? c #\&) #t (eqv? c #\;))) + (let ((len (string-length str)) + (plus? (and (pair? o) (car o)))) + (let lp ((i 0) (res '())) + (if (>= i len) + (reverse res) + (let* ((j (or (string-index-of str split-char? i) len)) + (k (string-scan str #\= i j)) + (cell (if k + (cons (uri-decode (substring str i k) plus?) + (uri-decode (substring str (+ k 1) j) plus?)) + (cons (uri-decode (substring str i j) plus?) #f)))) + (lp (+ j 1) (cons cell res))))))) + +(define (uri-alist->query ls . o) + (define plus? (and (pair? o) (car o))) + (define (encode key val res) + (let ((res (cons (uri-encode key plus?) res))) + (if val (cons (uri-encode val plus?) (cons "=" res)) res))) + (if (null? ls) + "" + (let lp ((x (car ls)) (ls (cdr ls)) (res '())) + (let ((res (encode (car x) (cdr x) res))) + (if (null? ls) + (string-concatenate (reverse res)) + (lp (car ls) (cdr ls) (cons "&" res)))))))