mirror of
https://github.com/justinethier/cyclone.git
synced 2025-07-07 13:16:34 +02:00
Merge branch 'if-inline-dev'
This commit is contained in:
commit
19f15e6353
5 changed files with 313 additions and 31 deletions
|
@ -368,6 +368,8 @@ object Cyc_sum(void *data, object cont, int argc, object n, ...);
|
||||||
object Cyc_sub(void *data, object cont, int argc, object n, ...);
|
object Cyc_sub(void *data, object cont, int argc, object n, ...);
|
||||||
object Cyc_mul(void *data, object cont, int argc, object n, ...);
|
object Cyc_mul(void *data, object cont, int argc, object n, ...);
|
||||||
object Cyc_div(void *data, object cont, int argc, object n, ...);
|
object Cyc_div(void *data, object cont, int argc, object n, ...);
|
||||||
|
// Future idea, there may be uses for this in addition to if statements:
|
||||||
|
#define Cyc_if(c,t,e) ((boolean_f != c) ? (t) : (e))
|
||||||
object Cyc_fast_sum(void *data, object ptr, object x, object y);
|
object Cyc_fast_sum(void *data, object ptr, object x, object y);
|
||||||
object Cyc_fast_sub(void *data, object ptr, object x, object y);
|
object Cyc_fast_sub(void *data, object ptr, object x, object y);
|
||||||
object Cyc_fast_mul(void *data, object ptr, object x, object y);
|
object Cyc_fast_mul(void *data, object ptr, object x, object y);
|
||||||
|
|
|
@ -764,7 +764,8 @@
|
||||||
(apply f cars)
|
(apply f cars)
|
||||||
(recur cdrs)))))
|
(recur cdrs)))))
|
||||||
;; Fast path.
|
;; Fast path.
|
||||||
(if (eq? 1 (length lis1))
|
;(if (eq? 1 (length lis1))
|
||||||
|
(if (null? (cdr lis1)) ;; O(1) instead of O(n) for length
|
||||||
(f (car lis1))
|
(f (car lis1))
|
||||||
(begin (f (car lis1))
|
(begin (f (car lis1))
|
||||||
(for-each f (cdr lis1)))))))
|
(for-each f (cdr lis1)))))))
|
||||||
|
|
|
@ -46,6 +46,8 @@
|
||||||
adbv:set-const!
|
adbv:set-const!
|
||||||
adbv:const-value
|
adbv:const-value
|
||||||
adbv:set-const-value!
|
adbv:set-const-value!
|
||||||
|
adbv:ref-count
|
||||||
|
adbv:set-ref-count!
|
||||||
adbv:ref-by
|
adbv:ref-by
|
||||||
adbv:set-ref-by!
|
adbv:set-ref-by!
|
||||||
;; Analyze functions
|
;; Analyze functions
|
||||||
|
@ -68,7 +70,8 @@
|
||||||
(%adb:make-var
|
(%adb:make-var
|
||||||
global defined-by
|
global defined-by
|
||||||
defines-lambda-id
|
defines-lambda-id
|
||||||
const const-value ref-by
|
const const-value
|
||||||
|
ref-count ref-by
|
||||||
reassigned assigned-value
|
reassigned assigned-value
|
||||||
app-fnc-count app-arg-count
|
app-fnc-count app-arg-count
|
||||||
inlinable mutated-indirectly
|
inlinable mutated-indirectly
|
||||||
|
@ -79,6 +82,7 @@
|
||||||
(defines-lambda-id adbv:defines-lambda-id adbv:set-defines-lambda-id!)
|
(defines-lambda-id adbv:defines-lambda-id adbv:set-defines-lambda-id!)
|
||||||
(const adbv:const? adbv:set-const!)
|
(const adbv:const? adbv:set-const!)
|
||||||
(const-value adbv:const-value adbv:set-const-value!)
|
(const-value adbv:const-value adbv:set-const-value!)
|
||||||
|
(ref-count adbv:ref-count adbv:set-ref-count!)
|
||||||
(ref-by adbv:ref-by adbv:set-ref-by!)
|
(ref-by adbv:ref-by adbv:set-ref-by!)
|
||||||
;; TODO: need to set reassigned flag if variable is SET, however there is at least
|
;; TODO: need to set reassigned flag if variable is SET, however there is at least
|
||||||
;; one exception for local define's, which are initialized to #f and then assigned
|
;; one exception for local define's, which are initialized to #f and then assigned
|
||||||
|
@ -122,7 +126,7 @@
|
||||||
)
|
)
|
||||||
|
|
||||||
(define (adb:make-var)
|
(define (adb:make-var)
|
||||||
(%adb:make-var '? '? #f #f #f '() #f #f 0 0 #t #f #f))
|
(%adb:make-var '? '? #f #f #f 0 '() #f #f 0 0 #t #f #f))
|
||||||
|
|
||||||
(define-record-type <analysis-db-function>
|
(define-record-type <analysis-db-function>
|
||||||
(%adb:make-fnc simple unused-params assigned-to-var side-effects)
|
(%adb:make-fnc simple unused-params assigned-to-var side-effects)
|
||||||
|
@ -164,6 +168,10 @@
|
||||||
(let ((var (adb:get/default sym (adb:make-var))))
|
(let ((var (adb:get/default sym (adb:make-var))))
|
||||||
(fnc var)))
|
(fnc var)))
|
||||||
|
|
||||||
|
(define (with-fnc id callback)
|
||||||
|
(let ((fnc (adb:get/default id (adb:make-fnc))))
|
||||||
|
(callback fnc)))
|
||||||
|
|
||||||
(define (with-fnc! id callback)
|
(define (with-fnc! id callback)
|
||||||
(let ((fnc (adb:get/default id (adb:make-fnc))))
|
(let ((fnc (adb:get/default id (adb:make-fnc))))
|
||||||
(callback fnc)
|
(callback fnc)
|
||||||
|
@ -172,34 +180,6 @@
|
||||||
;; Determine if the given top-level function can be freed from CPS, due
|
;; Determine if the given top-level function can be freed from CPS, due
|
||||||
;; to it only containing calls to code that itself can be inlined.
|
;; to it only containing calls to code that itself can be inlined.
|
||||||
(define (inlinable-top-level-lambda? expr)
|
(define (inlinable-top-level-lambda? expr)
|
||||||
;; TODO: consolidate with same function in cps-optimizations module
|
|
||||||
(define (prim-creates-mutable-obj? prim)
|
|
||||||
(member
|
|
||||||
prim
|
|
||||||
'(
|
|
||||||
apply ;; ??
|
|
||||||
cons
|
|
||||||
make-vector
|
|
||||||
make-bytevector
|
|
||||||
bytevector
|
|
||||||
bytevector-append
|
|
||||||
bytevector-copy
|
|
||||||
string->utf8
|
|
||||||
number->string
|
|
||||||
symbol->string
|
|
||||||
list->string
|
|
||||||
utf8->string
|
|
||||||
read-line
|
|
||||||
string-append
|
|
||||||
string
|
|
||||||
substring
|
|
||||||
Cyc-installation-dir
|
|
||||||
Cyc-compilation-environment
|
|
||||||
Cyc-bytevector-copy
|
|
||||||
Cyc-utf8->string
|
|
||||||
Cyc-string->utf8
|
|
||||||
list->vector
|
|
||||||
)))
|
|
||||||
(define (scan expr fail)
|
(define (scan expr fail)
|
||||||
(cond
|
(cond
|
||||||
((string? expr) (fail))
|
((string? expr) (fail))
|
||||||
|
@ -265,6 +245,48 @@
|
||||||
(k #t))))))) ;; Scanned fine, return #t
|
(k #t))))))) ;; Scanned fine, return #t
|
||||||
(else #f)))
|
(else #f)))
|
||||||
|
|
||||||
|
;; Scan given if expression to determine if an inline is safe.
|
||||||
|
;; Returns #f if not, the new if expression otherwise.
|
||||||
|
(define (inline-if:scan-and-replace expr kont)
|
||||||
|
(define (scan expr fail)
|
||||||
|
;(trace:error `(inline-if:scan-and-replace:scan ,expr))
|
||||||
|
(cond
|
||||||
|
((ast:lambda? expr) (fail))
|
||||||
|
((string? expr) (fail))
|
||||||
|
((bytevector? expr) (fail))
|
||||||
|
((const? expr) expr) ;; Good enough? what about large vectors or anything requiring alloca (strings, bytevectors, what else?)
|
||||||
|
((ref? expr) expr)
|
||||||
|
((if? expr)
|
||||||
|
`(Cyc-if ,(scan (if->condition expr) fail)
|
||||||
|
,(scan (if->then expr) fail)
|
||||||
|
,(scan (if->else expr) fail)))
|
||||||
|
((app? expr)
|
||||||
|
(let ((fnc (car expr)))
|
||||||
|
;; If function needs CPS, fail right away
|
||||||
|
(cond
|
||||||
|
((equal? (car expr) kont)
|
||||||
|
;; Get rid of the continuation
|
||||||
|
(scan (cadr expr) fail))
|
||||||
|
((or (not (prim? fnc))
|
||||||
|
(prim:cont? fnc)
|
||||||
|
(prim:mutates? fnc)
|
||||||
|
(prim-creates-mutable-obj? fnc)
|
||||||
|
)
|
||||||
|
(fail))
|
||||||
|
(else
|
||||||
|
;; Otherwise, check for valid args
|
||||||
|
(cons
|
||||||
|
(car expr)
|
||||||
|
(map
|
||||||
|
(lambda (e)
|
||||||
|
(scan e fail))
|
||||||
|
(cdr expr)))))))
|
||||||
|
;; Reject everything else - define, set, lambda
|
||||||
|
(else (fail))))
|
||||||
|
(call/cc
|
||||||
|
(lambda (return)
|
||||||
|
(scan expr (lambda () (return #f))))))
|
||||||
|
|
||||||
(define (analyze-find-lambdas exp lid)
|
(define (analyze-find-lambdas exp lid)
|
||||||
(cond
|
(cond
|
||||||
((ast:lambda? exp)
|
((ast:lambda? exp)
|
||||||
|
@ -389,12 +411,14 @@
|
||||||
((quote? exp) #f)
|
((quote? exp) #f)
|
||||||
((ref? exp)
|
((ref? exp)
|
||||||
(let ((var (adb:get/default exp (adb:make-var))))
|
(let ((var (adb:get/default exp (adb:make-var))))
|
||||||
|
(adbv:set-ref-count! var (+ 1 (adbv:ref-count var)))
|
||||||
(adbv:set-ref-by! var (cons lid (adbv:ref-by var)))
|
(adbv:set-ref-by! var (cons lid (adbv:ref-by var)))
|
||||||
))
|
))
|
||||||
((define? exp)
|
((define? exp)
|
||||||
;(let ((var (adb:get/default (define->var exp) (adb:make-var))))
|
;(let ((var (adb:get/default (define->var exp) (adb:make-var))))
|
||||||
(with-var! (define->var exp) (lambda (var)
|
(with-var! (define->var exp) (lambda (var)
|
||||||
(adbv:set-defined-by! var lid)
|
(adbv:set-defined-by! var lid)
|
||||||
|
(adbv:set-ref-count! var (+ 1 (adbv:ref-count var)))
|
||||||
(adbv:set-ref-by! var (cons lid (adbv:ref-by var)))
|
(adbv:set-ref-by! var (cons lid (adbv:ref-by var)))
|
||||||
(adbv-set-assigned-value-helper! (define->var exp) var (define->exp exp))
|
(adbv-set-assigned-value-helper! (define->var exp) var (define->exp exp))
|
||||||
(adbv:set-const! var #f)
|
(adbv:set-const! var #f)
|
||||||
|
@ -406,6 +430,7 @@
|
||||||
(if (adbv:assigned-value var)
|
(if (adbv:assigned-value var)
|
||||||
(adbv:set-reassigned! var #t))
|
(adbv:set-reassigned! var #t))
|
||||||
(adbv-set-assigned-value-helper! (set!->var exp) var (set!->exp exp))
|
(adbv-set-assigned-value-helper! (set!->var exp) var (set!->exp exp))
|
||||||
|
(adbv:set-ref-count! var (+ 1 (adbv:ref-count var)))
|
||||||
(adbv:set-ref-by! var (cons lid (adbv:ref-by var)))
|
(adbv:set-ref-by! var (cons lid (adbv:ref-by var)))
|
||||||
(adbv:set-const! var #f)
|
(adbv:set-const! var #f)
|
||||||
(adbv:set-const-value! var #f)))
|
(adbv:set-const-value! var #f)))
|
||||||
|
@ -492,6 +517,7 @@
|
||||||
;; TODO:
|
;; TODO:
|
||||||
; ((ref? exp)
|
; ((ref? exp)
|
||||||
; (let ((var (adb:get/default exp (adb:make-var))))
|
; (let ((var (adb:get/default exp (adb:make-var))))
|
||||||
|
; (adbv:set-ref-count! var (+ 1 (adbv:ref-count var)))
|
||||||
; (adbv:set-ref-by! var (cons lid (adbv:ref-by var)))
|
; (adbv:set-ref-by! var (cons lid (adbv:ref-by var)))
|
||||||
; ))
|
; ))
|
||||||
((define? exp)
|
((define? exp)
|
||||||
|
@ -831,6 +857,67 @@
|
||||||
(set! args (cdr args)))
|
(set! args (cdr args)))
|
||||||
(ast:lambda-formals->list (car exp))))
|
(ast:lambda-formals->list (car exp))))
|
||||||
(opt:inline-prims (car (ast:lambda-body (car exp))) refs))
|
(opt:inline-prims (car (ast:lambda-body (car exp))) refs))
|
||||||
|
;; Issue #201 - Attempt to identify case where an if can be inlined
|
||||||
|
((and #f ;; TODO: Disabling for now, see issue for more info
|
||||||
|
(= (length exp) 2)
|
||||||
|
(ast:lambda? (car exp))
|
||||||
|
(ast:lambda? (cadr exp))
|
||||||
|
(ast:lambda-has-cont (car exp))
|
||||||
|
(= 1 (length (ast:lambda-formals->list (car exp))))
|
||||||
|
(= 1 (length (ast:lambda-formals->list (cadr exp))))
|
||||||
|
(if? (car (ast:lambda-body (car exp))))
|
||||||
|
;; TODO: think we can get rid of this simplification now
|
||||||
|
;; Simplification, for now only allow then/else that call a cont
|
||||||
|
;; immediately, to prevent having to scan/rewrite those expressions
|
||||||
|
(let ((if-exp (car (ast:lambda-body (car exp))))
|
||||||
|
(kont (car (ast:lambda-formals->list (car exp)))))
|
||||||
|
(and
|
||||||
|
(app? (if->then if-exp))
|
||||||
|
(app? (if->else if-exp))
|
||||||
|
;(equal? kont (car (if->then if-exp)))
|
||||||
|
;(equal? kont (car (if->else if-exp)))
|
||||||
|
))
|
||||||
|
;;
|
||||||
|
(not
|
||||||
|
(with-fnc (ast:lambda-id (car exp)) (lambda (fnc)
|
||||||
|
(adbf:side-effects fnc))))
|
||||||
|
)
|
||||||
|
;(trace:error `(DEBUG2 ,exp))
|
||||||
|
(let* ((new-exp (car (ast:lambda-body (cadr exp))))
|
||||||
|
(old-if (car (ast:lambda-body (car exp))))
|
||||||
|
(old-k (car (ast:lambda-formals->list (car exp))))
|
||||||
|
(old-arg (car (ast:lambda-formals->list (cadr exp))))
|
||||||
|
; TODO: what about nested if's? may need another pass above to make sure
|
||||||
|
;; the if is simple enough to inline
|
||||||
|
;TODO: can logic from inlinable-top-level-lambda? be repurposed to
|
||||||
|
;scan old-if to make sure everything is inlinable???
|
||||||
|
(new-if
|
||||||
|
(inline-if:scan-and-replace
|
||||||
|
`(Cyc-if ,(if->condition old-if)
|
||||||
|
,(if->then old-if)
|
||||||
|
,(if->else old-if))
|
||||||
|
old-k))
|
||||||
|
)
|
||||||
|
#;(trace:error `(DEBUG if inline candidate
|
||||||
|
,exp
|
||||||
|
old-k:
|
||||||
|
,old-k
|
||||||
|
old-arg:
|
||||||
|
,old-arg
|
||||||
|
new-if:
|
||||||
|
,new-if
|
||||||
|
new-exp:
|
||||||
|
,new-exp
|
||||||
|
))
|
||||||
|
|
||||||
|
(cond
|
||||||
|
(new-if
|
||||||
|
(hash-table-set! refs old-arg new-if)
|
||||||
|
(opt:inline-prims new-exp refs))
|
||||||
|
(else
|
||||||
|
;; Could not inline
|
||||||
|
(map (lambda (e) (opt:inline-prims e refs)) exp)))
|
||||||
|
)) ;;
|
||||||
(else
|
(else
|
||||||
(map (lambda (e) (opt:inline-prims e refs)) exp))))
|
(map (lambda (e) (opt:inline-prims e refs)) exp))))
|
||||||
(else
|
(else
|
||||||
|
|
|
@ -80,6 +80,7 @@
|
||||||
Cyc-stdin
|
Cyc-stdin
|
||||||
Cyc-stderr
|
Cyc-stderr
|
||||||
Cyc-list
|
Cyc-list
|
||||||
|
Cyc-if
|
||||||
Cyc-fast-plus
|
Cyc-fast-plus
|
||||||
Cyc-fast-sub
|
Cyc-fast-sub
|
||||||
Cyc-fast-mul
|
Cyc-fast-mul
|
||||||
|
@ -216,6 +217,7 @@
|
||||||
(Cyc-stdout 0 0)
|
(Cyc-stdout 0 0)
|
||||||
(Cyc-stdin 0 0)
|
(Cyc-stdin 0 0)
|
||||||
(Cyc-stderr 0 0)
|
(Cyc-stderr 0 0)
|
||||||
|
(Cyc-if 3 3)
|
||||||
(Cyc-fast-plus 2 2)
|
(Cyc-fast-plus 2 2)
|
||||||
(Cyc-fast-sub 2 2)
|
(Cyc-fast-sub 2 2)
|
||||||
(Cyc-fast-mul 2 2)
|
(Cyc-fast-mul 2 2)
|
||||||
|
@ -459,6 +461,7 @@
|
||||||
((eq? p 'Cyc-stdin) "Cyc_stdin")
|
((eq? p 'Cyc-stdin) "Cyc_stdin")
|
||||||
((eq? p 'Cyc-stderr) "Cyc_stderr")
|
((eq? p 'Cyc-stderr) "Cyc_stderr")
|
||||||
((eq? p 'Cyc-list) "Cyc_list")
|
((eq? p 'Cyc-list) "Cyc_list")
|
||||||
|
((eq? p 'Cyc-if) "Cyc_if")
|
||||||
((eq? p 'Cyc-fast-plus) "Cyc_fast_sum")
|
((eq? p 'Cyc-fast-plus) "Cyc_fast_sum")
|
||||||
((eq? p 'Cyc-fast-sub) "Cyc_fast_sub")
|
((eq? p 'Cyc-fast-sub) "Cyc_fast_sub")
|
||||||
((eq? p 'Cyc-fast-mul) "Cyc_fast_mul")
|
((eq? p 'Cyc-fast-mul) "Cyc_fast_mul")
|
||||||
|
|
189
tail-blog-post-notes.md
Normal file
189
tail-blog-post-notes.md
Normal file
|
@ -0,0 +1,189 @@
|
||||||
|
While going through a new run of the [R7RS benchmarks from Larceny](http://www.larcenists.org/benchmarksGenuineR7Linux.html), I noticed Cyclone performed significantly worse than other schemes on the tail benchmark. Certainly when testing locally the results are less than impressive:
|
||||||
|
|
||||||
|
[justin@justin-pc r7rs-benchmarks]$ ./bench cyclone tail
|
||||||
|
|
||||||
|
Testing tail under Cyclone
|
||||||
|
Including postlude /home/justin/Documents/r7rs-benchmarks/src/Cyclone-postlude.scm
|
||||||
|
Compiling...
|
||||||
|
Running...
|
||||||
|
Running tail:25
|
||||||
|
Elapsed time: 32.261715 seconds (32) for tail:25
|
||||||
|
+!CSVLINE!+cyclone-0.5.1,tail:25,32.261715
|
||||||
|
|
||||||
|
real 0m32.379s
|
||||||
|
user 0m31.783s
|
||||||
|
sys 0m0.513s
|
||||||
|
|
||||||
|
One of the easiest things to do is run a profiler on the code to figure out what is going on. This lets us see if there is something in the runtime or compiled code that is dominating the execution time, and possibly slowing things down. This isn't a catch-all - for example, it can't show us if a compiler optimization is needed. But it helps paint a picture of what is going on.
|
||||||
|
|
||||||
|
A compiled Cyclone program is just a regular C program so we can use the standard GNU tools for profiling and debugging.
|
||||||
|
|
||||||
|
To get started we change `Makefile.config` in cyclone-bootstrap to enable profiling. The `-O2` option in the lines below are replaced with `-g -pg`:
|
||||||
|
|
||||||
|
CFLAGS ?= -g -pg -fPIC -rdynamic -Wall -Iinclude -L.
|
||||||
|
COMP_CFLAGS ?= -g -pg -fPIC -rdynamic -Wall -I$(PREFIX)/include -L$(PREFIX)/lib
|
||||||
|
|
||||||
|
Then Cyclone must be rebuilt:
|
||||||
|
|
||||||
|
[justin@justin-pc cyclone-bootstrap]$ sudo make clean ; ./install.sh
|
||||||
|
|
||||||
|
Once this is done a `gmon.out` file will be generated each time Cyclone or a compiled Cyclone program is executed. This can be used to create a detailed analysis of what the program is doing at runtime.
|
||||||
|
|
||||||
|
Now we perform set up for running the `tail` benchmark directly:
|
||||||
|
|
||||||
|
[justin@justin-pc r7rs-benchmarks]$ cd /tmp/larcenous/Cyclone/
|
||||||
|
[justin@justin-pc Cyclone]$ cp -r ~/Documents/r7rs-benchmarks/inputs/ .
|
||||||
|
[justin@justin-pc Cyclone]$ mkdir outputs
|
||||||
|
|
||||||
|
And recompile `tail.scm` to get a version with profiling. We then run it to generate a `gmon.out` file:
|
||||||
|
|
||||||
|
[justin@justin-pc Cyclone]$ cyclone tail.scm
|
||||||
|
[justin@justin-pc Cyclone]$ ./tail < inputs/tail.input
|
||||||
|
|
||||||
|
Then we run `gprof` to create a report:
|
||||||
|
|
||||||
|
[justin@justin-pc Cyclone]$ ls
|
||||||
|
gmon.out inputs outputs tail tail.c tail.o tail.scm
|
||||||
|
[justin@justin-pc Cyclone]$ gprof ./tail gmon.out > report.txt
|
||||||
|
|
||||||
|
Let's examine the start of `report.txt` to see the functions that are taking up the most of the program's runtime:
|
||||||
|
|
||||||
|
Flat profile:
|
||||||
|
|
||||||
|
Each sample counts as 0.01 seconds.
|
||||||
|
% cumulative self self total
|
||||||
|
time seconds seconds calls s/call s/call name
|
||||||
|
99.19 61.09 61.09 2331598 0.00 0.00 Cyc_length
|
||||||
|
0.10 61.15 0.06 11221 0.00 0.00 gc_minor
|
||||||
|
0.08 61.20 0.05 777200 0.00 0.00 __lambda_3
|
||||||
|
0.08 61.25 0.05 777097 0.00 0.00 __lambda_300
|
||||||
|
0.03 61.27 0.02 4664280 0.00 0.00 Cyc_st_add
|
||||||
|
0.03 61.29 0.02 1562923 0.00 0.00 gc_thr_add_to_move_buffer
|
||||||
|
0.03 61.31 0.02 777572 0.00 0.00 __lambda_278
|
||||||
|
0.03 61.33 0.02 777178 0.00 0.00 dispatch
|
||||||
|
0.03 61.35 0.02 777147 0.00 0.00 __lambda_297
|
||||||
|
0.03 61.37 0.02 777135 0.00 0.00 __lambda_368
|
||||||
|
0.03 61.39 0.02 59 0.00 0.00 gc_empty_collector_stack
|
||||||
|
0.02 61.41 0.02 5441402 0.00 0.00 Cyc_is_pair
|
||||||
|
0.02 61.42 0.02 ck_pr_cas_char
|
||||||
|
0.02 61.43 0.01 3109193 0.00 0.00 Cyc_is_null
|
||||||
|
0.02 61.44 0.01 3109170 0.00 0.00 Cyc_car
|
||||||
|
|
||||||
|
Well that's interesting, `tail` is spending all of its time computing `Cyc_length`.
|
||||||
|
|
||||||
|
Again, Cyclone compiles programs to C, so we can use `gdb` to debug them and figure out how `Cyc_length` is being called.
|
||||||
|
|
||||||
|
First we need to know what inputs to use:
|
||||||
|
|
||||||
|
[justin@justin-pc Cyclone]$ cat inputs/tail.input
|
||||||
|
25
|
||||||
|
"inputs/bib"
|
||||||
|
"outputs/tail.output"
|
||||||
|
ignored
|
||||||
|
|
||||||
|
To run gdb:
|
||||||
|
|
||||||
|
[justin@justin-pc Cyclone]$ gdb ./tail
|
||||||
|
GNU gdb (GDB) 7.12.1
|
||||||
|
Copyright (C) 2017 Free Software Foundation, Inc.
|
||||||
|
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
|
||||||
|
This is free software: you are free to change and redistribute it.
|
||||||
|
There is NO WARRANTY, to the extent permitted by law. Type "show copying"
|
||||||
|
and "show warranty" for details.
|
||||||
|
This GDB was configured as "x86_64-pc-linux-gnu".
|
||||||
|
Type "show configuration" for configuration details.
|
||||||
|
For bug reporting instructions, please see:
|
||||||
|
<http://www.gnu.org/software/gdb/bugs/>.
|
||||||
|
Find the GDB manual and other documentation resources online at:
|
||||||
|
<http://www.gnu.org/software/gdb/documentation/>.
|
||||||
|
For help, type "help".
|
||||||
|
Type "apropos word" to search for commands related to "word"...
|
||||||
|
Reading symbols from ./tail...done.
|
||||||
|
(gdb) run
|
||||||
|
Starting program: /tmp/larcenous/Cyclone/tail
|
||||||
|
[Thread debugging using libthread_db enabled]
|
||||||
|
Using host libthread_db library "/usr/lib/libthread_db.so.1".
|
||||||
|
[New Thread 0x7ffff6031700 (LWP 1850)]
|
||||||
|
25
|
||||||
|
"inputs/bib"
|
||||||
|
"outputs/tail.output"
|
||||||
|
^C
|
||||||
|
Thread 1 "tail" received signal SIGINT, Interrupt.
|
||||||
|
0x00007ffff6fd44ed in read () from /usr/lib/libc.so.6
|
||||||
|
(gdb) break Cyc_length
|
||||||
|
Breakpoint 1 at 0x53b145: file runtime.c, line 1713.
|
||||||
|
(gdb) c
|
||||||
|
Continuing.
|
||||||
|
ignored
|
||||||
|
|
||||||
|
After continuing a few times, the code breaks here:
|
||||||
|
|
||||||
|
#0 Cyc_length (data=0x7cd4e0, l=0x7ffffffbc660) at runtime.c:1713
|
||||||
|
#1 0x00000000004af96b in __lambda_368 (data=0x7cd4e0, argc=3, _=0x7ffff6635340, k_734906=0x7ffff63331c0, f_731928=0x7ffffffb2dd0, lis1_731927=0x7ffffffbc660,
|
||||||
|
lists_731926_raw=0x7ca420 <Cyc_void_symbol>) at scheme/base.c:22680
|
||||||
|
#2 0x00000000004afde1 in __lambda_367 (data=0x7cd4e0, argc=1, self_736727=0x7ffffff9d4c0, r_734921=0x7ca420 <Cyc_void_symbol>) at scheme/base.c:22703
|
||||||
|
#3 0x00000000004b7aa7 in __lambda_299 (data=0x7cd4e0, argc=3, _=0x7ffff66350e0, k_735061=0x7ffffff9d4c0, char_731994=0x2a, port_731993_raw=0x7ffff6333200) at scheme/base.c:23706
|
||||||
|
#4 0x000000000055cf5e in do_dispatch (data=0x7cd4e0, argc=3, func=0x4b7523 <__lambda_299>, clo=0x7ffff66350e0, b=0x7ffffff9ccf0) at dispatch.c:6
|
||||||
|
|
||||||
|
Opening the source code for `scheme/base.c` you can see the code breaks in the `for-each` function:
|
||||||
|
|
||||||
|
static void __lambda_368(void *data, int argc, closure _,object k_734906, object f_731928, object lis1_731927, object lists_731926_raw, ...) {
|
||||||
|
load_varargs(lists_731926, lists_731926_raw, argc - 3);
|
||||||
|
Cyc_st_add(data, "scheme/base.sld:for-each");
|
||||||
|
if( (boolean_f != Cyc_is_null(lis1_731927)) ){
|
||||||
|
|
||||||
|
And in `scheme/base.sld` you can see where `length` is being called:
|
||||||
|
|
||||||
|
(define (for-each f lis1 . lists)
|
||||||
|
(if (not (null? lis1))
|
||||||
|
(if (pair? lists)
|
||||||
|
(let recur ((lists (cons lis1 lists)))
|
||||||
|
(receive (cars cdrs) (%cars+cdrs lists)
|
||||||
|
(if (pair? cars)
|
||||||
|
(begin
|
||||||
|
(apply f cars)
|
||||||
|
(recur cdrs)))))
|
||||||
|
;; Fast path.
|
||||||
|
(if (eq? 1 (length lis1))
|
||||||
|
(f (car lis1))
|
||||||
|
(begin (f (car lis1))
|
||||||
|
(for-each f (cdr lis1)))))))
|
||||||
|
|
||||||
|
The code can be simplified to make it more obvious what is going on:
|
||||||
|
|
||||||
|
(define (for-each f lis1 . lists)
|
||||||
|
(if (not (null? lis1))
|
||||||
|
(if (eq? 1 (length lis1))
|
||||||
|
(f (car lis1))
|
||||||
|
(begin (f (car lis1))
|
||||||
|
(for-each f (cdr lis1))))))
|
||||||
|
|
||||||
|
Basically on every iteration of `for-each` the code is calling `length` to see if `f` can be called directly. Well, that's not good - the main `for-each` loop itself has a [time complexity of `O(n)`](https://en.wikipedia.org/wiki/Big_O_notation). The runtime depends directly on the length of `lis1`. But each time `length` is called it must examine the entire contents of `lis1`, which is another `O(n)` operation. Combined with the outer loop this raises the overall time complexity to `O(n^2)` - which can really add up for large values of `n`.
|
||||||
|
|
||||||
|
This reminds me of [an old article from Joel Spolsky](http://global.joelonsoftware.com/English/Articles/Interviewing.html) that talks about the same issue with respect to strings:
|
||||||
|
|
||||||
|
> Is their function fast? Look at how many times they call strlen. I've seen O(n^2) algorithms for strrev when it should be O(n), because they are calling strlen again and again in a loop.
|
||||||
|
|
||||||
|
The solution is to check directly for null, instead of scanning the whole string:
|
||||||
|
|
||||||
|
(if (null? (cdr lis1))
|
||||||
|
(f (car lis1))
|
||||||
|
(begin (f (car lis1))
|
||||||
|
|
||||||
|
After rebuilding with this fix we can re-run the tail benchmark:
|
||||||
|
|
||||||
|
[justin@justin-pc r7rs-benchmarks]$ ./bench cyclone tail
|
||||||
|
|
||||||
|
Testing tail under Cyclone
|
||||||
|
Including postlude /home/justin/Documents/r7rs-benchmarks/src/Cyclone-postlude.scm
|
||||||
|
Compiling...
|
||||||
|
Running...
|
||||||
|
Running tail:25
|
||||||
|
Elapsed time: 0.72314 seconds (1) for tail:25
|
||||||
|
+!CSVLINE!+cyclone-0.5.1,tail:25,0.72314
|
||||||
|
|
||||||
|
real 0m0.729s
|
||||||
|
user 0m0.540s
|
||||||
|
sys 0m0.187s
|
||||||
|
|
||||||
|
Whoa! Remember the older code took over 32 seconds to finish? Now it finishes in less than a second. Not bad.
|
Loading…
Add table
Reference in a new issue