From 9242c424c361828e1934fa8117bba3c7552c0668 Mon Sep 17 00:00:00 2001 From: Justin Ethier Date: Wed, 24 May 2017 17:23:23 +0000 Subject: [PATCH 01/18] WIP --- include/cyclone/runtime.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/cyclone/runtime.h b/include/cyclone/runtime.h index 0995ee2a..880f6ec7 100644 --- a/include/cyclone/runtime.h +++ b/include/cyclone/runtime.h @@ -368,6 +368,8 @@ object Cyc_sum(void *data, object cont, int argc, object n, ...); object Cyc_sub(void *data, object cont, int argc, object n, ...); object Cyc_mul(void *data, object cont, int argc, object n, ...); object Cyc_div(void *data, object cont, int argc, object n, ...); +// Future idea, there may be uses for this in addition to if statements: +// #define Cyc_if(c,t,e) ((boolean_f != c) ? t : e) object Cyc_fast_sum(void *data, object ptr, object x, object y); object Cyc_fast_sub(void *data, object ptr, object x, object y); object Cyc_fast_mul(void *data, object ptr, object x, object y); From 7606eeeb5cf928a9ba37c437d54e213bd4e8a8a3 Mon Sep 17 00:00:00 2001 From: Justin Ethier Date: Thu, 25 May 2017 13:25:03 +0000 Subject: [PATCH 02/18] WIP --- scheme/cyclone/cps-optimizations.sld | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/scheme/cyclone/cps-optimizations.sld b/scheme/cyclone/cps-optimizations.sld index fcfcd74a..caaaae7e 100644 --- a/scheme/cyclone/cps-optimizations.sld +++ b/scheme/cyclone/cps-optimizations.sld @@ -164,6 +164,10 @@ (let ((var (adb:get/default sym (adb:make-var)))) (fnc var))) + (define (with-fnc id callback) + (let ((fnc (adb:get/default id (adb:make-fnc)))) + (callback fnc))) + (define (with-fnc! id callback) (let ((fnc (adb:get/default id (adb:make-fnc)))) (callback fnc) @@ -831,6 +835,30 @@ (set! 
args (cdr args))) (ast:lambda-formals->list (car exp)))) (opt:inline-prims (car (ast:lambda-body (car exp))) refs)) + ;; Issue #201 - Attempt to identify case where an if can be inlined + ((and (= (length exp) 2) + (ast:lambda? (car exp)) + (ast:lambda? (cadr exp)) + (ast:lambda-has-cont (car exp)) + (= 1 (length (ast:lambda-formals->list (car exp)))) + (= 1 (length (ast:lambda-formals->list (cadr exp)))) + (if? (car (ast:lambda-body (car exp)))) + (not + (with-fnc (ast:lambda-id (car exp)) (lambda (fnc) + (adbf:side-effects fnc)))) + ) + ;(let ((if-fnc (adb:get/default (ast:lambda-body (car exp)) + (trace:error `(DEBUG if inline candidate ,exp)) + + ;; TODO: behavior would be: + ;; - simplify calling lambda's if to remove cont + ;; - replace arg to other lambda with simplified expression + ;; - replace exp with body of other lambda, + ;; - and call opt:inline-prims on it + + ;; Same behavior for now, just seeing if this is possible first + (map (lambda (e) (opt:inline-prims e refs)) exp)) + ;; (else (map (lambda (e) (opt:inline-prims e refs)) exp)))) (else From 85a74f3a1e371557538e984a2527638846976a24 Mon Sep 17 00:00:00 2001 From: Justin Ethier Date: Thu, 25 May 2017 16:20:10 +0000 Subject: [PATCH 03/18] WIP --- scheme/cyclone/cps-optimizations.sld | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/scheme/cyclone/cps-optimizations.sld b/scheme/cyclone/cps-optimizations.sld index caaaae7e..0913dd7c 100644 --- a/scheme/cyclone/cps-optimizations.sld +++ b/scheme/cyclone/cps-optimizations.sld @@ -847,8 +847,27 @@ (with-fnc (ast:lambda-id (car exp)) (lambda (fnc) (adbf:side-effects fnc)))) ) - ;(let ((if-fnc (adb:get/default (ast:lambda-body (car exp)) - (trace:error `(DEBUG if inline candidate ,exp)) +;(trace:error `(DEBUG2 ,(car (ast:lambda-body (car exp))))) + (let* ((new-exp (car (ast:lambda-body (cadr exp)))) + (new-if (car (ast:lambda-body (car exp)))) +; TODO: No, just replace K with values + ;(new-if `(if ,(if->condition 
old-if) + ; ,(cadr (if->then old-if)) + ; ,(cadr (if->else old-if)))) + (old-k (car (ast:lambda-formals->list (car exp)))) + (old-arg (car (ast:lambda-formals->list (cadr exp)))) + ) + (trace:error `(DEBUG if inline candidate + ,exp + old-k: + ,old-k + old-arg: + ,old-arg + new-if: + ,new-if + new-exp: + ,new-exp + ))) ;; TODO: behavior would be: ;; - simplify calling lambda's if to remove cont From 7cdabc02b65e8e7984bbe7c8cd5dd3b9082e4a65 Mon Sep 17 00:00:00 2001 From: Justin Ethier Date: Thu, 25 May 2017 17:46:41 -0400 Subject: [PATCH 04/18] WIP --- include/cyclone/runtime.h | 2 +- scheme/cyclone/cps-optimizations.sld | 33 +++++++++++++++++++--------- scheme/cyclone/primitives.sld | 3 +++ 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/include/cyclone/runtime.h b/include/cyclone/runtime.h index 880f6ec7..08771e88 100644 --- a/include/cyclone/runtime.h +++ b/include/cyclone/runtime.h @@ -369,7 +369,7 @@ object Cyc_sub(void *data, object cont, int argc, object n, ...); object Cyc_mul(void *data, object cont, int argc, object n, ...); object Cyc_div(void *data, object cont, int argc, object n, ...); // Future idea, there may be uses for this in addition to if statements: -// #define Cyc_if(c,t,e) ((boolean_f != c) ? t : e) +#define Cyc_if(c,t,e) ((boolean_f != c) ? t : e) object Cyc_fast_sum(void *data, object ptr, object x, object y); object Cyc_fast_sub(void *data, object ptr, object x, object y); object Cyc_fast_mul(void *data, object ptr, object x, object y); diff --git a/scheme/cyclone/cps-optimizations.sld b/scheme/cyclone/cps-optimizations.sld index 0913dd7c..07d739c0 100644 --- a/scheme/cyclone/cps-optimizations.sld +++ b/scheme/cyclone/cps-optimizations.sld @@ -843,17 +843,28 @@ (= 1 (length (ast:lambda-formals->list (car exp)))) (= 1 (length (ast:lambda-formals->list (cadr exp)))) (if? 
(car (ast:lambda-body (car exp)))) + ;; Simplification, for now only allow then/else that call a cont + ;; immediately, to prevent having to scan/rewrite those expressions + (let ((if-exp (car (ast:lambda-body (car exp)))) + (kont (car (ast:lambda-formals->list (car exp))))) + (and + (app? (if->then if-exp)) + (app? (if->else if-exp)) + (equal? kont (car (if->then if-exp))) + (equal? kont (car (if->else if-exp))))) + ;; (not (with-fnc (ast:lambda-id (car exp)) (lambda (fnc) (adbf:side-effects fnc)))) ) -;(trace:error `(DEBUG2 ,(car (ast:lambda-body (car exp))))) +(trace:error `(DEBUG2 ,exp)) (let* ((new-exp (car (ast:lambda-body (cadr exp)))) - (new-if (car (ast:lambda-body (car exp)))) -; TODO: No, just replace K with values - ;(new-if `(if ,(if->condition old-if) - ; ,(cadr (if->then old-if)) - ; ,(cadr (if->else old-if)))) + (old-if (car (ast:lambda-body (car exp)))) +; TODO: what about nested if's? may need another pass above to make sure +;; the if is simple enough to inline + (new-if `(Cyc-if ,(if->condition old-if) + ,(cadr (if->then old-if)) + ,(cadr (if->else old-if)))) (old-k (car (ast:lambda-formals->list (car exp)))) (old-arg (car (ast:lambda-formals->list (cadr exp)))) ) @@ -867,17 +878,19 @@ ,new-if new-exp: ,new-exp - ))) + )) + (hash-table-set! refs old-k 'values) ;; TODO: only a temporary solution, requires (scheme base) which is not guaranteed to be imported + (hash-table-set! 
refs old-arg new-if) ;; TODO: behavior would be: ;; - simplify calling lambda's if to remove cont ;; - replace arg to other lambda with simplified expression ;; - replace exp with body of other lambda, ;; - and call opt:inline-prims on it - + (opt:inline-prims new-exp refs) ;; Same behavior for now, just seeing if this is possible first - (map (lambda (e) (opt:inline-prims e refs)) exp)) - ;; + ;(map (lambda (e) (opt:inline-prims e refs)) exp) + )) ;; (else (map (lambda (e) (opt:inline-prims e refs)) exp)))) (else diff --git a/scheme/cyclone/primitives.sld b/scheme/cyclone/primitives.sld index d4db074f..6c4a49e9 100644 --- a/scheme/cyclone/primitives.sld +++ b/scheme/cyclone/primitives.sld @@ -80,6 +80,7 @@ Cyc-stdin Cyc-stderr Cyc-list + Cyc-if Cyc-fast-plus Cyc-fast-sub Cyc-fast-mul @@ -216,6 +217,7 @@ (Cyc-stdout 0 0) (Cyc-stdin 0 0) (Cyc-stderr 0 0) + (Cyc-if 3 3) (Cyc-fast-plus 2 2) (Cyc-fast-sub 2 2) (Cyc-fast-mul 2 2) @@ -459,6 +461,7 @@ ((eq? p 'Cyc-stdin) "Cyc_stdin") ((eq? p 'Cyc-stderr) "Cyc_stderr") ((eq? p 'Cyc-list) "Cyc_list") + ((eq? p 'Cyc-if) "Cyc_if") ((eq? p 'Cyc-fast-plus) "Cyc_fast_sum") ((eq? p 'Cyc-fast-sub) "Cyc_fast_sub") ((eq? p 'Cyc-fast-mul) "Cyc_fast_mul") From ce6b24cc103cce6d8cc6cd13dffcd0ce3a57e6eb Mon Sep 17 00:00:00 2001 From: Justin Ethier Date: Thu, 25 May 2017 18:54:55 -0400 Subject: [PATCH 05/18] WIP --- scheme/cyclone/cps-optimizations.sld | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scheme/cyclone/cps-optimizations.sld b/scheme/cyclone/cps-optimizations.sld index 07d739c0..98ced815 100644 --- a/scheme/cyclone/cps-optimizations.sld +++ b/scheme/cyclone/cps-optimizations.sld @@ -857,18 +857,20 @@ (with-fnc (ast:lambda-id (car exp)) (lambda (fnc) (adbf:side-effects fnc)))) ) -(trace:error `(DEBUG2 ,exp)) +;(trace:error `(DEBUG2 ,exp)) (let* ((new-exp (car (ast:lambda-body (cadr exp)))) (old-if (car (ast:lambda-body (car exp)))) ; TODO: what about nested if's? 
may need another pass above to make sure ;; the if is simple enough to inline +TODO: can logic from inlinable-top-level-lambda? be repurposed to +scan old-if to make sure everything is inlinable??? (new-if `(Cyc-if ,(if->condition old-if) ,(cadr (if->then old-if)) ,(cadr (if->else old-if)))) (old-k (car (ast:lambda-formals->list (car exp)))) (old-arg (car (ast:lambda-formals->list (cadr exp)))) ) - (trace:error `(DEBUG if inline candidate + #;(trace:error `(DEBUG if inline candidate ,exp old-k: ,old-k From a6093601ded32f0ef278b79b14484f0d29460ca5 Mon Sep 17 00:00:00 2001 From: Justin Ethier Date: Fri, 26 May 2017 14:10:25 +0000 Subject: [PATCH 06/18] Issue #201 - Inline if expressions --- scheme/cyclone/cps-optimizations.sld | 109 +++++++++++++++------------ 1 file changed, 62 insertions(+), 47 deletions(-) diff --git a/scheme/cyclone/cps-optimizations.sld b/scheme/cyclone/cps-optimizations.sld index 98ced815..ed2f0419 100644 --- a/scheme/cyclone/cps-optimizations.sld +++ b/scheme/cyclone/cps-optimizations.sld @@ -176,34 +176,6 @@ ;; Determine if the given top-level function can be freed from CPS, due ;; to it only containing calls to code that itself can be inlined. (define (inlinable-top-level-lambda? expr) - ;; TODO: consolidate with same function in cps-optimizations module - (define (prim-creates-mutable-obj? prim) - (member - prim - '( - apply ;; ?? - cons - make-vector - make-bytevector - bytevector - bytevector-append - bytevector-copy - string->utf8 - number->string - symbol->string - list->string - utf8->string - read-line - string-append - string - substring - Cyc-installation-dir - Cyc-compilation-environment - Cyc-bytevector-copy - Cyc-utf8->string - Cyc-string->utf8 - list->vector - ))) (define (scan expr fail) (cond ((string? expr) (fail)) @@ -269,6 +241,48 @@ (k #t))))))) ;; Scanned fine, return #t (else #f))) + ;; Scan given if expression to determine if an inline is safe. + ;; Returns #f if not, the new if expression otherwise. 
+ (define (inline-if:scan-and-replace expr kont) + (define (scan expr fail) +;(trace:error `(inline-if:scan-and-replace:scan ,expr)) + (cond + ((ast:lambda? expr) (fail)) + ((string? expr) (fail)) + ((bytevector? expr) (fail)) + ((const? expr) expr) ;; Good enough? what about large vectors or anything requiring alloca (strings, bytevectors, what else?) + ((ref? expr) expr) + ((if? expr) + `(Cyc-if ,(scan (if->condition expr) fail) + ,(scan (if->then expr) fail) + ,(scan (if->else expr) fail))) + ((app? expr) + (let ((fnc (car expr))) + ;; If function needs CPS, fail right away + (cond + ((equal? (car expr) kont) + ;; Get rid of the continuation + (scan (cadr expr) fail)) + ((or (not (prim? fnc)) + (prim:cont? fnc) + (prim:mutates? fnc) + (prim-creates-mutable-obj? fnc) + ) + (fail)) + (else + ;; Otherwise, check for valid args + (cons + (car expr) + (map + (lambda (e) + (scan e fail)) + (cdr expr))))))) + ;; Reject everything else - define, set, lambda + (else (fail)))) + (call/cc + (lambda (return) + (scan expr (lambda () (return #f)))))) + (define (analyze-find-lambdas exp lid) (cond ((ast:lambda? exp) @@ -850,8 +864,9 @@ (and (app? (if->then if-exp)) (app? (if->else if-exp)) - (equal? kont (car (if->then if-exp))) - (equal? kont (car (if->else if-exp))))) + ;(equal? kont (car (if->then if-exp))) + ;(equal? kont (car (if->else if-exp))) + )) ;; (not (with-fnc (ast:lambda-id (car exp)) (lambda (fnc) @@ -860,15 +875,18 @@ ;(trace:error `(DEBUG2 ,exp)) (let* ((new-exp (car (ast:lambda-body (cadr exp)))) (old-if (car (ast:lambda-body (car exp)))) -; TODO: what about nested if's? may need another pass above to make sure -;; the if is simple enough to inline -TODO: can logic from inlinable-top-level-lambda? be repurposed to -scan old-if to make sure everything is inlinable??? 
- (new-if `(Cyc-if ,(if->condition old-if) - ,(cadr (if->then old-if)) - ,(cadr (if->else old-if)))) (old-k (car (ast:lambda-formals->list (car exp)))) (old-arg (car (ast:lambda-formals->list (cadr exp)))) +; TODO: what about nested if's? may need another pass above to make sure +;; the if is simple enough to inline +;TODO: can logic from inlinable-top-level-lambda? be repurposed to +;scan old-if to make sure everything is inlinable??? + (new-if + (inline-if:scan-and-replace + `(Cyc-if ,(if->condition old-if) + ,(if->then old-if) + ,(if->else old-if)) + old-k)) ) #;(trace:error `(DEBUG if inline candidate ,exp @@ -882,16 +900,13 @@ scan old-if to make sure everything is inlinable??? ,new-exp )) - (hash-table-set! refs old-k 'values) ;; TODO: only a temporary solution, requires (scheme base) which is not guaranteed to be imported - (hash-table-set! refs old-arg new-if) - ;; TODO: behavior would be: - ;; - simplify calling lambda's if to remove cont - ;; - replace arg to other lambda with simplified expression - ;; - replace exp with body of other lambda, - ;; - and call opt:inline-prims on it - (opt:inline-prims new-exp refs) - ;; Same behavior for now, just seeing if this is possible first - ;(map (lambda (e) (opt:inline-prims e refs)) exp) + (cond + (new-if + (hash-table-set! 
refs old-arg new-if) + (opt:inline-prims new-exp refs)) + (else + ;; Could not inline + (map (lambda (e) (opt:inline-prims e refs)) exp))) )) ;; (else (map (lambda (e) (opt:inline-prims e refs)) exp)))) From 4e83e93cd7879f62bcd357291d61a73753c44b50 Mon Sep 17 00:00:00 2001 From: Justin Ethier Date: Fri, 26 May 2017 14:21:00 +0000 Subject: [PATCH 07/18] Added TODO --- scheme/cyclone/cps-optimizations.sld | 1 + 1 file changed, 1 insertion(+) diff --git a/scheme/cyclone/cps-optimizations.sld b/scheme/cyclone/cps-optimizations.sld index ed2f0419..f3304c95 100644 --- a/scheme/cyclone/cps-optimizations.sld +++ b/scheme/cyclone/cps-optimizations.sld @@ -857,6 +857,7 @@ (= 1 (length (ast:lambda-formals->list (car exp)))) (= 1 (length (ast:lambda-formals->list (cadr exp)))) (if? (car (ast:lambda-body (car exp)))) +;; TODO: think we can get rid of this simplification now ;; Simplification, for now only allow then/else that call a cont ;; immediately, to prevent having to scan/rewrite those expressions (let ((if-exp (car (ast:lambda-body (car exp)))) From aea673c7649a8493c93b21c1eb94fb80079b355b Mon Sep 17 00:00:00 2001 From: Justin Ethier Date: Fri, 26 May 2017 15:13:22 +0000 Subject: [PATCH 08/18] Added parens --- include/cyclone/runtime.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/cyclone/runtime.h b/include/cyclone/runtime.h index 08771e88..4ba996e7 100644 --- a/include/cyclone/runtime.h +++ b/include/cyclone/runtime.h @@ -369,7 +369,7 @@ object Cyc_sub(void *data, object cont, int argc, object n, ...); object Cyc_mul(void *data, object cont, int argc, object n, ...); object Cyc_div(void *data, object cont, int argc, object n, ...); // Future idea, there may be uses for this in addition to if statements: -#define Cyc_if(c,t,e) ((boolean_f != c) ? t : e) +#define Cyc_if(c,t,e) ((boolean_f != c) ? 
(t) : (e)) object Cyc_fast_sum(void *data, object ptr, object x, object y); object Cyc_fast_sub(void *data, object ptr, object x, object y); object Cyc_fast_mul(void *data, object ptr, object x, object y); From da2f2bb05f2e1fce614b04e24d9ab336e617bd14 Mon Sep 17 00:00:00 2001 From: Justin Ethier Date: Fri, 26 May 2017 15:41:46 +0000 Subject: [PATCH 09/18] Issue #201 - disabling for now --- scheme/cyclone/cps-optimizations.sld | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scheme/cyclone/cps-optimizations.sld b/scheme/cyclone/cps-optimizations.sld index f3304c95..0d8de0c2 100644 --- a/scheme/cyclone/cps-optimizations.sld +++ b/scheme/cyclone/cps-optimizations.sld @@ -850,7 +850,8 @@ (ast:lambda-formals->list (car exp)))) (opt:inline-prims (car (ast:lambda-body (car exp))) refs)) ;; Issue #201 - Attempt to identify case where an if can be inlined - ((and (= (length exp) 2) + ((and #f ;; TODO: Disabling for now, see issue for more info + (= (length exp) 2) (ast:lambda? (car exp)) (ast:lambda? (cadr exp)) (ast:lambda-has-cont (car exp)) From 5390802816a462b6402eab545786df4503eb0536 Mon Sep 17 00:00:00 2001 From: Justin Ethier Date: Fri, 26 May 2017 15:49:56 +0000 Subject: [PATCH 10/18] Added adbv:ref-count --- scheme/cyclone/cps-optimizations.sld | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/scheme/cyclone/cps-optimizations.sld b/scheme/cyclone/cps-optimizations.sld index 0d8de0c2..d3a006b4 100644 --- a/scheme/cyclone/cps-optimizations.sld +++ b/scheme/cyclone/cps-optimizations.sld @@ -46,6 +46,8 @@ adbv:set-const! adbv:const-value adbv:set-const-value! + adbv:ref-count + adbv:set-ref-count! adbv:ref-by adbv:set-ref-by! 
;; Analyze functions @@ -68,7 +70,8 @@ (%adb:make-var global defined-by defines-lambda-id - const const-value ref-by + const const-value + ref-count ref-by reassigned assigned-value app-fnc-count app-arg-count inlinable mutated-indirectly @@ -79,6 +82,7 @@ (defines-lambda-id adbv:defines-lambda-id adbv:set-defines-lambda-id!) (const adbv:const? adbv:set-const!) (const-value adbv:const-value adbv:set-const-value!) + (ref-count adbv:ref-count adbv:set-ref-count!) (ref-by adbv:ref-by adbv:set-ref-by!) ;; TODO: need to set reassigned flag if variable is SET, however there is at least ;; one exception for local define's, which are initialized to #f and then assigned @@ -122,7 +126,7 @@ ) (define (adb:make-var) - (%adb:make-var '? '? #f #f #f '() #f #f 0 0 #t #f #f)) + (%adb:make-var '? '? #f #f #f 0 '() #f #f 0 0 #t #f #f)) (define-record-type (%adb:make-fnc simple unused-params assigned-to-var side-effects) @@ -407,12 +411,14 @@ ((quote? exp) #f) ((ref? exp) (let ((var (adb:get/default exp (adb:make-var)))) + (adbv:set-ref-count! var (+ 1 (adbv:ref-count var))) (adbv:set-ref-by! var (cons lid (adbv:ref-by var))) )) ((define? exp) ;(let ((var (adb:get/default (define->var exp) (adb:make-var)))) (with-var! (define->var exp) (lambda (var) (adbv:set-defined-by! var lid) + (adbv:set-ref-count! var (+ 1 (adbv:ref-count var))) (adbv:set-ref-by! var (cons lid (adbv:ref-by var))) (adbv-set-assigned-value-helper! (define->var exp) var (define->exp exp)) (adbv:set-const! var #f) @@ -424,6 +430,7 @@ (if (adbv:assigned-value var) (adbv:set-reassigned! var #t)) (adbv-set-assigned-value-helper! (set!->var exp) var (set!->exp exp)) + (adbv:set-ref-count! var (+ 1 (adbv:ref-count var))) (adbv:set-ref-by! var (cons lid (adbv:ref-by var))) (adbv:set-const! var #f) (adbv:set-const-value! var #f))) @@ -510,6 +517,7 @@ ;; TODO: ; ((ref? exp) ; (let ((var (adb:get/default exp (adb:make-var)))) +; (adbv:set-ref-count! var (+ 1 (adbv:ref-count var))) ; (adbv:set-ref-by! 
var (cons lid (adbv:ref-by var))) ; )) ((define? exp) From aac5240a0bc3456fe1085f2dd40a4818632ee485 Mon Sep 17 00:00:00 2001 From: Justin Ethier Date: Fri, 2 Jun 2017 19:25:26 -0400 Subject: [PATCH 11/18] Avoid calling length each iteration of for-each length is O(n) so calling it should be avoided, especially for each iteration of for-each. Instead the code can just check if the first cdr is null. This has the potential for a huge speed improvement. --- scheme/base.sld | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scheme/base.sld b/scheme/base.sld index c6839bb1..144d11b2 100644 --- a/scheme/base.sld +++ b/scheme/base.sld @@ -764,7 +764,8 @@ (apply f cars) (recur cdrs))))) ;; Fast path. - (if (eq? 1 (length lis1)) + ;(if (eq? 1 (length lis1)) + (if (null? (cdr lis1)) ;; O(1) instead of O(n) for length (f (car lis1)) (begin (f (car lis1)) (for-each f (cdr lis1))))))) From 746a0e3e540428482758de0418bcb7931f43d712 Mon Sep 17 00:00:00 2001 From: Justin Ethier Date: Sat, 3 Jun 2017 18:21:37 -0400 Subject: [PATCH 12/18] WIP --- tail-blog-post-notes.txt | 173 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) create mode 100644 tail-blog-post-notes.txt diff --git a/tail-blog-post-notes.txt b/tail-blog-post-notes.txt new file mode 100644 index 00000000..6de5d7b0 --- /dev/null +++ b/tail-blog-post-notes.txt @@ -0,0 +1,173 @@ +Initial speed test for `tail` - Cyclone does very poorly compared to other Schemes: + + [justin@justin-pc r7rs-benchmarks]$ ./bench cyclone tail + + Testing tail under Cyclone + Including postlude /home/justin/Documents/r7rs-benchmarks/src/Cyclone-postlude.scm + Compiling... + Running... + Running tail:25 + Elapsed time: 32.261715 seconds (32) for tail:25 + +!CSVLINE!+cyclone-0.5.1,tail:25,32.261715 + + real 0m32.379s + user 0m31.783s + sys 0m0.513s + + +Change `Makefile.config` in cyclone-bootstrap to do profiling. 
The `-O2` option in the lines below are replaced with `-g -pg`: + + CFLAGS ?= -g -pg -fPIC -rdynamic -Wall -Iinclude -L. + COMP_CFLAGS ?= -g -pg -fPIC -rdynamic -Wall -I$(PREFIX)/include -L$(PREFIX)/lib + +Then Cyclone is rebuilt: + + [justin@justin-pc cyclone-bootstrap]$ sudo make clean ; ./install.sh + +Once this is done a `gmon.out` file will be generated each time Cyclone or a compiled Cyclone program is executed. + + +Set up for running the `tail` benchmark directly: + + [justin@justin-pc r7rs-benchmarks]$ cd /tmp/larcenous/Cyclone/ + [justin@justin-pc Cyclone]$ cp -r ~/Documents/r7rs-benchmarks/inputs/ . + [justin@justin-pc Cyclone]$ mkdir outputs + +Recompile `tail.scm` to get a version with profiling, and then run it to generate a `gmon.out` file: + + [justin@justin-pc Cyclone]$ cyclone tail.scm + [justin@justin-pc Cyclone]$ ./tail < inputs/tail.input + +[justin@justin-pc Cyclone]$ ls +gmon.out inputs outputs tail tail.c tail.o tail.scm +[justin@justin-pc Cyclone]$ gprof ./tail gmon.out > report.txt + +Let's examine the start of `report.txt` to see the functions that are taking up the most of the program's runtime: + + Flat profile: + + Each sample counts as 0.01 seconds. 
+ % cumulative self self total + time seconds seconds calls s/call s/call name + 99.19 61.09 61.09 2331598 0.00 0.00 Cyc_length + 0.10 61.15 0.06 11221 0.00 0.00 gc_minor + 0.08 61.20 0.05 777200 0.00 0.00 __lambda_3 + 0.08 61.25 0.05 777097 0.00 0.00 __lambda_300 + 0.03 61.27 0.02 4664280 0.00 0.00 Cyc_st_add + 0.03 61.29 0.02 1562923 0.00 0.00 gc_thr_add_to_move_buffer + 0.03 61.31 0.02 777572 0.00 0.00 __lambda_278 + 0.03 61.33 0.02 777178 0.00 0.00 dispatch + 0.03 61.35 0.02 777147 0.00 0.00 __lambda_297 + 0.03 61.37 0.02 777135 0.00 0.00 __lambda_368 + 0.03 61.39 0.02 59 0.00 0.00 gc_empty_collector_stack + 0.02 61.41 0.02 5441402 0.00 0.00 Cyc_is_pair + 0.02 61.42 0.02 ck_pr_cas_char + 0.02 61.43 0.01 3109193 0.00 0.00 Cyc_is_null + 0.02 61.44 0.01 3109170 0.00 0.00 Cyc_car + +Well that's interesting, `tail` is spending all of its time computing `Cyc_length`. + + +Run gdb: + + [justin@justin-pc Cyclone]$ cat inputs/tail.input + 25 + "inputs/bib" + "outputs/tail.output" + ignored + +A compiled Cyclone program is just a regular C program so we can use `gdb` to debug it: + + [justin@justin-pc Cyclone]$ gdb ./tail + GNU gdb (GDB) 7.12.1 + Copyright (C) 2017 Free Software Foundation, Inc. + License GPLv3+: GNU GPL version 3 or later + This is free software: you are free to change and redistribute it. + There is NO WARRANTY, to the extent permitted by law. Type "show copying" + and "show warranty" for details. + This GDB was configured as "x86_64-pc-linux-gnu". + Type "show configuration" for configuration details. + For bug reporting instructions, please see: + . + Find the GDB manual and other documentation resources online at: + . + For help, type "help". + Type "apropos word" to search for commands related to "word"... + Reading symbols from ./tail...done. + (gdb) run + Starting program: /tmp/larcenous/Cyclone/tail + [Thread debugging using libthread_db enabled] + Using host libthread_db library "/usr/lib/libthread_db.so.1". 
+ [New Thread 0x7ffff6031700 (LWP 1850)] + 25 + "inputs/bib" + "outputs/tail.output" + ^C + Thread 1 "tail" received signal SIGINT, Interrupt. + 0x00007ffff6fd44ed in read () from /usr/lib/libc.so.6 + (gdb) break Cyc_length + Breakpoint 1 at 0x53b145: file runtime.c, line 1713. + (gdb) c + Continuing. + ignored + +After continuing a few times, the code breaks here: + +#0 Cyc_length (data=0x7cd4e0, l=0x7ffffffbc660) at runtime.c:1713 +#1 0x00000000004af96b in __lambda_368 (data=0x7cd4e0, argc=3, _=0x7ffff6635340, k_734906=0x7ffff63331c0, f_731928=0x7ffffffb2dd0, lis1_731927=0x7ffffffbc660, + lists_731926_raw=0x7ca420 ) at scheme/base.c:22680 +#2 0x00000000004afde1 in __lambda_367 (data=0x7cd4e0, argc=1, self_736727=0x7ffffff9d4c0, r_734921=0x7ca420 ) at scheme/base.c:22703 +#3 0x00000000004b7aa7 in __lambda_299 (data=0x7cd4e0, argc=3, _=0x7ffff66350e0, k_735061=0x7ffffff9d4c0, char_731994=0x2a, port_731993_raw=0x7ffff6333200) at scheme/base.c:23706 +#4 0x000000000055cf5e in do_dispatch (data=0x7cd4e0, argc=3, func=0x4b7523 <__lambda_299>, clo=0x7ffff66350e0, b=0x7ffffff9ccf0) at dispatch.c:6 + +Opening the source code for `scheme/base.c` you can see this is in the `for-each` function: + + static void __lambda_368(void *data, int argc, closure _,object k_734906, object f_731928, object lis1_731927, object lists_731926_raw, ...) { + load_varargs(lists_731926, lists_731926_raw, argc - 3); + Cyc_st_add(data, "scheme/base.sld:for-each"); + if( (boolean_f != Cyc_is_null(lis1_731927)) ){ + +And in `scheme/base.sld` you can see where `length` is being called: + + (define (for-each f lis1 . lists) + (if (not (null? lis1)) + (if (pair? lists) + (let recur ((lists (cons lis1 lists))) + (receive (cars cdrs) (%cars+cdrs lists) + (if (pair? cars) + (begin + (apply f cars) + (recur cdrs))))) + ;; Fast path. + (if (eq? 
1 (length lis1)) + (f (car lis1)) + (begin (f (car lis1)) + (for-each f (cdr lis1))))))) + +The code can be simplified to make it more obvious what is going on: + + (define (for-each f lis1 . lists) + (if (not (null? lis1)) + (if (eq? 1 (length lis1)) + (f (car lis1)) + (begin (f (car lis1)) + (for-each f (cdr lis1)))))) + +Basically on every iteration of `for-each` the code is calling `length` to see if `f` can be called directly. Well, that's not good - the main `for-each` loop itself has a time complexity of O(N). The runtime depends directly on the length of `lis1`. But each time `length` is called it must examine the entire contents of `lis1`, which is another O(N) operation. Combined with the outer loop this raises the overall time complexity to O(N^2) - which can really add up for large values of N. + + +This reminds me of an old article from Joel Spolsky that talks about the same issue with respect to strings: +http://global.joelonsoftware.com/English/Articles/Interviewing.html + +Is their function fast? Look at how many times they call strlen. I've seen O(n^2) algorithms for strrev when it should be O(n), because they are calling strlen again and again in a loop. + + +Anyway the solution is to check directly for null, instead of scanning the whole string: + + (if (null? (cdr lis1)) + (f (car lis1)) + (begin (f (car lis1)) + +After rebuilding with this fix we can re-run the tail benchmark. 
Remember the older code took over 32 seconds to finish: + + From bfd826d4eedb7f2832f275d7973e2492ee223815 Mon Sep 17 00:00:00 2001 From: Justin Ethier Date: Sat, 3 Jun 2017 18:35:33 -0400 Subject: [PATCH 13/18] WIP --- tail-blog-post-notes.txt | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tail-blog-post-notes.txt b/tail-blog-post-notes.txt index 6de5d7b0..6672d88d 100644 --- a/tail-blog-post-notes.txt +++ b/tail-blog-post-notes.txt @@ -168,6 +168,20 @@ Anyway the solution is to check directly for null, instead of scanning the whole (f (car lis1)) (begin (f (car lis1)) -After rebuilding with this fix we can re-run the tail benchmark. Remember the older code took over 32 seconds to finish: +After rebuilding with this fix we can re-run the tail benchmark: + [justin@justin-pc r7rs-benchmarks]$ ./bench cyclone tail + + Testing tail under Cyclone + Including postlude /home/justin/Documents/r7rs-benchmarks/src/Cyclone-postlude.scm + Compiling... + Running... + Running tail:25 + Elapsed time: 0.72314 seconds (1) for tail:25 + +!CSVLINE!+cyclone-0.5.1,tail:25,0.72314 + + real 0m0.729s + user 0m0.540s + sys 0m0.187s +Whoa! Remember the older code took over 32 seconds to finish, and now its less than a second. Not bad. From 938df60cac788d2a9408efbfc9973da6208dfaf8 Mon Sep 17 00:00:00 2001 From: Justin Ethier Date: Sat, 3 Jun 2017 18:59:52 -0400 Subject: [PATCH 14/18] Revised --- tail-blog-post-notes.txt | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tail-blog-post-notes.txt b/tail-blog-post-notes.txt index 6672d88d..15769acd 100644 --- a/tail-blog-post-notes.txt +++ b/tail-blog-post-notes.txt @@ -14,8 +14,9 @@ Initial speed test for `tail` - Cyclone does very poorly compared to other Schem user 0m31.783s sys 0m0.513s +One of the easiest things to do is run a profiler on the code. 
This lets us see if there is something in the runtime or compiled code that is dominating the runtime, and possibly slowing things down. This isn't a catch-all - for example, it can't show us if a compiler optimization is needed. But it helps paint a picture of what is going on. -Change `Makefile.config` in cyclone-bootstrap to do profiling. The `-O2` option in the lines below are replaced with `-g -pg`: +To get started we change `Makefile.config` in cyclone-bootstrap to enable profiling. The `-O2` option in the lines below is replaced with `-g -pg`: CFLAGS ?= -g -pg -fPIC -rdynamic -Wall -Iinclude -L. COMP_CFLAGS ?= -g -pg -fPIC -rdynamic -Wall -I$(PREFIX)/include -L$(PREFIX)/lib @@ -24,23 +25,24 @@ Then Cyclone is rebuilt: [justin@justin-pc cyclone-bootstrap]$ sudo make clean ; ./install.sh -Once this is done a `gmon.out` file will be generated each time Cyclone or a compiled Cyclone program is executed. +Once this is done a `gmon.out` file will be generated each time Cyclone or a compiled Cyclone program is executed. This can be used to create a detailed analysis of what the program was doing at runtime. - -Set up for running the `tail` benchmark directly: +Now we perform setup for running the `tail` benchmark directly: [justin@justin-pc r7rs-benchmarks]$ cd /tmp/larcenous/Cyclone/ [justin@justin-pc Cyclone]$ cp -r ~/Documents/r7rs-benchmarks/inputs/ . [justin@justin-pc Cyclone]$ mkdir outputs -Recompile `tail.scm` to get a version with profiling, and then run it to generate a `gmon.out` file: +And recompile `tail.scm` to get a version with profiling. 
We then run it to generate a `gmon.out` file: [justin@justin-pc Cyclone]$ cyclone tail.scm [justin@justin-pc Cyclone]$ ./tail < inputs/tail.input -[justin@justin-pc Cyclone]$ ls -gmon.out inputs outputs tail tail.c tail.o tail.scm -[justin@justin-pc Cyclone]$ gprof ./tail gmon.out > report.txt +Then we run `gprof` to create a report: + + [justin@justin-pc Cyclone]$ ls + gmon.out inputs outputs tail tail.c tail.o tail.scm + [justin@justin-pc Cyclone]$ gprof ./tail gmon.out > report.txt Let's examine the start of `report.txt` to see the functions that are taking up the most of the program's runtime: @@ -67,8 +69,9 @@ Let's examine the start of `report.txt` to see the functions that are taking up Well that's interesting, `tail` is spending all of its time computing `Cyc_length`. +A compiled Cyclone program is just a regular C program so we can use `gdb` to debug it and figure out how `Cyc_length` is being called. -Run gdb: +First we need to know what inputs to use: [justin@justin-pc Cyclone]$ cat inputs/tail.input 25 @@ -76,7 +79,7 @@ Run gdb: "outputs/tail.output" ignored -A compiled Cyclone program is just a regular C program so we can use `gdb` to debug it: +To run gdb: [justin@justin-pc Cyclone]$ gdb ./tail GNU gdb (GDB) 7.12.1 @@ -120,7 +123,7 @@ After continuing a few times, the code breaks here: #3 0x00000000004b7aa7 in __lambda_299 (data=0x7cd4e0, argc=3, _=0x7ffff66350e0, k_735061=0x7ffffff9d4c0, char_731994=0x2a, port_731993_raw=0x7ffff6333200) at scheme/base.c:23706 #4 0x000000000055cf5e in do_dispatch (data=0x7cd4e0, argc=3, func=0x4b7523 <__lambda_299>, clo=0x7ffff66350e0, b=0x7ffffff9ccf0) at dispatch.c:6 -Opening the source code for `scheme/base.c` you can see this is in the `for-each` function: +Opening the source code for `scheme/base.c` you can see the code breaks in the `for-each` function: static void __lambda_368(void *data, int argc, closure _,object k_734906, object f_731928, object lis1_731927, object lists_731926_raw, ...) 
{ load_varargs(lists_731926, lists_731926_raw, argc - 3); @@ -155,14 +158,11 @@ The code can be simplified to make it more obvious what is going on: Basically on every iteration of `for-each` the code is calling `length` to see if `f` can be called directly. Well, that's not good - the main `for-each` loop itself has a time complexity of O(N). The runtime depends directly on the length of `lis1`. But each time `length` is called it must examine the entire contents of `lis1`, which is another O(N) operation. Combined with the outer loop this raises the overall time complexity to O(N^2) - which can really add up for large values of N. +This reminds me of [an old article from Joel Spolsky](http://global.joelonsoftware.com/English/Articles/Interviewing.html) that talks about the same issue with respect to strings: -This reminds me of an old article from Joel Spolsky that talks about the same issue with respect to strings: -http://global.joelonsoftware.com/English/Articles/Interviewing.html +> Is their function fast? Look at how many times they call strlen. I've seen O(n^2) algorithms for strrev when it should be O(n), because they are calling strlen again and again in a loop. -Is their function fast? Look at how many times they call strlen. I've seen O(n^2) algorithms for strrev when it should be O(n), because they are calling strlen again and again in a loop. - - -Anyway the solution is to check directly for null, instead of scanning the whole string: +The solution is to check directly for null, instead of scanning the whole list: (if (null? (cdr lis1)) (f (car lis1)) @@ -184,4 +184,4 @@ After rebuilding with this fix we can re-run the tail benchmark: user 0m0.540s sys 0m0.187s -Whoa! Remember the older code took over 32 seconds to finish, and now its less than a second. Not bad. +Whoa! Remember the older code took over 32 seconds to finish? Now it finishes in less than a second. Not bad.
From 1fbe7af7285168ab035614aaeb653150ddd1add2 Mon Sep 17 00:00:00 2001 From: Justin Ethier Date: Sat, 3 Jun 2017 19:00:13 -0400 Subject: [PATCH 15/18] Renamed --- tail-blog-post-notes.txt => tail-blog-post-notes.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tail-blog-post-notes.txt => tail-blog-post-notes.md (100%) diff --git a/tail-blog-post-notes.txt b/tail-blog-post-notes.md similarity index 100% rename from tail-blog-post-notes.txt rename to tail-blog-post-notes.md From 2cd0374d2406111fc4f8eb29293f5c558fe4ace3 Mon Sep 17 00:00:00 2001 From: Justin Ethier Date: Sat, 3 Jun 2017 19:01:51 -0400 Subject: [PATCH 16/18] Indent back trace --- tail-blog-post-notes.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tail-blog-post-notes.md b/tail-blog-post-notes.md index 15769acd..f91f69c0 100644 --- a/tail-blog-post-notes.md +++ b/tail-blog-post-notes.md @@ -116,12 +116,12 @@ To run gdb: After continuing a few times, the code breaks here: -#0 Cyc_length (data=0x7cd4e0, l=0x7ffffffbc660) at runtime.c:1713 -#1 0x00000000004af96b in __lambda_368 (data=0x7cd4e0, argc=3, _=0x7ffff6635340, k_734906=0x7ffff63331c0, f_731928=0x7ffffffb2dd0, lis1_731927=0x7ffffffbc660, - lists_731926_raw=0x7ca420 ) at scheme/base.c:22680 -#2 0x00000000004afde1 in __lambda_367 (data=0x7cd4e0, argc=1, self_736727=0x7ffffff9d4c0, r_734921=0x7ca420 ) at scheme/base.c:22703 -#3 0x00000000004b7aa7 in __lambda_299 (data=0x7cd4e0, argc=3, _=0x7ffff66350e0, k_735061=0x7ffffff9d4c0, char_731994=0x2a, port_731993_raw=0x7ffff6333200) at scheme/base.c:23706 -#4 0x000000000055cf5e in do_dispatch (data=0x7cd4e0, argc=3, func=0x4b7523 <__lambda_299>, clo=0x7ffff66350e0, b=0x7ffffff9ccf0) at dispatch.c:6 + #0 Cyc_length (data=0x7cd4e0, l=0x7ffffffbc660) at runtime.c:1713 + #1 0x00000000004af96b in __lambda_368 (data=0x7cd4e0, argc=3, _=0x7ffff6635340, k_734906=0x7ffff63331c0, f_731928=0x7ffffffb2dd0, lis1_731927=0x7ffffffbc660, + lists_731926_raw=0x7ca420 ) at 
scheme/base.c:22680 + #2 0x00000000004afde1 in __lambda_367 (data=0x7cd4e0, argc=1, self_736727=0x7ffffff9d4c0, r_734921=0x7ca420 ) at scheme/base.c:22703 + #3 0x00000000004b7aa7 in __lambda_299 (data=0x7cd4e0, argc=3, _=0x7ffff66350e0, k_735061=0x7ffffff9d4c0, char_731994=0x2a, port_731993_raw=0x7ffff6333200) at scheme/base.c:23706 + #4 0x000000000055cf5e in do_dispatch (data=0x7cd4e0, argc=3, func=0x4b7523 <__lambda_299>, clo=0x7ffff66350e0, b=0x7ffffff9ccf0) at dispatch.c:6 Opening the source code for `scheme/base.c` you can see the code breaks in the `for-each` function: From 54386c444aa1d3bd8c68f224fcc7a3833c213e98 Mon Sep 17 00:00:00 2001 From: Justin Ethier Date: Sat, 3 Jun 2017 19:27:07 -0400 Subject: [PATCH 17/18] More revisions --- tail-blog-post-notes.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tail-blog-post-notes.md b/tail-blog-post-notes.md index f91f69c0..f7213778 100644 --- a/tail-blog-post-notes.md +++ b/tail-blog-post-notes.md @@ -14,14 +14,16 @@ Initial speed test for `tail` - Cyclone does very poorly compared to other Schem user 0m31.783s sys 0m0.513s -One of the easiest things to do is run a profiler on the code. This lets us see if there is something in the runtime or compiled code that is dominating the runtime, and possibly slowing things down. This isn't a catch-all - for example, it can't show us if a compiler optimization is needed. But it helps paint a picture of what is going on. +One of the easiest things to do is run a profiler on the code to figure out what is going on. This lets us see if there is something in the runtime or compiled code that is dominating the runtime, and possibly slowing things down. This isn't a catch-all - for example, it can't show us if a compiler optimization is needed. But it helps paint a picture of what is going on. + +A compiled Cyclone program is just a regular C program so we can use the standard GNU tools for profiling and debugging. 
To get started we change `Makefile.config` in cyclone-bootstrap to enable profiling. The `-O2` option in the lines below are replaced with `-g -pg`: CFLAGS ?= -g -pg -fPIC -rdynamic -Wall -Iinclude -L. COMP_CFLAGS ?= -g -pg -fPIC -rdynamic -Wall -I$(PREFIX)/include -L$(PREFIX)/lib -Then Cyclone is rebuilt: +Then Cyclone must be rebuilt: [justin@justin-pc cyclone-bootstrap]$ sudo make clean ; ./install.sh @@ -69,7 +71,7 @@ Let's examine the start of `report.txt` to see the functions that are taking up Well that's interesting, `tail` is spending all of its time computing `Cyc_length`. -A compiled Cyclone program is just a regular C program so we can use `gdb` to debug it and figure out how `Cyc_length` is being called. +Again, Cyclone compiles programs to C, so we can use `gdb` to debug them and figure out how `Cyc_length` is being called. First we need to know what inputs to use: From 199e75414d923edf233808948ce9dcc76c9b4b7c Mon Sep 17 00:00:00 2001 From: Justin Ethier Date: Mon, 5 Jun 2017 09:57:02 +0000 Subject: [PATCH 18/18] Revisions --- tail-blog-post-notes.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tail-blog-post-notes.md b/tail-blog-post-notes.md index f7213778..d579a5cd 100644 --- a/tail-blog-post-notes.md +++ b/tail-blog-post-notes.md @@ -1,4 +1,4 @@ -Initial speed test for `tail` - Cyclone does very poorly compared to other Schemes: +While going through a new run of the [R7RS benchmarks from Larceny](http://www.larcenists.org/benchmarksGenuineR7Linux.html), I noticed Cyclone performed significantly worse than other Schemes on the `tail` benchmark. Certainly when testing locally the results are less than impressive: [justin@justin-pc r7rs-benchmarks]$ ./bench cyclone tail @@ -14,7 +14,7 @@ Initial speed test for `tail` - Cyclone does very poorly compared to other Schem user 0m31.783s sys 0m0.513s -One of the easiest things to do is run a profiler on the code to figure out what is going on.
This lets us see if there is something in the runtime or compiled code that is dominating the runtime, and possibly slowing things down. This isn't a catch-all - for example, it can't show us if a compiler optimization is needed. But it helps paint a picture of what is going on. +One of the easiest things to do is run a profiler on the code to figure out what is going on. This lets us see if there is something in the runtime or compiled code that is dominating the execution time, and possibly slowing things down. This isn't a catch-all - for example, it can't show us if a compiler optimization is needed. But it helps paint a picture of what is going on. A compiled Cyclone program is just a regular C program so we can use the standard GNU tools for profiling and debugging. @@ -27,7 +27,7 @@ Then Cyclone must be rebuilt: [justin@justin-pc cyclone-bootstrap]$ sudo make clean ; ./install.sh -Once this is done a `gmon.out` file will be generated each time Cyclone or a compiled Cyclone program is executed. This can be used to create a detailed analysis of what the program was doing at runtime. +Once this is done a `gmon.out` file will be generated each time Cyclone or a compiled Cyclone program is executed. This can be used to create a detailed analysis of what the program is doing at runtime. Now we perform set up for running the `tail` benchmark directly: @@ -158,7 +158,7 @@ The code can be simplified to make it more obvious what is going on: (begin (f (car lis1)) (for-each f (cdr lis1)))))) -Basically on every iteration of `for-each` the code is calling `length` to see if `f` can be called directly. Well, that's not good - the main `for-each` loop itself has a time complexity of O(N). The runtime depends directly on the length of `lis1`. But each time `length` is called it must examine the entire contents of `lis1`, which is another O(N) operation. 
Combined with the outer loop this raises the overall time complexity to O(N^2) - which can really add up for large values of N. +Basically on every iteration of `for-each` the code is calling `length` to see if `f` can be called directly. Well, that's not good - the main `for-each` loop itself has a [time complexity of `O(n)`](https://en.wikipedia.org/wiki/Big_O_notation). The runtime depends directly on the length of `lis1`. But each time `length` is called it must examine the entire contents of `lis1`, which is another `O(n)` operation. Combined with the outer loop this raises the overall time complexity to `O(n^2)` - which can really add up for large values of `n`. This reminds me of [an old article from Joel Spolsky](http://global.joelonsoftware.com/English/Articles/Interviewing.html) that talks about the same issue with respect to strings: