From ab29a2b973fad1f159b631e95229862c672495b8 Mon Sep 17 00:00:00 2001 From: Alex Shinn Date: Tue, 28 Dec 2021 16:07:43 +0900 Subject: [PATCH] skip common prefix/suffix in diff --- lib/chibi/diff-test.sld | 16 ++++++++++++++ lib/chibi/diff.scm | 46 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/lib/chibi/diff-test.sld b/lib/chibi/diff-test.sld index 0ecc2b38..e53b56d0 100644 --- a/lib/chibi/diff-test.sld +++ b/lib/chibi/diff-test.sld @@ -35,6 +35,22 @@ (lcs '(#\G #\A #\C) '(#\A #\G #\C #\A #\T))) (test '((#\G #\A #\C) (#\A #\G #\C #\A #\T) ((#\A 1 0) (#\C 2 2))) (diff "GAC" "AGCAT" read-char)) + (test '((#\A #\G #\C #\A #\T) (#\A #\G #\C #\A #\T) + ((#\A 0 0) (#\G 1 1) (#\C 2 2) (#\A 3 3) (#\T 4 4))) + (diff "AGCAT" "AGCAT" read-char)) + (test '((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\. + #\G #\A #\C #\. #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) + (#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\. + #\A #\G #\C #\A #\T #\. #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) + ((#\0 0 0) (#\1 1 1) (#\2 2 2) (#\3 3 3) (#\4 4 4) (#\5 5 5) + (#\6 6 6) (#\7 7 7) (#\8 8 8) (#\9 9 9) (#\. 10 10) + (#\A 12 11) (#\C 13 13) + (#\. 14 16) (#\0 15 17) (#\1 16 18) (#\2 17 19) (#\3 18 20) + (#\4 19 21) (#\5 20 22) (#\6 21 23) (#\7 22 24) (#\8 23 25) + (#\9 24 26))) + (diff "0123456789.GAC.0123456789" + "0123456789.AGCAT.0123456789" + read-char)) (let ((d (diff "GAC" "AGCAT" read-char))) (test " »G« AC" (edits->string (car d) (car (cddr d)) 1)) diff --git a/lib/chibi/diff.scm b/lib/chibi/diff.scm index ed7607eb..2e97988b 100644 --- a/lib/chibi/diff.scm +++ b/lib/chibi/diff.scm @@ -67,13 +67,53 @@ ;;> ports, which are tokenized into a sequence by calling \var{reader} ;;> until \var{eof-object} is found. Returns a list of three values, ;;> the sequences read from \var{a} and \var{b}, and the \scheme{lcs} -;;> result. +;;> result. Unless \var{minimal?} is set, we trim common +;;> prefixes/suffixes before computing the lcs. (define (diff a b . o) (let-optionals o ((reader read-line) - (eq equal?)) + (eq equal?) + (optimal? #f)) (let ((a-ls (source->list a reader)) (b-ls (source->list b reader))) - (list a-ls b-ls (lcs-with-positions a-ls b-ls eq))))) + (if optimal? + (list a-ls b-ls (lcs-with-positions a-ls b-ls eq)) + (let lp1 ((i 0) (a a-ls) (b b-ls)) + (cond + ((or (null? a) (null? b)) ;; prefix or equal + (if (and (null? a) (null? b)) + (let ((n-ls (iota (length a-ls)))) ;; equal + (list a-ls b-ls (map list a-ls n-ls n-ls))) + (list a-ls b-ls (lcs-with-positions a-ls b-ls eq)))) + ((eq (car a) (car b)) + (lp1 (+ i 1) (cdr a) (cdr b))) + (else + (let lp2 ((j 0) (ra (reverse a)) (rb (reverse b))) + (cond + ((or (null? ra) (null? rb)) ;; can't happen + (list a-ls b-ls (lcs-with-positions a-ls b-ls eq))) + ((eq (car ra) (car rb)) + (lp2 (+ j 1) (cdr ra) (cdr rb))) + (else + (let* ((a-ls2 (reverse ra)) + (b-ls2 (reverse rb)) + (a-left-len (+ i (length a-ls2))) + (b-left-len (+ i (length b-ls2)))) + (list a-ls + b-ls + (append + (map (lambda (x i) (list x i i)) + (take a-ls i) + (iota i)) + (map (lambda (x) + (list (car x) + (+ i (cadr x)) + (+ i (car (cddr x))))) + (lcs-with-positions a-ls2 b-ls2 eq)) + (map (lambda (x i) + (list x (+ i a-left-len) (+ i b-left-len))) + (take-right a j) + (iota j)))))) + ))))))))) ;;> Utility to format the result of a \var{diff} to output port ;;> \var{out} (default \scheme{(current-output-port)}). Applies