skip common prefix/suffix in diff

This commit is contained in:
Alex Shinn 2021-12-28 16:07:43 +09:00
parent 9cd9ec1cda
commit ab29a2b973
2 changed files with 59 additions and 3 deletions

View file

@ -35,6 +35,22 @@
(lcs '(#\G #\A #\C) '(#\A #\G #\C #\A #\T)))
(test '((#\G #\A #\C) (#\A #\G #\C #\A #\T) ((#\A 1 0) (#\C 2 2)))
(diff "GAC" "AGCAT" read-char))
(test '((#\A #\G #\C #\A #\T) (#\A #\G #\C #\A #\T)
((#\A 0 0) (#\G 1 1) (#\C 2 2) (#\A 3 3) (#\T 4 4)))
(diff "AGCAT" "AGCAT" read-char))
(test '((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\.
#\G #\A #\C #\. #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\.
#\A #\G #\C #\A #\T #\. #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
((#\0 0 0) (#\1 1 1) (#\2 2 2) (#\3 3 3) (#\4 4 4) (#\5 5 5)
(#\6 6 6) (#\7 7 7) (#\8 8 8) (#\9 9 9) (#\. 10 10)
(#\A 12 11) (#\C 13 13)
(#\. 14 16) (#\0 15 17) (#\1 16 18) (#\2 17 19) (#\3 18 20)
(#\4 19 21) (#\5 20 22) (#\6 21 23) (#\7 22 24) (#\8 23 25)
(#\9 24 26)))
(diff "0123456789.GAC.0123456789"
"0123456789.AGCAT.0123456789"
read-char))
(let ((d (diff "GAC" "AGCAT" read-char)))
(test " »G« AC"
(edits->string (car d) (car (cddr d)) 1))

View file

@ -67,13 +67,53 @@
;;> ports, which are tokenized into a sequence by calling \var{reader}
;;> until \var{eof-object} is found. Returns a list of three values,
;;> the sequences read from \var{a} and \var{b}, and the \scheme{lcs}
;;> result.
;;> result. Unless \var{minimal?} is set, we trim common
;;> prefixes/suffixes before computing the lcs.
(define (diff a b . o)
(let-optionals o ((reader read-line)
(eq equal?))
(eq equal?)
(optimal? #f))
(let ((a-ls (source->list a reader))
(b-ls (source->list b reader)))
(list a-ls b-ls (lcs-with-positions a-ls b-ls eq)))))
(if optimal?
(list a-ls b-ls (lcs-with-positions a-ls b-ls eq))
(let lp1 ((i 0) (a a-ls) (b b-ls))
(cond
((or (null? a) (null? b)) ;; prefix or equal
(if (and (null? a) (null? b))
(let ((n-ls (iota (length a-ls)))) ;; equal
(list a-ls b-ls (map list a-ls n-ls n-ls)))
(list a-ls b-ls (lcs-with-positions a-ls b-ls eq))))
((eq (car a) (car b))
(lp1 (+ i 1) (cdr a) (cdr b)))
(else
(let lp2 ((j 0) (ra (reverse a)) (rb (reverse b)))
(cond
((or (null? ra) (null? rb)) ;; can't happen
(list a-ls b-ls (lcs-with-positions a-ls b-ls eq)))
((eq (car ra) (car rb))
(lp2 (+ j 1) (cdr ra) (cdr rb)))
(else
(let* ((a-ls2 (reverse ra))
(b-ls2 (reverse rb))
(a-left-len (+ i (length a-ls2)))
(b-left-len (+ i (length b-ls2))))
(list a-ls
b-ls
(append
(map (lambda (x i) (list x i i))
(take a-ls i)
(iota i))
(map (lambda (x)
(list (car x)
(+ i (cadr x))
(+ i (car (cddr x)))))
(lcs-with-positions a-ls2 b-ls2 eq))
(map (lambda (x i)
(list x (+ i a-left-len) (+ i b-left-len)))
(take-right a j)
(iota j))))))
)))))))))
;;> Utility to format the result of a \var{diff} to output port
;;> \var{out} (default \scheme{(current-output-port)}). Applies