;;;		      Buffer Merge Facility for GNU Emacs
;;;
;;;				 Joe Marshall
;;;
;;;				 jrm@lucid.com
;;;			      Lucid, Incorporated
;;;			       16 September 1988
;;;
;;;
;;; The Buffer Merge Facility provides the following functions:
;;;
;;;	line-merge
;;;	goto-next-merge-conflict
;;;	choose-first-merge-conflict
;;;	choose-second-merge-conflict
;;;	choose-both-merge-conflicts
;;;
;;; and the variable sandbar-lookahead.
;;;
;;; line-merge will prompt for the names of two buffers and produce or
;;; overwrite the buffer "*Merge*".  The *Merge* buffer will contain the
;;; contents of both buffers arranged in the following manner: Lines that are
;;; the same in both buffers will just be output.  Lines that differ will have
;;; headers around them like so:
;;; 
;;; (defun foo (x)
;;;   (let ((bar-result (bar x)))
;;; ********* Merge Conflict *******
;;; ********* From Buffer1   *******
;;;     (declare (integer bar-result))
;;; ********* From Buffer2   *******
;;; ********* End of Merge Conflict
;;;     (hack bar-result)
;;;     (format t "Hacking done.")))
;;; 
;;; (defun bar (x)
;;; ********* Merge Conflict ******
;;; ********* From Buffer1   ******
;;;   (check-type x integer)
;;; ********* From Buffer2   ******
;;;   (check-type x (or integer null))
;;; ********* End of Merge Conflict
;;;   (+ (or x 0) 7))
;;; 
;;; There will be a merge conflict header, the text from buffer 1, the text
;;; from buffer 2, and an end conflict trailer.  It is frequently the case that
;;; one or the other of the buffers will have no text where the other has text.
;;; 
;;; goto-next-merge-conflict will position the point at the next merge
;;;	conflict.
;;; 
;;; choose-first-merge-conflict will remove the headers and the text from the
;;;     second buffer leaving only the text from the first buffer.
;;; 
;;; choose-second-merge-conflict will remove the headers and the text from the
;;;     first buffer leaving only the text from the second buffer.
;;; 
;;; choose-both-merge-conflicts will remove the headers only and leave the text
;;;     from both buffers.
;;; 
;;; sandbar-lookahead variable controls how many matching lines must be between
;;;     merge conflicts to report an end of conflict.  This is to prevent
;;;     "sandbars" of merge conflicts in code where several small changes
;;;     have been made.  It's default value is 2.
;;; 
;;; Minutae
;;; 
;;; The algorithm is as follows:
;;; 
;;; 1. Read out all the lines from each buffer.
;;; 
;;; 2. Filter the lines removing blank lines and lines that contain only
;;;    one set of semicolons.  (lines that match the regexp "[ \t]*;*[ \t]*$")
;;;    This is so that the line merger will not try to optimize matches among
;;;    blank lines and comments.
;;; 
;;; 3. Compute a hash code for each line of each buffer in order to speed up
;;;    the matching algorithm.
;;; 
;;; 4. Find the longest set of lines that are common to both buffers.  This
;;;    takes on the order of 4 minutes for a pair of 1000 line buffers.
;;;    Instead of manipulating line lists, I put the lines into a vector and
;;;    manipulate vector indices.
;;; 
;;; 5. Output into the *Merge* buffer the lines of the other buffers.  If they
;;;    match the common lines, just write out one copy, if not, write out the
;;;    lines from buffer1 until meeting a match, and then the lines from
;;;    buffer2, putting headers around them.
;;;    
;;; The merging is done on a line by line basis, lines must match exactly.
;;; 
;;; Because of the filtering of blank lines and the sandbar lookahead,
;;; this may produce different output than the equivalent "diff".  The
;;; output should be mostly the same.  In cases I have looked at, the
;;; merge output has been more useful than the diff output.
;;; 
;;; 
;;; Bugs and other problems
;;; 
;;; Finding the longest subsequence of matching lines takes far too long,
;;; conses too much, and uses too much stack.  I rebind "max-lisp-eval-depth"
;;; and "max-specpdl-size" to keep emacs from barfing.  This seems a little
;;; weird to me, but it hasn't lost yet.
;;; 
;;; The code is opaque.  I have bummed the code to make it fast by removing
;;; function calls and recursion.  Since emacs has only one iteration construct
;;; (the while loop), the resulting code is rather hard to read and debug.
;;; There is also too much dead code from experiments in the file.
;;; 
;;; The filter for removing blank lines and blank comment lines is specific for
;;; LISP.  It should check the comment character in the buffer that it is being
;;; called on.
;;; 
;;; There should be a way to choose-both-merge-conflicts, but with the text
;;; from the second buffer appearing first.
;;; 
;;; There are cases where you can't make the correct choice:
;;;    buffer1        buffer2
;;;    (bad-line)     (good-line1)
;;;    (good-line)    (bad-line1)
;;; will leave you choice of choosing buffer1 or buffer2, but not the good
;;; lines from each.  This is made worse by the sandbar lookahead.
;;; 
;;; Merging may fail to produce useful output in the following case:
;;; 
;;;   buffer1                    buffer2
;;; 
;;; (defun foo (x)            (defun foo (x)          
;;;   (declare (integer x))     (add-3 x))               
;;;   (+ x 3))		                          
;;; 			  (defun bar (x)          
;;; 			    (declare (integer x)) 
;;; 			    (+ x 3))              
;;; 			                          
;;;    output                      
;;; 
;;;    (defun foo (x)
;;;    **** Merge conflict ***
;;;    **** From Buffer1 *****
;;;    **** From Buffer2 *****
;;;      (add-3 x))
;;; 
;;;    (defun bar (x)
;;;    **** End of conflict **
;;;      (declare (integer x))
;;;      (+ x 3))
;;; 
;;; This sort of problem occurs more often on short buffers with lots of
;;; changes rather than long buffers with fewer changes.  This is an artifact
;;; of merging on a line by line basis instead of syntactically parsing the
;;; buffer.
;;; 
;;; This is rather ungraceful when one buffer ends with a newline and the 
;;; other doesn't.
;;; 
;;; The merger only merges buffers, it would be useful to merge files also.
;;; 
;;; The hashing code is ad-hoc.  Better hashing could improve performance.


;;; Try M-x line-merge.  You might want to bind keys as follows:
;;; C-c 1 --> choose from first buffer
;;; C-c 2 --> choose from second buffer
;;; C-c b --> choose both
;;; C-c n --> goto next merge conflict.

(defun buffer/count-lines (buffer)
  (save-excursion
    (set-buffer buffer)
    (beginning-of-buffer)
    (let ((start (point)))
      (end-of-buffer)
      (count-lines start (point)))))

(defun buffer-to-line-list (buffer)
  (save-excursion
    (set-buffer buffer)
    (let* ((limit (buffer/count-lines buffer))
	   (bufend (progn (end-of-buffer) (point)))
	   (count 0)
	   (answer nil)
	   (loop t))
      (beginning-of-buffer)
      (while loop
	(let ((start (point)))
	  (end-of-line nil)
	  (if (not (= (point) bufend))
	      (forward-char))
	  (let ((line (buffer-substring (point) start)))
	    (if line (setq answer (cons line answer))))
	  (setq count (1+ count))
	  (if (= count limit)
	      (setq loop nil))
	  ))
      (nreverse answer))))

(defvar hash-mask (1- (ash 1 16)))
(defvar top-bit-mask (ash 7 16))

;;; Note: If the hashing algorithm is good enough, you can dispense with
;;; the string-equal comparisons in iposition and ifind-longest-subsequence.
;;; I don't quite trust this one yet.

(defun hash-string (string)
  (let ((limit (length string))
	(answer 0)
	(index 0))
    (while (< index limit)
      (setq answer (logand hash-mask
			   (+ (ash answer 3)
			      (ash answer -13)
			      (aref string index))))
      (setq index (1+ index)))
    answer))

(defun bind-subsequence (a b function)
  (let* ((end-a (length a))
	 (end-b (length b))
	 (subsequence-a (make-vector end-a nil))
	 (subsequence-b (make-vector end-b nil))
	 (b-supermap (make-vector (ash 1 16) nil))
	 (subsequence-table (make-vector (ash 1 16) nil)))
    (message "Hashing first buffer...")
    (let ((index  0))
      (while (< index end-a)
	(aset subsequence-a index (cons (hash-string (car a)) (car a)))
	(setq index (1+ index))
	(setq a (cdr a))))
    (message "Hashing second buffer...")
    (let ((index  0))
      (while (< index end-b)
	(let ((b-code (hash-string (car b))))
	  (aset b-supermap b-code 
		(cons index (aref b-supermap b-code)))
	  (aset subsequence-b index (cons b-code (car b)))
	  (setq index (1+ index))
	  (setq b (cdr b)))))
    (message "Finding longest subsequence")
    (funcall function)))

(defvar *hash-matches*   0)
(defvar *string-matches* 0)

(defun iposition (ia ib)
  (let* ((loop t)
	 (element (aref subsequence-a ia))
	 (search-chain (aref b-supermap (car element)))
	 (answer nil))
    (while loop
      (cond ((null search-chain) (setq loop nil))
	    ((< (car search-chain) ib) (setq search-chain (cdr search-chain)))
	    ((let ((eltb (aref subsequence-b (car search-chain))))
	       (and (= (car element) (car eltb))
		    (string-equal (cdr element) (cdr eltb))
		    ))
	     (setq answer (car search-chain))
	     (setq loop nil))
	    (t (setq search-chain (cdr search-chain)))))
    answer))

(defun isubsequence-p (ia ib test)
  (let ((loop t)
	(answer nil))
    (while loop
      (cond ((= ia end-a) (setq answer t) (setq loop nil))
	    ((= ib end-b) (setq answer nil) (setq loop nil))
	    ((funcall test 
		      (cdr (aref subsequence-a ia))
		      (cdr (aref subsequence-b ib)))
	     (setq ia (1+ ia))
	     (setq ib (1+ ib)))
	    (t (setq ib (1+ ib)))))
    answer))

(defun remove-if-not (rtest list)
  (let ((answer '()))
    (while list
      (if (funcall rtest (car list))
	  (setq answer (cons (car list) answer)))
      (setq list (cdr list)))
    (nreverse answer)))

(defun remove-if (rtest list)
  (let ((answer '()))
    (while list
      (if (not (funcall rtest (car list)))
	  (setq answer (cons (car list) answer)))
      (setq list (cdr list)))
    (nreverse answer)))

(defun ifind-longest-subsequence (a b)
  (let* ((reduced-a a)
	 (reduced-b b)
	 ;; Cough choke ack!!
	 (max-lisp-eval-depth (* (+ (length a) (length b)) 3))
	 (max-specpdl-size (* max-lisp-eval-depth 2)))
    (if (zerop (length reduced-a))
	reduced-a
	(if (> (length reduced-b)
	       (length reduced-a))
	    (bind-subsequence reduced-a reduced-b
	      '(lambda ()
		(ifind-longest-subsequence-helper 0 0 )))
	    (bind-subsequence reduced-b reduced-a
	      '(lambda ()
		(ifind-longest-subsequence-helper 0 0 )))))))

(defun sequence-index-to-sequence (sequence index)
  (let ((l (- (length sequence) 1))
	(answer '()))
    (while (not (< l index))
      (setq answer (cons (elt sequence l) answer))
      (setq l (- l 1)))
    answer))

(defun bump-to-mismatch (ia ib)
  (while (and (< ia end-a)
	      (< ib end-b)
	      (funcall test (aref subsequence-a ia) (aref subsequence-b ib)))
    (setq ia (1+ ia))
    (setq ib (1+ ib)))
  (cons ia ib))

(defun isubsequence (subsequence ifirst ilast)
  (let ((answer '()))
    (while (> ilast ifirst)
      (setq ilast (1- ilast))
      (setq answer (cons (aref subsequence ilast) answer)))
    answer))

(defun sequence-table-address (ia ib)
  (logand hash-mask
	  (logxor ia
		  (ash (- ia ib) 8))))

;;; I may want to store nil in my subsequence table, so T means no entry.
(defun lookup-in-subsequence-table (address)
  (let ((bucket (aref subsequence-table address))
	(answer t))
    (while bucket
      (let* ((entry (car bucket))
	     (key   (car entry))
	     (value (cdr entry)))
	(if (and (= ia (car key))
		 (= ib (cdr key)))
	    (progn (setq answer value) 
		   (setq bucket nil))
	    (progn (setq bucket (cdr bucket))))))
    answer))

(defun store-in-subsequence-table (address value)
  (let ((bucket  (aref subsequence-table address)))
    (aset subsequence-table address 
	  (cons (cons (cons ia ib) value) bucket))))

(defun ifind-longest-subsequence-helper (ia ib)
  (let* ((address (sequence-table-address ia ib))
	 (cached-value (lookup-in-subsequence-table address)))
    (if (eq cached-value t)
	(let ((answer
	       (cond ((or (= ia end-a) (= ib end-b)) nil)
		     ((let ((elta (aref subsequence-a ia))
			    (eltb (aref subsequence-b ib)))
			(and (= (car elta) (car eltb))
			     (string-equal (cdr elta) (cdr eltb))
			     ))
		      (cons (cdr (aref subsequence-a ia))
			    (ifind-longest-subsequence-helper
			      (1+ ia)
			      (1+ ib))))
		     (t
		      ;; Well, if (car a) is not in B, we know that it isn't
		      ;; in the final sequence.  Keep cdring down a until
		      ;; we find an element in b.
		      (let ((pos (iposition ia ib)))
			(if pos
			    (let ((tseq (cons (cdr (elt subsequence-a ia))
					      (ifind-longest-subsequence-helper
						(1+ ia) (1+ pos))))
				  (oseq (ifind-longest-subsequence-helper
					  (1+ ia) ib)))
			      (if (> (length tseq) (length oseq))
				  tseq
				  oseq))
			    (ifind-longest-subsequence-helper
			      (1+ ia)
			      ib)
			    ))))))
	  (store-in-subsequence-table address answer)
	  answer)
	cached-value)))

(defun stupid-line-p (line)
  (let ((index (string-match "[ \t]*;*[ \t]*$" line)))
    (and index
	 (zerop index))))

(defvar sandbar-lookahead 2
  "How many lines must match after a merge conflict to end the conflict.")

(defun sandbar-lookahead (lines1 lines2 common-lines)
  (let ((scan sandbar-lookahead)
	(l1   lines1)
	(l2   lines2)
	(c    common-lines)
	(answer    t))
    (while (and l1 l2 (not (zerop scan)) c answer)
      (setq answer (and (string-equal (car l1) (car l2))
			(or (stupid-line-p (car l1))
			    (string-equal (car l1) (car c)))))
      (if (not (stupid-line-p (car l1)))
	  (setq c (cdr c)))
      (setq l1 (cdr l1))
      (setq l2 (cdr l2))
      (setq scan (1- scan)))
    answer))

(defun merge-buffers (buf1 buf2 output-buffer)
  (let* ((buf1-lines (buffer-to-line-list buf1))
	 (buf2-lines (buffer-to-line-list buf2))
	 (common-lines (ifind-longest-subsequence
			 ;; Don't try to maximize newline runs.
			 (progn (message "Filtering buf1 lines...")
				(remove-if 'stupid-line-p buf1-lines))
			 (progn (message "Filtering buf2 lines...")
				(remove-if 'stupid-line-p buf2-lines)))))
    (message "Generating output...")
    (set-buffer output-buffer)
    (while (or buf1-lines
	       buf2-lines
	       common-lines)
      (let ((next-target (car common-lines)))
	(setq common-lines (cdr common-lines))
	;;; Strip off common newlines.  
	(while (and (car buf1-lines)
		    (car buf2-lines)
		    (string-equal (car buf1-lines) (car buf2-lines))
		    (stupid-line-p (car buf1-lines)))
	  (insert (car buf1-lines))
	  (setq buf1-lines (cdr buf1-lines))
	  (setq buf2-lines (cdr buf2-lines)))
	(if (not (and next-target
		      (car buf1-lines)
		      (car buf2-lines)
		      (string-equal (car buf1-lines) next-target)
		      (string-equal (car buf2-lines) next-target)))
	    ;; Conflict, strip off obnoxious stupid lines from end of
	    ;; conflict.  Look ahead a few lines to avoid "sandbarring".
	    (let ((buf1-conflicting-lines '())
		  (buf2-conflicting-lines '())
		  (common-stupid-lines '())
		  (loop t))
	      (while loop
		(if next-target
		    (while (not (string-equal (car buf1-lines) next-target))
		      (setq buf1-conflicting-lines 
			    (cons (car buf1-lines)
				  buf1-conflicting-lines))
		      (setq buf1-lines (cdr buf1-lines)))
		    (progn (setq buf1-conflicting-lines (nreverse buf1-lines))
			   (setq buf1-lines nil)))
		(if next-target
		    (while (not (string-equal (car buf2-lines) next-target))
		      (setq buf2-conflicting-lines 
			    (cons (car buf2-lines)
				  buf2-conflicting-lines))
		      (setq buf2-lines (cdr buf2-lines)))
		    (progn (setq buf2-conflicting-lines (nreverse buf2-lines))
			   (setq buf2-lines nil)))
		(if (sandbar-lookahead
		      buf1-lines buf2-lines 
		      (cons next-target common-lines))
		    (setq loop nil)
		    (progn (setq next-target (car common-lines))
			   (setq common-lines (cdr common-lines)))))

	      (while (and buf1-conflicting-lines
			  buf2-conflicting-lines
			  (string-equal (car buf1-conflicting-lines)
					(car buf2-conflicting-lines)))
		(setq common-stupid-lines 
		      (cons (car buf1-conflicting-lines) common-stupid-lines))
		(setq buf1-conflicting-lines (cdr buf1-conflicting-lines))
		(setq buf2-conflicting-lines (cdr buf2-conflicting-lines)))
	      (setq buf1-conflicting-lines (nreverse buf1-conflicting-lines))
	      (setq buf2-conflicting-lines (nreverse buf2-conflicting-lines))

	    (progn
	      ;; Real conflict, or stupid line conflict?
	      (if (or buf1-conflicting-lines
		      buf2-conflicting-lines)
		  (progn
		    (insert merge-conflict-message)
		    (insert buf1-contents-message)
		    (while buf1-conflicting-lines
		      (insert (car buf1-conflicting-lines))
		      (setq buf1-conflicting-lines
			    (cdr buf1-conflicting-lines)))
		    (insert buf2-contents-message)
		    (while buf2-conflicting-lines
		      (insert (car buf2-conflicting-lines))
		      (setq buf2-conflicting-lines 
			    (cdr buf2-conflicting-lines)))
		    (insert end-of-conflict-message)))
	      (while common-stupid-lines
		(insert (car common-stupid-lines))
		(setq common-stupid-lines (cdr common-stupid-lines)))
	      )))
	(if next-target
	    (progn
	      (insert next-target)
	      (and buf1-lines
		   (setq buf1-lines (cdr buf1-lines)))
	      (and buf2-lines
		   (setq buf2-lines (cdr buf2-lines)))))))))

(defun corelate-sequences (s1 s2 test)
  (let ((common (ifind-longest-subsequence
		  s1
		  s2
		  test)))
    (/ (* (length common) 200) (+ (length s1) (length s2)))))

(defvar merge-buffer "*Merge*")

(defvar merge-conflict-message "********* Merge Conflict ********\n")
(defvar contents-message  "********* From ")
(defvar contents-message-cont " *********\n")
(defvar end-of-conflict-message "********* End of Merge Conflict *\n")

(defun corelate-buffers (buf1 buf2)
  (let* ((buf1-lines (remove-if-not '(lambda (line)
				      (not (string-equal line "")))
				    (buffer-to-line-list buf1)))
	 (buf2-lines (remove-if-not '(lambda (line)
				      (not (string-equal line "")))
				    (buffer-to-line-list buf2))))
    (corelate-sequences buf1-lines buf2-lines 'string-equal)))

(defun line-merge (arg)
  "Merge two buffers on a line by line basis.  Result in *Merge*"
  (interactive "p")
  (let* ((source-buffer (read-buffer "Merge buffer?" (current-buffer) t))
	 (dest-buffer (read-buffer (concat "Merge "
					   source-buffer
					   " with buffer?")
				   (current-buffer)
				   t))
	 (output-buffer (progn (and (get-buffer merge-buffer)
				    (kill-buffer merge-buffer))
			       (get-buffer-create merge-buffer)))
	 (buf1-contents-message (concat contents-message
					source-buffer
					contents-message-cont))
	 (buf2-contents-message (concat contents-message
					dest-buffer
					contents-message-cont)))
      (merge-buffers source-buffer dest-buffer output-buffer)
      ;; Check to see that all the lines from the source and the destination
      ;; are in the merged buffer.
      (if t
	  (progn
	    (pop-to-buffer output-buffer)
	    (beginning-of-buffer))
	  (error "Line Merge failed, output buffer is missing lines."))))

(defun goto-next-merge-conflict ()
  "Put point at the beginning of the next merge conflict."
  (interactive)
  (let ((start (point)))
    (if (not (string-equal
	       (buffer-substring start (progn (next-line 1) (point)))
	       merge-conflict-message))
	(goto-char start))
    (search-forward merge-conflict-message)
    (previous-line 1)))

(defun at-merge-conflict ()
  (save-excursion
  (let ((start (point)))
    (if (not (string-equal
	       (buffer-substring start (progn (next-line 1) (point)))
	       merge-conflict-message))
      (error "Not at a merge conflict")))))

(defun choose-first-merge-conflict ()
  "Delete merge conflict messages and text from second buffer."
  (interactive)
  (at-merge-conflict)
  (let ((start (point)))
    (search-forward contents-message)
    (end-of-line)
    (forward-char)
    (kill-region start (point)))
  (search-forward contents-message)
  (beginning-of-line)
  (let ((start (point)))
    (search-forward end-of-conflict-message)
    (kill-region start (point))))

(defun choose-second-merge-conflict ()
  "Delete merge conflict messages and text from first buffer."
  (interactive)
  (at-merge-conflict)
  (let ((start (point)))
    (search-forward contents-message)
    (search-forward contents-message)
    (end-of-line)
    (forward-char)
    (kill-region start (point)))
  (search-forward end-of-conflict-message)
  (previous-line 1)
  (kill-line)
  (kill-line)
  )

(defun choose-both-merge-conflicts ()
  "Delete merge conflict messages and leave text from both buffers."
  (interactive)
  (at-merge-conflict)
  (let ((start (point)))
    (search-forward contents-message)
    (end-of-line)
    (forward-char)
    (kill-region start (point)))
  (search-forward contents-message)
  (beginning-of-line)
  (kill-line)
  (kill-line)
  (search-forward end-of-conflict-message)
  (previous-line 1)
  (kill-line)
  (kill-line)
  )
	
(defun verify-merge (buf1 buf2 output)
  ;; Simple-minded verify, buf1 and buf2 are subsequences of the output.
  ;; This means that the output has at least all the lines in buf1 and buf2
  ;; in the correct order, but not that the merge conflicts are correctly
  ;; reported.
  (let ((olines (buffer-to-line-list output)))
    (and
      (bind-subsequence (buffer-to-line-list buf1)
			 olines
			 '(lambda () (isubsequence-p 0 0 'string-equal)))
      (bind-subsequence (buffer-to-line-list buf2)
			 olines
			 '(lambda () (isubsequence-p 0 0 'string-equal))))))
