gopls/internal/diff/lcs/old.go

1	// Copyright 2022 The Go Authors. All rights reserved.
2	// Use of this source code is governed by a BSD-style
3	// license that can be found in the LICENSE file.
4
5	package lcs
6
7	// TODO(adonovan): remove unclear references to "old" in this package.
8
9	import (
10	"fmt"
11	)
12
13	// A Diff is a replacement of a portion of A by a portion of B.
14	type Diff struct {
15	Start, End int // offsets of portion to delete in A
16	ReplStart, ReplEnd int // offset of replacement text in B
17	}
18
19	// DiffStrings returns the differences between two strings.
20	// It does not respect rune boundaries.
21	func DiffStrings(a, b string) []Diff { return diff(stringSeqs{a, b}) }
22
23	// DiffBytes returns the differences between two byte sequences.
24	// It does not respect rune boundaries.
25	func DiffBytes(a, b []byte) []Diff { return diff(bytesSeqs{a, b}) }
26
27	// DiffRunes returns the differences between two rune sequences.
28	func DiffRunes(a, b []rune) []Diff { return diff(runesSeqs{a, b}) }
29
30	func diff(seqs sequences) []Diff {
31	// A limit on how deeply the LCS algorithm should search. The value is just a guess.
32	const maxDiffs = 30
33	diff, _ := compute(seqs, twosided, maxDiffs/2)
34	return diff
35	}
36
37	// compute computes the list of differences between two sequences,
38	// along with the LCS. It is exercised directly by tests.
39	// The algorithm is one of {forward, backward, twosided}.
40	func compute(seqs sequences, algo func(*editGraph) lcs, limit int) ([]Diff, lcs) {
41	if limit <= 0 {
42	limit = 1 << 25 // effectively infinity
43	}
44	alen, blen := seqs.lengths()
45	g := &editGraph{
46	seqs: seqs,
47	vf: newtriang(limit),
48	vb: newtriang(limit),
49	limit: limit,
50	ux: alen,
51	uy: blen,
52	delta: alen - blen,
53	}
54	lcs := algo(g)
55	diffs := lcs.toDiffs(alen, blen)
56	return diffs, lcs
57	}
58
59	// editGraph carries the information for computing the lcs of two sequences.
60	type editGraph struct {
61	seqs sequences
62	vf, vb label // forward and backward labels
63
64	limit int // maximal value of D
65	// the bounding rectangle of the current edit graph
66	lx, ly, ux, uy int
67	delta int // common subexpression: (ux-lx)-(uy-ly)
68	}
69
70	// toDiffs converts an LCS to a list of edits.
71	func (lcs lcs) toDiffs(alen, blen int) []Diff {
72	var diffs []Diff
73	var pa, pb int // offsets in a, b
74	for _, l := range lcs {
75	if pa < l.X \|\| pb < l.Y {
76	diffs = append(diffs, Diff{pa, l.X, pb, l.Y})
77	}
78	pa = l.X + l.Len
79	pb = l.Y + l.Len
80	}
81	if pa < alen \|\| pb < blen {
82	diffs = append(diffs, Diff{pa, alen, pb, blen})
83	}
84	return diffs
85	}
86
87	// --- FORWARD ---
88
89	// fdone decides if the forwward path has reached the upper right
90	// corner of the rectangle. If so, it also returns the computed lcs.
91	func (e *editGraph) fdone(D, k int) (bool, lcs) {
92	// x, y, k are relative to the rectangle
93	x := e.vf.get(D, k)
94	y := x - k
95	if x == e.ux && y == e.uy {
96	return true, e.forwardlcs(D, k)
97	}
98	return false, nil
99	}
100
101	// run the forward algorithm, until success or up to the limit on D.
102	func forward(e *editGraph) lcs {
103	e.setForward(0, 0, e.lx)
104	if ok, ans := e.fdone(0, 0); ok {
105	return ans
106	}
107	// from D to D+1
108	for D := 0; D < e.limit; D++ {
109	e.setForward(D+1, -(D + 1), e.getForward(D, -D))
110	if ok, ans := e.fdone(D+1, -(D + 1)); ok {
111	return ans
112	}
113	e.setForward(D+1, D+1, e.getForward(D, D)+1)
114	if ok, ans := e.fdone(D+1, D+1); ok {
115	return ans
116	}
117	for k := -D + 1; k <= D-1; k += 2 {
118	// these are tricky and easy to get backwards
119	lookv := e.lookForward(k, e.getForward(D, k-1)+1)
120	lookh := e.lookForward(k, e.getForward(D, k+1))
121	if lookv > lookh {
122	e.setForward(D+1, k, lookv)
123	} else {
124	e.setForward(D+1, k, lookh)
125	}
126	if ok, ans := e.fdone(D+1, k); ok {
127	return ans
128	}
129	}
130	}
131	// D is too large
132	// find the D path with maximal x+y inside the rectangle and
133	// use that to compute the found part of the lcs
134	kmax := -e.limit - 1
135	diagmax := -1
136	for k := -e.limit; k <= e.limit; k += 2 {
137	x := e.getForward(e.limit, k)
138	y := x - k
139	if x+y > diagmax && x <= e.ux && y <= e.uy {
140	diagmax, kmax = x+y, k
141	}
142	}
143	return e.forwardlcs(e.limit, kmax)
144	}
145
146	// recover the lcs by backtracking from the farthest point reached
147	func (e *editGraph) forwardlcs(D, k int) lcs {
148	var ans lcs
149	for x := e.getForward(D, k); x != 0 \|\| x-k != 0; {
150	if ok(D-1, k-1) && x-1 == e.getForward(D-1, k-1) {
151	// if (x-1,y) is labelled D-1, x--,D--,k--,continue
152	D, k, x = D-1, k-1, x-1
153	continue
154	} else if ok(D-1, k+1) && x == e.getForward(D-1, k+1) {
155	// if (x,y-1) is labelled D-1, x, D--,k++, continue
156	D, k = D-1, k+1
157	continue
158	}
159	// if (x-1,y-1)--(x,y) is a diagonal, prepend,x--,y--, continue
160	y := x - k
161	ans = ans.prepend(x+e.lx-1, y+e.ly-1)
162	x--
163	}
164	return ans
165	}
166
167	// start at (x,y), go up the diagonal as far as possible,
168	// and label the result with d
169	func (e *editGraph) lookForward(k, relx int) int {
170	rely := relx - k
171	x, y := relx+e.lx, rely+e.ly
172	if x < e.ux && y < e.uy {
173	x += e.seqs.commonPrefixLen(x, e.ux, y, e.uy)
174	}
175	return x
176	}
177
178	func (e *editGraph) setForward(d, k, relx int) {
179	x := e.lookForward(k, relx)
180	e.vf.set(d, k, x-e.lx)
181	}
182
183	func (e *editGraph) getForward(d, k int) int {
184	x := e.vf.get(d, k)
185	return x
186	}
187
188	// --- BACKWARD ---
189
190	// bdone decides if the backward path has reached the lower left corner
191	func (e *editGraph) bdone(D, k int) (bool, lcs) {
192	// x, y, k are relative to the rectangle
193	x := e.vb.get(D, k)
194	y := x - (k + e.delta)
195	if x == 0 && y == 0 {
196	return true, e.backwardlcs(D, k)
197	}
198	return false, nil
199	}
200
201	// run the backward algorithm, until success or up to the limit on D.
202	func backward(e *editGraph) lcs {
203	e.setBackward(0, 0, e.ux)
204	if ok, ans := e.bdone(0, 0); ok {
205	return ans
206	}
207	// from D to D+1
208	for D := 0; D < e.limit; D++ {
209	e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1)
210	if ok, ans := e.bdone(D+1, -(D + 1)); ok {
211	return ans
212	}
213	e.setBackward(D+1, D+1, e.getBackward(D, D))
214	if ok, ans := e.bdone(D+1, D+1); ok {
215	return ans
216	}
217	for k := -D + 1; k <= D-1; k += 2 {
218	// these are tricky and easy to get wrong
219	lookv := e.lookBackward(k, e.getBackward(D, k-1))
220	lookh := e.lookBackward(k, e.getBackward(D, k+1)-1)
221	if lookv < lookh {
222	e.setBackward(D+1, k, lookv)
223	} else {
224	e.setBackward(D+1, k, lookh)
225	}
226	if ok, ans := e.bdone(D+1, k); ok {
227	return ans
228	}
229	}
230	}
231
232	// D is too large
233	// find the D path with minimal x+y inside the rectangle and
234	// use that to compute the part of the lcs found
235	kmax := -e.limit - 1
236	diagmin := 1 << 25
237	for k := -e.limit; k <= e.limit; k += 2 {
238	x := e.getBackward(e.limit, k)
239	y := x - (k + e.delta)
240	if x+y < diagmin && x >= 0 && y >= 0 {
241	diagmin, kmax = x+y, k
242	}
243	}
244	if kmax < -e.limit {
245	panic(fmt.Sprintf("no paths when limit=%d?", e.limit))
246	}
247	return e.backwardlcs(e.limit, kmax)
248	}
249
250	// recover the lcs by backtracking
251	func (e *editGraph) backwardlcs(D, k int) lcs {
252	var ans lcs
253	for x := e.getBackward(D, k); x != e.ux \|\| x-(k+e.delta) != e.uy; {
254	if ok(D-1, k-1) && x == e.getBackward(D-1, k-1) {
255	// D--, k--, x unchanged
256	D, k = D-1, k-1
257	continue
258	} else if ok(D-1, k+1) && x+1 == e.getBackward(D-1, k+1) {
259	// D--, k++, x++
260	D, k, x = D-1, k+1, x+1
261	continue
262	}
263	y := x - (k + e.delta)
264	ans = ans.append(x+e.lx, y+e.ly)
265	x++
266	}
267	return ans
268	}
269
270	// start at (x,y), go down the diagonal as far as possible,
271	func (e *editGraph) lookBackward(k, relx int) int {
272	rely := relx - (k + e.delta) // forward k = k + e.delta
273	x, y := relx+e.lx, rely+e.ly
274	if x > 0 && y > 0 {
275	x -= e.seqs.commonSuffixLen(0, x, 0, y)
276	}
277	return x
278	}
279
280	// convert to rectangle, and label the result with d
281	func (e *editGraph) setBackward(d, k, relx int) {
282	x := e.lookBackward(k, relx)
283	e.vb.set(d, k, x-e.lx)
284	}
285
286	func (e *editGraph) getBackward(d, k int) int {
287	x := e.vb.get(d, k)
288	return x
289	}
290
291	// -- TWOSIDED ---
292
293	func twosided(e *editGraph) lcs {
294	// The termination condition could be improved, as either the forward
295	// or backward pass could succeed before Myers' Lemma applies.
296	// Aside from questions of efficiency (is the extra testing cost-effective)
297	// this is more likely to matter when e.limit is reached.
298	e.setForward(0, 0, e.lx)
299	e.setBackward(0, 0, e.ux)
300
301	// from D to D+1
302	for D := 0; D < e.limit; D++ {
303	// just finished a backwards pass, so check
304	if got, ok := e.twoDone(D, D); ok {
305	return e.twolcs(D, D, got)
306	}
307	// do a forwards pass (D to D+1)
308	e.setForward(D+1, -(D + 1), e.getForward(D, -D))
309	e.setForward(D+1, D+1, e.getForward(D, D)+1)
310	for k := -D + 1; k <= D-1; k += 2 {
311	// these are tricky and easy to get backwards
312	lookv := e.lookForward(k, e.getForward(D, k-1)+1)
313	lookh := e.lookForward(k, e.getForward(D, k+1))
314	if lookv > lookh {
315	e.setForward(D+1, k, lookv)
316	} else {
317	e.setForward(D+1, k, lookh)
318	}
319	}
320	// just did a forward pass, so check
321	if got, ok := e.twoDone(D+1, D); ok {
322	return e.twolcs(D+1, D, got)
323	}
324	// do a backward pass, D to D+1
325	e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1)
326	e.setBackward(D+1, D+1, e.getBackward(D, D))
327	for k := -D + 1; k <= D-1; k += 2 {
328	// these are tricky and easy to get wrong
329	lookv := e.lookBackward(k, e.getBackward(D, k-1))
330	lookh := e.lookBackward(k, e.getBackward(D, k+1)-1)
331	if lookv < lookh {
332	e.setBackward(D+1, k, lookv)
333	} else {
334	e.setBackward(D+1, k, lookh)
335	}
336	}
337	}
338
339	// D too large. combine a forward and backward partial lcs
340	// first, a forward one
341	kmax := -e.limit - 1
342	diagmax := -1
343	for k := -e.limit; k <= e.limit; k += 2 {
344	x := e.getForward(e.limit, k)
345	y := x - k
346	if x+y > diagmax && x <= e.ux && y <= e.uy {
347	diagmax, kmax = x+y, k
348	}
349	}
350	if kmax < -e.limit {
351	panic(fmt.Sprintf("no forward paths when limit=%d?", e.limit))
352	}
353	lcs := e.forwardlcs(e.limit, kmax)
354	// now a backward one
355	// find the D path with minimal x+y inside the rectangle and
356	// use that to compute the lcs
357	diagmin := 1 << 25 // infinity
358	for k := -e.limit; k <= e.limit; k += 2 {
359	x := e.getBackward(e.limit, k)
360	y := x - (k + e.delta)
361	if x+y < diagmin && x >= 0 && y >= 0 {
362	diagmin, kmax = x+y, k
363	}
364	}
365	if kmax < -e.limit {
366	panic(fmt.Sprintf("no backward paths when limit=%d?", e.limit))
367	}
368	lcs = append(lcs, e.backwardlcs(e.limit, kmax)...)
369	// These may overlap (e.forwardlcs and e.backwardlcs return sorted lcs)
370	ans := lcs.fix()
371	return ans
372	}
373
374	// Does Myers' Lemma apply?
375	func (e *editGraph) twoDone(df, db int) (int, bool) {
376	if (df+db+e.delta)%2 != 0 {
377	return 0, false // diagonals cannot overlap
378	}
379	kmin := -db + e.delta
380	if -df > kmin {
381	kmin = -df
382	}
383	kmax := db + e.delta
384	if df < kmax {
385	kmax = df
386	}
387	for k := kmin; k <= kmax; k += 2 {
388	x := e.vf.get(df, k)
389	u := e.vb.get(db, k-e.delta)
390	if u <= x {
391	// is it worth looking at all the other k?
392	for l := k; l <= kmax; l += 2 {
393	x := e.vf.get(df, l)
394	y := x - l
395	u := e.vb.get(db, l-e.delta)
396	v := u - l
397	if x == u \|\| u == 0 \|\| v == 0 \|\| y == e.uy \|\| x == e.ux {
398	return l, true
399	}
400	}
401	return k, true
402	}
403	}
404	return 0, false
405	}
406
407	func (e *editGraph) twolcs(df, db, kf int) lcs {
408	// db==df \|\| db+1==df
409	x := e.vf.get(df, kf)
410	y := x - kf
411	kb := kf - e.delta
412	u := e.vb.get(db, kb)
413	v := u - kf
414
415	// Myers proved there is a df-path from (0,0) to (u,v)
416	// and a db-path from (x,y) to (N,M).
417	// In the first case the overall path is the forward path
418	// to (u,v) followed by the backward path to (N,M).
419	// In the second case the path is the backward path to (x,y)
420	// followed by the forward path to (x,y) from (0,0).
421
422	// Look for some special cases to avoid computing either of these paths.
423	if x == u {
424	// "babaab" "cccaba"
425	// already patched together
426	lcs := e.forwardlcs(df, kf)
427	lcs = append(lcs, e.backwardlcs(db, kb)...)
428	return lcs.sort()
429	}
430
431	// is (u-1,v) or (u,v-1) labelled df-1?
432	// if so, that forward df-1-path plus a horizontal or vertical edge
433	// is the df-path to (u,v), then plus the db-path to (N,M)
434	if u > 0 && ok(df-1, u-1-v) && e.vf.get(df-1, u-1-v) == u-1 {
435	// "aabbab" "cbcabc"
436	lcs := e.forwardlcs(df-1, u-1-v)
437	lcs = append(lcs, e.backwardlcs(db, kb)...)
438	return lcs.sort()
439	}
440	if v > 0 && ok(df-1, (u-(v-1))) && e.vf.get(df-1, u-(v-1)) == u {
441	// "abaabb" "bcacab"
442	lcs := e.forwardlcs(df-1, u-(v-1))
443	lcs = append(lcs, e.backwardlcs(db, kb)...)
444	return lcs.sort()
445	}
446
447	// The path can't possibly contribute to the lcs because it
448	// is all horizontal or vertical edges
449	if u == 0 \|\| v == 0 \|\| x == e.ux \|\| y == e.uy {
450	// "abaabb" "abaaaa"
451	if u == 0 \|\| v == 0 {
452	return e.backwardlcs(db, kb)
453	}
454	return e.forwardlcs(df, kf)
455	}
456
457	// is (x+1,y) or (x,y+1) labelled db-1?
458	if x+1 <= e.ux && ok(db-1, x+1-y-e.delta) && e.vb.get(db-1, x+1-y-e.delta) == x+1 {
459	// "bababb" "baaabb"
460	lcs := e.backwardlcs(db-1, kb+1)
461	lcs = append(lcs, e.forwardlcs(df, kf)...)
462	return lcs.sort()
463	}
464	if y+1 <= e.uy && ok(db-1, x-(y+1)-e.delta) && e.vb.get(db-1, x-(y+1)-e.delta) == x {
465	// "abbbaa" "cabacc"
466	lcs := e.backwardlcs(db-1, kb-1)
467	lcs = append(lcs, e.forwardlcs(df, kf)...)
468	return lcs.sort()
469	}
470
471	// need to compute another path
472	// "aabbaa" "aacaba"
473	lcs := e.backwardlcs(db, kb)
474	oldx, oldy := e.ux, e.uy
475	e.ux = u
476	e.uy = v
477	lcs = append(lcs, forward(e)...)
478	e.ux, e.uy = oldx, oldy
479	return lcs.sort()
480	}
481