gopls/go/ssa/lift.go

1	// Copyright 2013 The Go Authors. All rights reserved.
2	// Use of this source code is governed by a BSD-style
3	// license that can be found in the LICENSE file.
4
5	package ssa
6
7	// This file defines the lifting pass which tries to "lift" Alloc
8	// cells (new/local variables) into SSA registers, replacing loads
9	// with the dominating stored value, eliminating loads and stores, and
10	// inserting φ-nodes as needed.
11
12	// Cited papers and resources:
13	//
14	// Ron Cytron et al. 1991. Efficiently computing SSA form...
15	// http://doi.acm.org/10.1145/115372.115320
16	//
17	// Cooper, Harvey, Kennedy. 2001. A Simple, Fast Dominance Algorithm.
18	// Software Practice and Experience 2001, 4:1-10.
19	// http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
20	//
21	// Daniel Berlin, llvmdev mailing list, 2012.
22	// http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html
23	// (Be sure to expand the whole thread.)
24
25	// TODO(adonovan): opt: there are many optimizations worth evaluating, and
26	// the conventional wisdom for SSA construction is that a simple
27	// algorithm well engineered often beats those of better asymptotic
28	// complexity on all but the most egregious inputs.
29	//
30	// Danny Berlin suggests that the Cooper et al. algorithm for
31	// computing the dominance frontier is superior to Cytron et al.
32	// Furthermore he recommends that rather than computing the DF for the
33	// whole function then renaming all alloc cells, it may be cheaper to
34	// compute the DF for each alloc cell separately and throw it away.
35	//
36	// Consider exploiting liveness information to avoid creating dead
37	// φ-nodes which we then immediately remove.
38	//
39	// Also see many other "TODO: opt" suggestions in the code.
40
41	import (
42	"fmt"
43	"go/token"
44	"go/types"
45	"math/big"
46	"os"
47
48	"golang.org/x/tools/internal/typeparams"
49	)
50
51	// If true, show diagnostic information at each step of lifting.
52	// Very verbose.
53	const debugLifting = false
54
55	// domFrontier maps each block to the set of blocks in its dominance
56	// frontier. The outer slice is conceptually a map keyed by
57	// Block.Index. The inner slice is conceptually a set, possibly
58	// containing duplicates.
59	//
60	// TODO(adonovan): opt: measure impact of dups; consider a packed bit
61	// representation, e.g. big.Int, and bitwise parallel operations for
62	// the union step in the Children loop.
63	//
64	// domFrontier's methods mutate the slice's elements but not its
65	// length, so their receivers needn't be pointers.
66	type domFrontier [][]*BasicBlock
67
68	func (df domFrontier) add(u, v *BasicBlock) {
69	p := &df[u.Index]
70	p = append(p, v)
71	}
72
73	// build builds the dominance frontier df for the dominator (sub)tree
74	// rooted at u, using the Cytron et al. algorithm.
75	//
76	// TODO(adonovan): opt: consider Berlin approach, computing pruned SSA
77	// by pruning the entire IDF computation, rather than merely pruning
78	// the DF -> IDF step.
79	func (df domFrontier) build(u *BasicBlock) {
80	// Encounter each node u in postorder of dom tree.
81	for _, child := range u.dom.children {
82	df.build(child)
83	}
84	for _, vb := range u.Succs {
85	if v := vb.dom; v.idom != u {
86	df.add(u, vb)
87	}
88	}
89	for _, w := range u.dom.children {
90	for _, vb := range df[w.Index] {
91	// TODO(adonovan): opt: use word-parallel bitwise union.
92	if v := vb.dom; v.idom != u {
93	df.add(u, vb)
94	}
95	}
96	}
97	}
98
99	func buildDomFrontier(fn *Function) domFrontier {
100	df := make(domFrontier, len(fn.Blocks))
101	df.build(fn.Blocks[0])
102	if fn.Recover != nil {
103	df.build(fn.Recover)
104	}
105	return df
106	}
107
108	func removeInstr(refs []Instruction, instr Instruction) []Instruction {
109	i := 0
110	for _, ref := range refs {
111	if ref == instr {
112	continue
113	}
114	refs[i] = ref
115	i++
116	}
117	for j := i; j != len(refs); j++ {
118	refs[j] = nil // aid GC
119	}
120	return refs[:i]
121	}
122
123	// lift replaces local and new Allocs accessed only with
124	// load/store by SSA registers, inserting φ-nodes where necessary.
125	// The result is a program in classical pruned SSA form.
126	//
127	// Preconditions:
128	// - fn has no dead blocks (blockopt has run).
129	// - Def/use info (Operands and Referrers) is up-to-date.
130	// - The dominator tree is up-to-date.
131	func lift(fn *Function) {
132	// TODO(adonovan): opt: lots of little optimizations may be
133	// worthwhile here, especially if they cause us to avoid
134	// buildDomFrontier. For example:
135	//
136	// - Alloc never loaded? Eliminate.
137	// - Alloc never stored? Replace all loads with a zero constant.
138	// - Alloc stored once? Replace loads with dominating store;
139	// don't forget that an Alloc is itself an effective store
140	// of zero.
141	// - Alloc used only within a single block?
142	// Use degenerate algorithm avoiding φ-nodes.
143	// - Consider synergy with scalar replacement of aggregates (SRA).
144	// e.g. *(&x.f) where x is an Alloc.
145	// Perhaps we'd get better results if we generated this as x.f
146	// i.e. Field(x, .f) instead of Load(FieldIndex(x, .f)).
147	// Unclear.
148	//
149	// But we will start with the simplest correct code.
150	df := buildDomFrontier(fn)
151
152	if debugLifting {
153	title := false
154	for i, blocks := range df {
155	if blocks != nil {
156	if !title {
157	fmt.Fprintf(os.Stderr, "Dominance frontier of %s:\n", fn)
158	title = true
159	}
160	fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i], blocks)
161	}
162	}
163	}
164
165	newPhis := make(newPhiMap)
166
167	// During this pass we will replace some BasicBlock.Instrs
168	// (allocs, loads and stores) with nil, keeping a count in
169	// BasicBlock.gaps. At the end we will reset Instrs to the
170	// concatenation of all non-dead newPhis and non-nil Instrs
171	// for the block, reusing the original array if space permits.
172
173	// While we're here, we also eliminate 'rundefers'
174	// instructions in functions that contain no 'defer'
175	// instructions.
176	usesDefer := false
177
178	// A counter used to generate ~unique ids for Phi nodes, as an
179	// aid to debugging. We use large numbers to make them highly
180	// visible. All nodes are renumbered later.
181	fresh := 1000
182
183	// Determine which allocs we can lift and number them densely.
184	// The renaming phase uses this numbering for compact maps.
185	numAllocs := 0
186	for _, b := range fn.Blocks {
187	b.gaps = 0
188	b.rundefers = 0
189	for _, instr := range b.Instrs {
190	switch instr := instr.(type) {
191	case *Alloc:
192	index := -1
193	if liftAlloc(df, instr, newPhis, &fresh) {
194	index = numAllocs
195	numAllocs++
196	}
197	instr.index = index
198	case *Defer:
199	usesDefer = true
200	case *RunDefers:
201	b.rundefers++
202	}
203	}
204	}
205
206	// renaming maps an alloc (keyed by index) to its replacement
207	// value. Initially the renaming contains nil, signifying the
208	// zero constant of the appropriate type; we construct the
209	// Const lazily at most once on each path through the domtree.
210	// TODO(adonovan): opt: cache per-function not per subtree.
211	renaming := make([]Value, numAllocs)
212
213	// Renaming.
214	rename(fn.Blocks[0], renaming, newPhis)
215
216	// Eliminate dead φ-nodes.
217	removeDeadPhis(fn.Blocks, newPhis)
218
219	// Prepend remaining live φ-nodes to each block.
220	for _, b := range fn.Blocks {
221	nps := newPhis[b]
222	j := len(nps)
223
224	rundefersToKill := b.rundefers
225	if usesDefer {
226	rundefersToKill = 0
227	}
228
229	if j+b.gaps+rundefersToKill == 0 {
230	continue // fast path: no new phis or gaps
231	}
232
233	// Compact nps + non-nil Instrs into a new slice.
234	// TODO(adonovan): opt: compact in situ (rightwards)
235	// if Instrs has sufficient space or slack.
236	dst := make([]Instruction, len(b.Instrs)+j-b.gaps-rundefersToKill)
237	for i, np := range nps {
238	dst[i] = np.phi
239	}
240	for _, instr := range b.Instrs {
241	if instr == nil {
242	continue
243	}
244	if !usesDefer {
245	if _, ok := instr.(*RunDefers); ok {
246	continue
247	}
248	}
249	dst[j] = instr
250	j++
251	}
252	b.Instrs = dst
253	}
254
255	// Remove any fn.Locals that were lifted.
256	j := 0
257	for _, l := range fn.Locals {
258	if l.index < 0 {
259	fn.Locals[j] = l
260	j++
261	}
262	}
263	// Nil out fn.Locals[j:] to aid GC.
264	for i := j; i < len(fn.Locals); i++ {
265	fn.Locals[i] = nil
266	}
267	fn.Locals = fn.Locals[:j]
268	}
269
270	// removeDeadPhis removes φ-nodes not transitively needed by a
271	// non-Phi, non-DebugRef instruction.
272	func removeDeadPhis(blocks []*BasicBlock, newPhis newPhiMap) {
273	// First pass: find the set of "live" φ-nodes: those reachable
274	// from some non-Phi instruction.
275	//
276	// We compute reachability in reverse, starting from each φ,
277	// rather than forwards, starting from each live non-Phi
278	// instruction, because this way visits much less of the
279	// Value graph.
280	livePhis := make(map[*Phi]bool)
281	for _, npList := range newPhis {
282	for _, np := range npList {
283	phi := np.phi
284	if !livePhis[phi] && phiHasDirectReferrer(phi) {
285	markLivePhi(livePhis, phi)
286	}
287	}
288	}
289
290	// Existing φ-nodes due to && and \|\| operators
291	// are all considered live (see Go issue 19622).
292	for _, b := range blocks {
293	for _, phi := range b.phis() {
294	markLivePhi(livePhis, phi.(*Phi))
295	}
296	}
297
298	// Second pass: eliminate unused phis from newPhis.
299	for block, npList := range newPhis {
300	j := 0
301	for _, np := range npList {
302	if livePhis[np.phi] {
303	npList[j] = np
304	j++
305	} else {
306	// discard it, first removing it from referrers
307	for _, val := range np.phi.Edges {
308	if refs := val.Referrers(); refs != nil {
309	refs = removeInstr(refs, np.phi)
310	}
311	}
312	np.phi.block = nil
313	}
314	}
315	newPhis[block] = npList[:j]
316	}
317	}
318
319	// markLivePhi marks phi, and all φ-nodes transitively reachable via
320	// its Operands, live.
321	func markLivePhi(livePhis map[Phi]bool, phi Phi) {
322	livePhis[phi] = true
323	for _, rand := range phi.Operands(nil) {
324	if q, ok := (rand).(Phi); ok {
325	if !livePhis[q] {
326	markLivePhi(livePhis, q)
327	}
328	}
329	}
330	}
331
332	// phiHasDirectReferrer reports whether phi is directly referred to by
333	// a non-Phi instruction. Such instructions are the
334	// roots of the liveness traversal.
335	func phiHasDirectReferrer(phi *Phi) bool {
336	for _, instr := range *phi.Referrers() {
337	if _, ok := instr.(*Phi); !ok {
338	return true
339	}
340	}
341	return false
342	}
343
344	type blockSet struct{ big.Int } // (inherit methods from Int)
345
346	// add adds b to the set and returns true if the set changed.
347	func (s blockSet) add(b BasicBlock) bool {
348	i := b.Index
349	if s.Bit(i) != 0 {
350	return false
351	}
352	s.SetBit(&s.Int, i, 1)
353	return true
354	}
355
356	// take removes an arbitrary element from a set s and
357	// returns its index, or returns -1 if empty.
358	func (s *blockSet) take() int {
359	l := s.BitLen()
360	for i := 0; i < l; i++ {
361	if s.Bit(i) == 1 {
362	s.SetBit(&s.Int, i, 0)
363	return i
364	}
365	}
366	return -1
367	}
368
369	// newPhi is a pair of a newly introduced φ-node and the lifted Alloc
370	// it replaces.
371	type newPhi struct {
372	phi *Phi
373	alloc *Alloc
374	}
375
376	// newPhiMap records for each basic block, the set of newPhis that
377	// must be prepended to the block.
378	type newPhiMap map[*BasicBlock][]newPhi
379
380	// liftAlloc determines whether alloc can be lifted into registers,
381	// and if so, it populates newPhis with all the φ-nodes it may require
382	// and returns true.
383	//
384	// fresh is a source of fresh ids for phi nodes.
385	func liftAlloc(df domFrontier, alloc Alloc, newPhis newPhiMap, fresh int) bool {
386	// TODO(taking): zero constants of aggregated types can now be lifted.
387	switch deref(alloc.Type()).Underlying().(type) {
388	case types.Array, types.Struct, *typeparams.TypeParam:
389	return false
390	}
391
392	// Don't lift named return values in functions that defer
393	// calls that may recover from panic.
394	if fn := alloc.Parent(); fn.Recover != nil {
395	for _, nr := range fn.namedResults {
396	if nr == alloc {
397	return false
398	}
399	}
400	}
401
402	// Compute defblocks, the set of blocks containing a
403	// definition of the alloc cell.
404	var defblocks blockSet
405	for _, instr := range *alloc.Referrers() {
406	// Bail out if we discover the alloc is not liftable;
407	// the only operations permitted to use the alloc are
408	// loads/stores into the cell, and DebugRef.
409	switch instr := instr.(type) {
410	case *Store:
411	if instr.Val == alloc {
412	return false // address used as value
413	}
414	if instr.Addr != alloc {
415	panic("Alloc.Referrers is inconsistent")
416	}
417	defblocks.add(instr.Block())
418	case *UnOp:
419	if instr.Op != token.MUL {
420	return false // not a load
421	}
422	if instr.X != alloc {
423	panic("Alloc.Referrers is inconsistent")
424	}
425	case *DebugRef:
426	// ok
427	default:
428	return false // some other instruction
429	}
430	}
431	// The Alloc itself counts as a (zero) definition of the cell.
432	defblocks.add(alloc.Block())
433
434	if debugLifting {
435	fmt.Fprintln(os.Stderr, "\tlifting ", alloc, alloc.Name())
436	}
437
438	fn := alloc.Parent()
439
440	// Φ-insertion.
441	//
442	// What follows is the body of the main loop of the insert-φ
443	// function described by Cytron et al, but instead of using
444	// counter tricks, we just reset the 'hasAlready' and 'work'
445	// sets each iteration. These are bitmaps so it's pretty cheap.
446	//
447	// TODO(adonovan): opt: recycle slice storage for W,
448	// hasAlready, defBlocks across liftAlloc calls.
449	var hasAlready blockSet
450
451	// Initialize W and work to defblocks.
452	var work blockSet = defblocks // blocks seen
453	var W blockSet // blocks to do
454	W.Set(&defblocks.Int)
455
456	// Traverse iterated dominance frontier, inserting φ-nodes.
457	for i := W.take(); i != -1; i = W.take() {
458	u := fn.Blocks[i]
459	for _, v := range df[u.Index] {
460	if hasAlready.add(v) {
461	// Create φ-node.
462	// It will be prepended to v.Instrs later, if needed.
463	phi := &Phi{
464	Edges: make([]Value, len(v.Preds)),
465	Comment: alloc.Comment,
466	}
467	// This is merely a debugging aid:
468	phi.setNum(*fresh)
469	*fresh++
470
471	phi.pos = alloc.Pos()
472	phi.setType(deref(alloc.Type()))
473	phi.block = v
474	if debugLifting {
475	fmt.Fprintf(os.Stderr, "\tplace %s = %s at block %s\n", phi.Name(), phi, v)
476	}
477	newPhis[v] = append(newPhis[v], newPhi{phi, alloc})
478
479	if work.add(v) {
480	W.add(v)
481	}
482	}
483	}
484	}
485
486	return true
487	}
488
489	// replaceAll replaces all intraprocedural uses of x with y,
490	// updating x.Referrers and y.Referrers.
491	// Precondition: x.Referrers() != nil, i.e. x must be local to some function.
492	func replaceAll(x, y Value) {
493	var rands []*Value
494	pxrefs := x.Referrers()
495	pyrefs := y.Referrers()
496	for _, instr := range *pxrefs {
497	rands = instr.Operands(rands[:0]) // recycle storage
498	for _, rand := range rands {
499	if *rand != nil {
500	if *rand == x {
501	*rand = y
502	}
503	}
504	}
505	if pyrefs != nil {
506	pyrefs = append(pyrefs, instr) // dups ok
507	}
508	}
509	*pxrefs = nil // x is now unreferenced
510	}
511
512	// renamed returns the value to which alloc is being renamed,
513	// constructing it lazily if it's the implicit zero initialization.
514	func renamed(renaming []Value, alloc *Alloc) Value {
515	v := renaming[alloc.index]
516	if v == nil {
517	v = zeroConst(deref(alloc.Type()))
518	renaming[alloc.index] = v
519	}
520	return v
521	}
522
523	// rename implements the (Cytron et al) SSA renaming algorithm, a
524	// preorder traversal of the dominator tree replacing all loads of
525	// Alloc cells with the value stored to that cell by the dominating
526	// store instruction. For lifting, we need only consider loads,
527	// stores and φ-nodes.
528	//
529	// renaming is a map from *Alloc (keyed by index number) to its
530	// dominating stored value; newPhis[x] is the set of new φ-nodes to be
531	// prepended to block x.
532	func rename(u *BasicBlock, renaming []Value, newPhis newPhiMap) {
533	// Each φ-node becomes the new name for its associated Alloc.
534	for _, np := range newPhis[u] {
535	phi := np.phi
536	alloc := np.alloc
537	renaming[alloc.index] = phi
538	}
539
540	// Rename loads and stores of allocs.
541	for i, instr := range u.Instrs {
542	switch instr := instr.(type) {
543	case *Alloc:
544	if instr.index >= 0 { // store of zero to Alloc cell
545	// Replace dominated loads by the zero value.
546	renaming[instr.index] = nil
547	if debugLifting {
548	fmt.Fprintf(os.Stderr, "\tkill alloc %s\n", instr)
549	}
550	// Delete the Alloc.
551	u.Instrs[i] = nil
552	u.gaps++
553	}
554
555	case *Store:
556	if alloc, ok := instr.Addr.(*Alloc); ok && alloc.index >= 0 { // store to Alloc cell
557	// Replace dominated loads by the stored value.
558	renaming[alloc.index] = instr.Val
559	if debugLifting {
560	fmt.Fprintf(os.Stderr, "\tkill store %s; new value: %s\n",
561	instr, instr.Val.Name())
562	}
563	// Remove the store from the referrer list of the stored value.
564	if refs := instr.Val.Referrers(); refs != nil {
565	refs = removeInstr(refs, instr)
566	}
567	// Delete the Store.
568	u.Instrs[i] = nil
569	u.gaps++
570	}
571
572	case *UnOp:
573	if instr.Op == token.MUL {
574	if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // load of Alloc cell
575	newval := renamed(renaming, alloc)
576	if debugLifting {
577	fmt.Fprintf(os.Stderr, "\tupdate load %s = %s with %s\n",
578	instr.Name(), instr, newval.Name())
579	}
580	// Replace all references to
581	// the loaded value by the
582	// dominating stored value.
583	replaceAll(instr, newval)
584	// Delete the Load.
585	u.Instrs[i] = nil
586	u.gaps++
587	}
588	}
589
590	case *DebugRef:
591	if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // ref of Alloc cell
592	if instr.IsAddr {
593	instr.X = renamed(renaming, alloc)
594	instr.IsAddr = false
595
596	// Add DebugRef to instr.X's referrers.
597	if refs := instr.X.Referrers(); refs != nil {
598	refs = append(refs, instr)
599	}
600	} else {
601	// A source expression denotes the address
602	// of an Alloc that was optimized away.
603	instr.X = nil
604
605	// Delete the DebugRef.
606	u.Instrs[i] = nil
607	u.gaps++
608	}
609	}
610	}
611	}
612
613	// For each φ-node in a CFG successor, rename the edge.
614	for _, v := range u.Succs {
615	phis := newPhis[v]
616	if len(phis) == 0 {
617	continue
618	}
619	i := v.predIndex(u)
620	for _, np := range phis {
621	phi := np.phi
622	alloc := np.alloc
623	newval := renamed(renaming, alloc)
624	if debugLifting {
625	fmt.Fprintf(os.Stderr, "\tsetphi %s edge %s -> %s (#%d) (alloc=%s) := %s\n",
626	phi.Name(), u, v, i, alloc.Name(), newval.Name())
627	}
628	phi.Edges[i] = newval
629	if prefs := newval.Referrers(); prefs != nil {
630	prefs = append(prefs, phi)
631	}
632	}
633	}
634
635	// Continue depth-first recursion over domtree, pushing a
636	// fresh copy of the renaming map for each subtree.
637	for i, v := range u.dom.children {
638	r := renaming
639	if i < len(u.dom.children)-1 {
640	// On all but the final iteration, we must make
641	// a copy to avoid destructive update.
642	r = make([]Value, len(renaming))
643	copy(r, renaming)
644	}
645	rename(v, r, newPhis)
646	}
647
648	}
649