| 1 | // Copyright 2013 The Go Authors. All rights reserved. |
|---|---|
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | // This package provides Rapid Type Analysis (RTA) for Go, a fast |
| 6 | // algorithm for call graph construction and discovery of reachable code |
| 7 | // (and hence dead code) and runtime types. The algorithm was first |
| 8 | // described in: |
| 9 | // |
| 10 | // David F. Bacon and Peter F. Sweeney. 1996. |
| 11 | // Fast static analysis of C++ virtual function calls. (OOPSLA '96) |
| 12 | // http://doi.acm.org/10.1145/236337.236371 |
| 13 | // |
| 14 | // The algorithm uses dynamic programming to tabulate the cross-product |
| 15 | // of the set of known "address taken" functions with the set of known |
| 16 | // dynamic calls of the same type. As each new address-taken function |
| 17 | // is discovered, call graph edges are added from each known callsite, |
| 18 | // and as each new call site is discovered, call graph edges are added |
| 19 | // from it to each known address-taken function. |
| 20 | // |
| 21 | // A similar approach is used for dynamic calls via interfaces: it |
| 22 | // tabulates the cross-product of the set of known "runtime types", |
| 23 | // i.e. types that may appear in an interface value, or be derived from |
| 24 | // one via reflection, with the set of known "invoke"-mode dynamic |
| 25 | // calls. As each new "runtime type" is discovered, call edges are |
| 26 | // added from the known call sites, and as each new call site is |
| 27 | // discovered, call graph edges are added to each compatible |
| 28 | // method. |
| 29 | // |
| 30 | // In addition, we must consider all exported methods of any runtime type |
| 31 | // as reachable, since they may be called via reflection. |
| 32 | // |
| 33 | // Each time a newly added call edge causes a new function to become |
| 34 | // reachable, the code of that function is analyzed for more call sites, |
| 35 | // address-taken functions, and runtime types. The process continues |
| 36 | // until a fixed point is achieved. |
| 37 | // |
| 38 | // The resulting call graph is less precise than one produced by pointer |
| 39 | // analysis, but the algorithm is much faster. For example, running the |
| 40 | // cmd/callgraph tool on its own source takes ~2.1s for RTA and ~5.4s |
| 41 | // for points-to analysis. |
| 42 | package rta // import "golang.org/x/tools/go/callgraph/rta" |
| 43 | |
| 44 | // TODO(adonovan): test it by connecting it to the interpreter and |
| 45 | // replacing all "unreachable" functions by a special intrinsic, and |
| 46 | // ensure that that intrinsic is never called. |
| 47 | |
| 48 | // TODO(zpavlinovic): decide if the clients must use ssa.InstantiateGenerics |
| 49 | // mode when building programs with generics. It might be possible to |
| 50 | // extend rta to accurately support generics with just ssa.BuilderMode(0). |
| 51 | |
| 52 | import ( |
| 53 | "fmt" |
| 54 | "go/types" |
| 55 | |
| 56 | "golang.org/x/tools/go/callgraph" |
| 57 | "golang.org/x/tools/go/ssa" |
| 58 | "golang.org/x/tools/go/types/typeutil" |
| 59 | ) |
| 60 | |
| 61 | // A Result holds the results of Rapid Type Analysis, which includes the |
| 62 | // set of reachable functions/methods, runtime types, and the call graph. |
| 63 | type Result struct { |
| 64 | // CallGraph is the discovered callgraph. |
| 65 | // It does not include edges for calls made via reflection. |
| 66 | CallGraph *callgraph.Graph |
| 67 | |
| 68 | // Reachable contains the set of reachable functions and methods. |
| 69 | // This includes exported methods of runtime types, since |
| 70 | // they may be accessed via reflection. |
| 71 | // The value indicates whether the function is address-taken. |
| 72 | // |
| 73 | // (We wrap the bool in a struct to avoid inadvertent use of |
| 74 | // "if Reachable[f] {" to test for set membership.) |
| 75 | Reachable map[*ssa.Function]struct{ AddrTaken bool } |
| 76 | |
| 77 | // RuntimeTypes contains the set of types that are needed at |
| 78 | // runtime, for interfaces or reflection. |
| 79 | // |
| 80 | // The value indicates whether the type is inaccessible to reflection. |
| 81 | // Consider: |
| 82 | // type A struct{B} |
| 83 | // fmt.Println(new(A)) |
| 84 | // Types *A, A and B are accessible to reflection, but the unnamed |
| 85 | // type struct{B} is not. |
| 86 | RuntimeTypes typeutil.Map |
| 87 | } |
| 88 | |
| 89 | // Working state of the RTA algorithm. |
| 90 | type rta struct { |
| 91 | result *Result |
| 92 | |
| 93 | prog *ssa.Program |
| 94 | |
| 95 | worklist []*ssa.Function // list of functions to visit |
| 96 | |
| 97 | // addrTakenFuncsBySig contains all address-taken *Functions, grouped by signature. |
| 98 | // Keys are *types.Signature, values are map[*ssa.Function]bool sets. |
| 99 | addrTakenFuncsBySig typeutil.Map |
| 100 | |
| 101 | // dynCallSites contains all dynamic "call"-mode call sites, grouped by signature. |
| 102 | // Keys are *types.Signature, values are unordered []ssa.CallInstruction. |
| 103 | dynCallSites typeutil.Map |
| 104 | |
| 105 | // invokeSites contains all "invoke"-mode call sites, grouped by interface. |
| 106 | // Keys are *types.Interface (never *types.Named), |
| 107 | // Values are unordered []ssa.CallInstruction sets. |
| 108 | invokeSites typeutil.Map |
| 109 | |
| 110 | // The following two maps together define the subset of the |
| 111 | // m:n "implements" relation needed by the algorithm. |
| 112 | |
| 113 | // concreteTypes maps each concrete type to the set of interfaces that it implements. |
| 114 | // Keys are types.Type, values are unordered []*types.Interface. |
| 115 | // Only concrete types used as MakeInterface operands are included. |
| 116 | concreteTypes typeutil.Map |
| 117 | |
| 118 | // interfaceTypes maps each interface type to |
| 119 | // the set of concrete types that implement it. |
| 120 | // Keys are *types.Interface, values are unordered []types.Type. |
| 121 | // Only interfaces used in "invoke"-mode CallInstructions are included. |
| 122 | interfaceTypes typeutil.Map |
| 123 | } |
| 124 | |
| 125 | // addReachable marks a function as potentially callable at run-time, |
| 126 | // and ensures that it gets processed. |
| 127 | func (r *rta) addReachable(f *ssa.Function, addrTaken bool) { |
| 128 | reachable := r.result.Reachable |
| 129 | n := len(reachable) |
| 130 | v := reachable[f] |
| 131 | if addrTaken { |
| 132 | v.AddrTaken = true |
| 133 | } |
| 134 | reachable[f] = v |
| 135 | if len(reachable) > n { |
| 136 | // First time seeing f. Add it to the worklist. |
| 137 | r.worklist = append(r.worklist, f) |
| 138 | } |
| 139 | } |
| 140 | |
| 141 | // addEdge adds the specified call graph edge, and marks it reachable. |
| 142 | // addrTaken indicates whether to mark the callee as "address-taken". |
| 143 | func (r *rta) addEdge(site ssa.CallInstruction, callee *ssa.Function, addrTaken bool) { |
| 144 | r.addReachable(callee, addrTaken) |
| 145 | |
| 146 | if g := r.result.CallGraph; g != nil { |
| 147 | if site.Parent() == nil { |
| 148 | panic(site) |
| 149 | } |
| 150 | from := g.CreateNode(site.Parent()) |
| 151 | to := g.CreateNode(callee) |
| 152 | callgraph.AddEdge(from, site, to) |
| 153 | } |
| 154 | } |
| 155 | |
| 156 | // ---------- addrTakenFuncs × dynCallSites ---------- |
| 157 | |
| 158 | // visitAddrTakenFunc is called each time we encounter an address-taken function f. |
| 159 | func (r *rta) visitAddrTakenFunc(f *ssa.Function) { |
| 160 | // Create two-level map (Signature -> Function -> bool). |
| 161 | S := f.Signature |
| 162 | funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ssa.Function]bool) |
| 163 | if funcs == nil { |
| 164 | funcs = make(map[*ssa.Function]bool) |
| 165 | r.addrTakenFuncsBySig.Set(S, funcs) |
| 166 | } |
| 167 | if !funcs[f] { |
| 168 | // First time seeing f. |
| 169 | funcs[f] = true |
| 170 | |
| 171 | // If we've seen any dyncalls of this type, mark it reachable, |
| 172 | // and add call graph edges. |
| 173 | sites, _ := r.dynCallSites.At(S).([]ssa.CallInstruction) |
| 174 | for _, site := range sites { |
| 175 | r.addEdge(site, f, true) |
| 176 | } |
| 177 | } |
| 178 | } |
| 179 | |
| 180 | // visitDynCall is called each time we encounter a dynamic "call"-mode call. |
| 181 | func (r *rta) visitDynCall(site ssa.CallInstruction) { |
| 182 | S := site.Common().Signature() |
| 183 | |
| 184 | // Record the call site. |
| 185 | sites, _ := r.dynCallSites.At(S).([]ssa.CallInstruction) |
| 186 | r.dynCallSites.Set(S, append(sites, site)) |
| 187 | |
| 188 | // For each function of signature S that we know is address-taken, |
| 189 | // add an edge and mark it reachable. |
| 190 | funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ssa.Function]bool) |
| 191 | for g := range funcs { |
| 192 | r.addEdge(site, g, true) |
| 193 | } |
| 194 | } |
| 195 | |
| 196 | // ---------- concrete types × invoke sites ---------- |
| 197 | |
| 198 | // addInvokeEdge is called for each new pair (site, C) in the matrix. |
| 199 | func (r *rta) addInvokeEdge(site ssa.CallInstruction, C types.Type) { |
| 200 | // Ascertain the concrete method of C to be called. |
| 201 | imethod := site.Common().Method |
| 202 | cmethod := r.prog.MethodValue(r.prog.MethodSets.MethodSet(C).Lookup(imethod.Pkg(), imethod.Name())) |
| 203 | r.addEdge(site, cmethod, true) |
| 204 | } |
| 205 | |
| 206 | // visitInvoke is called each time the algorithm encounters an "invoke"-mode call. |
| 207 | func (r *rta) visitInvoke(site ssa.CallInstruction) { |
| 208 | I := site.Common().Value.Type().Underlying().(*types.Interface) |
| 209 | |
| 210 | // Record the invoke site. |
| 211 | sites, _ := r.invokeSites.At(I).([]ssa.CallInstruction) |
| 212 | r.invokeSites.Set(I, append(sites, site)) |
| 213 | |
| 214 | // Add callgraph edge for each existing |
| 215 | // address-taken concrete type implementing I. |
| 216 | for _, C := range r.implementations(I) { |
| 217 | r.addInvokeEdge(site, C) |
| 218 | } |
| 219 | } |
| 220 | |
| 221 | // ---------- main algorithm ---------- |
| 222 | |
| 223 | // visitFunc processes function f. |
| 224 | func (r *rta) visitFunc(f *ssa.Function) { |
| 225 | var space [32]*ssa.Value // preallocate space for common case |
| 226 | |
| 227 | for _, b := range f.Blocks { |
| 228 | for _, instr := range b.Instrs { |
| 229 | rands := instr.Operands(space[:0]) |
| 230 | |
| 231 | switch instr := instr.(type) { |
| 232 | case ssa.CallInstruction: |
| 233 | call := instr.Common() |
| 234 | if call.IsInvoke() { |
| 235 | r.visitInvoke(instr) |
| 236 | } else if g := call.StaticCallee(); g != nil { |
| 237 | r.addEdge(instr, g, false) |
| 238 | } else if _, ok := call.Value.(*ssa.Builtin); !ok { |
| 239 | r.visitDynCall(instr) |
| 240 | } |
| 241 | |
| 242 | // Ignore the call-position operand when |
| 243 | // looking for address-taken Functions. |
| 244 | // Hack: assume this is rands[0]. |
| 245 | rands = rands[1:] |
| 246 | |
| 247 | case *ssa.MakeInterface: |
| 248 | r.addRuntimeType(instr.X.Type(), false) |
| 249 | } |
| 250 | |
| 251 | // Process all address-taken functions. |
| 252 | for _, op := range rands { |
| 253 | if g, ok := (*op).(*ssa.Function); ok { |
| 254 | r.visitAddrTakenFunc(g) |
| 255 | } |
| 256 | } |
| 257 | } |
| 258 | } |
| 259 | } |
| 260 | |
| 261 | // Analyze performs Rapid Type Analysis, starting at the specified root |
| 262 | // functions. It returns nil if no roots were specified. |
| 263 | // |
| 264 | // If buildCallGraph is true, Result.CallGraph will contain a call |
| 265 | // graph; otherwise, only the other fields (reachable functions) are |
| 266 | // populated. |
| 267 | func Analyze(roots []*ssa.Function, buildCallGraph bool) *Result { |
| 268 | if len(roots) == 0 { |
| 269 | return nil |
| 270 | } |
| 271 | |
| 272 | r := &rta{ |
| 273 | result: &Result{Reachable: make(map[*ssa.Function]struct{ AddrTaken bool })}, |
| 274 | prog: roots[0].Prog, |
| 275 | } |
| 276 | |
| 277 | if buildCallGraph { |
| 278 | // TODO(adonovan): change callgraph API to eliminate the |
| 279 | // notion of a distinguished root node. Some callgraphs |
| 280 | // have many roots, or none. |
| 281 | r.result.CallGraph = callgraph.New(roots[0]) |
| 282 | } |
| 283 | |
| 284 | hasher := typeutil.MakeHasher() |
| 285 | r.result.RuntimeTypes.SetHasher(hasher) |
| 286 | r.addrTakenFuncsBySig.SetHasher(hasher) |
| 287 | r.dynCallSites.SetHasher(hasher) |
| 288 | r.invokeSites.SetHasher(hasher) |
| 289 | r.concreteTypes.SetHasher(hasher) |
| 290 | r.interfaceTypes.SetHasher(hasher) |
| 291 | |
| 292 | // Visit functions, processing their instructions, and adding |
| 293 | // new functions to the worklist, until a fixed point is |
| 294 | // reached. |
| 295 | var shadow []*ssa.Function // for efficiency, we double-buffer the worklist |
| 296 | r.worklist = append(r.worklist, roots...) |
| 297 | for len(r.worklist) > 0 { |
| 298 | shadow, r.worklist = r.worklist, shadow[:0] |
| 299 | for _, f := range shadow { |
| 300 | r.visitFunc(f) |
| 301 | } |
| 302 | } |
| 303 | return r.result |
| 304 | } |
| 305 | |
| 306 | // interfaces(C) returns all currently known interfaces implemented by C. |
| 307 | func (r *rta) interfaces(C types.Type) []*types.Interface { |
| 308 | // Ascertain set of interfaces C implements |
| 309 | // and update 'implements' relation. |
| 310 | var ifaces []*types.Interface |
| 311 | r.interfaceTypes.Iterate(func(I types.Type, concs interface{}) { |
| 312 | if I := I.(*types.Interface); types.Implements(C, I) { |
| 313 | concs, _ := concs.([]types.Type) |
| 314 | r.interfaceTypes.Set(I, append(concs, C)) |
| 315 | ifaces = append(ifaces, I) |
| 316 | } |
| 317 | }) |
| 318 | r.concreteTypes.Set(C, ifaces) |
| 319 | return ifaces |
| 320 | } |
| 321 | |
| 322 | // implementations(I) returns all currently known concrete types that implement I. |
| 323 | func (r *rta) implementations(I *types.Interface) []types.Type { |
| 324 | var concs []types.Type |
| 325 | if v := r.interfaceTypes.At(I); v != nil { |
| 326 | concs = v.([]types.Type) |
| 327 | } else { |
| 328 | // First time seeing this interface. |
| 329 | // Update the 'implements' relation. |
| 330 | r.concreteTypes.Iterate(func(C types.Type, ifaces interface{}) { |
| 331 | if types.Implements(C, I) { |
| 332 | ifaces, _ := ifaces.([]*types.Interface) |
| 333 | r.concreteTypes.Set(C, append(ifaces, I)) |
| 334 | concs = append(concs, C) |
| 335 | } |
| 336 | }) |
| 337 | r.interfaceTypes.Set(I, concs) |
| 338 | } |
| 339 | return concs |
| 340 | } |
| 341 | |
| 342 | // addRuntimeType is called for each concrete type that can be the |
| 343 | // dynamic type of some interface or reflect.Value. |
| 344 | // Adapted from needMethods in go/ssa/builder.go |
| 345 | func (r *rta) addRuntimeType(T types.Type, skip bool) { |
| 346 | if prev, ok := r.result.RuntimeTypes.At(T).(bool); ok { |
| 347 | if skip && !prev { |
| 348 | r.result.RuntimeTypes.Set(T, skip) |
| 349 | } |
| 350 | return |
| 351 | } |
| 352 | r.result.RuntimeTypes.Set(T, skip) |
| 353 | |
| 354 | mset := r.prog.MethodSets.MethodSet(T) |
| 355 | |
| 356 | if _, ok := T.Underlying().(*types.Interface); !ok { |
| 357 | // T is a new concrete type. |
| 358 | for i, n := 0, mset.Len(); i < n; i++ { |
| 359 | sel := mset.At(i) |
| 360 | m := sel.Obj() |
| 361 | |
| 362 | if m.Exported() { |
| 363 | // Exported methods are always potentially callable via reflection. |
| 364 | r.addReachable(r.prog.MethodValue(sel), true) |
| 365 | } |
| 366 | } |
| 367 | |
| 368 | // Add callgraph edge for each existing dynamic |
| 369 | // "invoke"-mode call via that interface. |
| 370 | for _, I := range r.interfaces(T) { |
| 371 | sites, _ := r.invokeSites.At(I).([]ssa.CallInstruction) |
| 372 | for _, site := range sites { |
| 373 | r.addInvokeEdge(site, T) |
| 374 | } |
| 375 | } |
| 376 | } |
| 377 | |
| 378 | // Precondition: T is not a method signature (*Signature with Recv()!=nil). |
| 379 | // Recursive case: skip => don't call makeMethods(T). |
| 380 | // Each package maintains its own set of types it has visited. |
| 381 | |
| 382 | var n *types.Named |
| 383 | switch T := T.(type) { |
| 384 | case *types.Named: |
| 385 | n = T |
| 386 | case *types.Pointer: |
| 387 | n, _ = T.Elem().(*types.Named) |
| 388 | } |
| 389 | if n != nil { |
| 390 | owner := n.Obj().Pkg() |
| 391 | if owner == nil { |
| 392 | return // built-in error type |
| 393 | } |
| 394 | } |
| 395 | |
| 396 | // Recursion over signatures of each exported method. |
| 397 | for i := 0; i < mset.Len(); i++ { |
| 398 | if mset.At(i).Obj().Exported() { |
| 399 | sig := mset.At(i).Type().(*types.Signature) |
| 400 | r.addRuntimeType(sig.Params(), true) // skip the Tuple itself |
| 401 | r.addRuntimeType(sig.Results(), true) // skip the Tuple itself |
| 402 | } |
| 403 | } |
| 404 | |
| 405 | switch t := T.(type) { |
| 406 | case *types.Basic: |
| 407 | // nop |
| 408 | |
| 409 | case *types.Interface: |
| 410 | // nop---handled by recursion over method set. |
| 411 | |
| 412 | case *types.Pointer: |
| 413 | r.addRuntimeType(t.Elem(), false) |
| 414 | |
| 415 | case *types.Slice: |
| 416 | r.addRuntimeType(t.Elem(), false) |
| 417 | |
| 418 | case *types.Chan: |
| 419 | r.addRuntimeType(t.Elem(), false) |
| 420 | |
| 421 | case *types.Map: |
| 422 | r.addRuntimeType(t.Key(), false) |
| 423 | r.addRuntimeType(t.Elem(), false) |
| 424 | |
| 425 | case *types.Signature: |
| 426 | if t.Recv() != nil { |
| 427 | panic(fmt.Sprintf("Signature %s has Recv %s", t, t.Recv())) |
| 428 | } |
| 429 | r.addRuntimeType(t.Params(), true) // skip the Tuple itself |
| 430 | r.addRuntimeType(t.Results(), true) // skip the Tuple itself |
| 431 | |
| 432 | case *types.Named: |
| 433 | // A pointer-to-named type can be derived from a named |
| 434 | // type via reflection. It may have methods too. |
| 435 | r.addRuntimeType(types.NewPointer(T), false) |
| 436 | |
| 437 | // Consider 'type T struct{S}' where S has methods. |
| 438 | // Reflection provides no way to get from T to struct{S}, |
| 439 | // only to S, so the method set of struct{S} is unwanted, |
| 440 | // so set 'skip' flag during recursion. |
| 441 | r.addRuntimeType(t.Underlying(), true) |
| 442 | |
| 443 | case *types.Array: |
| 444 | r.addRuntimeType(t.Elem(), false) |
| 445 | |
| 446 | case *types.Struct: |
| 447 | for i, n := 0, t.NumFields(); i < n; i++ { |
| 448 | r.addRuntimeType(t.Field(i).Type(), false) |
| 449 | } |
| 450 | |
| 451 | case *types.Tuple: |
| 452 | for i, n := 0, t.Len(); i < n; i++ { |
| 453 | r.addRuntimeType(t.At(i).Type(), false) |
| 454 | } |
| 455 | |
| 456 | default: |
| 457 | panic(T) |
| 458 | } |
| 459 | } |
| 460 |
Members