| 1 | // Copyright 2021 The Go Authors. All rights reserved. |
|---|---|
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package pkgbits |
| 6 | |
| 7 | import ( |
| 8 | "encoding/binary" |
| 9 | "errors" |
| 10 | "fmt" |
| 11 | "go/constant" |
| 12 | "go/token" |
| 13 | "io" |
| 14 | "math/big" |
| 15 | "os" |
| 16 | "runtime" |
| 17 | "strings" |
| 18 | ) |
| 19 | |
| 20 | // A PkgDecoder provides methods for decoding a package's Unified IR |
| 21 | // export data. |
| 22 | type PkgDecoder struct { |
| 23 | // version is the file format version. |
| 24 | version uint32 |
| 25 | |
| 26 | // sync indicates whether the file uses sync markers. |
| 27 | sync bool |
| 28 | |
| 29 | // pkgPath is the package path for the package to be decoded. |
| 30 | // |
| 31 | // TODO(mdempsky): Remove; unneeded since CL 391014. |
| 32 | pkgPath string |
| 33 | |
| 34 | // elemData is the full data payload of the encoded package. |
| 35 | // Elements are densely and contiguously packed together. |
| 36 | // |
| 37 | // The last 8 bytes of elemData are the package fingerprint. |
| 38 | elemData string |
| 39 | |
| 40 | // elemEnds stores the byte-offset end positions of element |
| 41 | // bitstreams within elemData. |
| 42 | // |
| 43 | // For example, element I's bitstream data starts at elemEnds[I-1] |
| 44 | // (or 0, if I==0) and ends at elemEnds[I]. |
| 45 | // |
| 46 | // Note: elemEnds is indexed by absolute indices, not |
| 47 | // section-relative indices. |
| 48 | elemEnds []uint32 |
| 49 | |
| 50 | // elemEndsEnds stores the index-offset end positions of relocation |
| 51 | // sections within elemEnds. |
| 52 | // |
| 53 | // For example, section K's end positions start at elemEndsEnds[K-1] |
| 54 | // (or 0, if K==0) and end at elemEndsEnds[K]. |
| 55 | elemEndsEnds [numRelocs]uint32 |
| 56 | |
| 57 | scratchRelocEnt []RelocEnt |
| 58 | } |
| 59 | |
| 60 | // PkgPath returns the package path for the package |
| 61 | // |
| 62 | // TODO(mdempsky): Remove; unneeded since CL 391014. |
| 63 | func (pr *PkgDecoder) PkgPath() string { return pr.pkgPath } |
| 64 | |
| 65 | // SyncMarkers reports whether pr uses sync markers. |
| 66 | func (pr *PkgDecoder) SyncMarkers() bool { return pr.sync } |
| 67 | |
| 68 | // NewPkgDecoder returns a PkgDecoder initialized to read the Unified |
| 69 | // IR export data from input. pkgPath is the package path for the |
| 70 | // compilation unit that produced the export data. |
| 71 | // |
| 72 | // TODO(mdempsky): Remove pkgPath parameter; unneeded since CL 391014. |
| 73 | func NewPkgDecoder(pkgPath, input string) PkgDecoder { |
| 74 | pr := PkgDecoder{ |
| 75 | pkgPath: pkgPath, |
| 76 | } |
| 77 | |
| 78 | // TODO(mdempsky): Implement direct indexing of input string to |
| 79 | // avoid copying the position information. |
| 80 | |
| 81 | r := strings.NewReader(input) |
| 82 | |
| 83 | assert(binary.Read(r, binary.LittleEndian, &pr.version) == nil) |
| 84 | |
| 85 | switch pr.version { |
| 86 | default: |
| 87 | panic(fmt.Errorf("unsupported version: %v", pr.version)) |
| 88 | case 0: |
| 89 | // no flags |
| 90 | case 1: |
| 91 | var flags uint32 |
| 92 | assert(binary.Read(r, binary.LittleEndian, &flags) == nil) |
| 93 | pr.sync = flags&flagSyncMarkers != 0 |
| 94 | } |
| 95 | |
| 96 | assert(binary.Read(r, binary.LittleEndian, pr.elemEndsEnds[:]) == nil) |
| 97 | |
| 98 | pr.elemEnds = make([]uint32, pr.elemEndsEnds[len(pr.elemEndsEnds)-1]) |
| 99 | assert(binary.Read(r, binary.LittleEndian, pr.elemEnds[:]) == nil) |
| 100 | |
| 101 | pos, err := r.Seek(0, io.SeekCurrent) |
| 102 | assert(err == nil) |
| 103 | |
| 104 | pr.elemData = input[pos:] |
| 105 | assert(len(pr.elemData)-8 == int(pr.elemEnds[len(pr.elemEnds)-1])) |
| 106 | |
| 107 | return pr |
| 108 | } |
| 109 | |
| 110 | // NumElems returns the number of elements in section k. |
| 111 | func (pr *PkgDecoder) NumElems(k RelocKind) int { |
| 112 | count := int(pr.elemEndsEnds[k]) |
| 113 | if k > 0 { |
| 114 | count -= int(pr.elemEndsEnds[k-1]) |
| 115 | } |
| 116 | return count |
| 117 | } |
| 118 | |
| 119 | // TotalElems returns the total number of elements across all sections. |
| 120 | func (pr *PkgDecoder) TotalElems() int { |
| 121 | return len(pr.elemEnds) |
| 122 | } |
| 123 | |
| 124 | // Fingerprint returns the package fingerprint. |
| 125 | func (pr *PkgDecoder) Fingerprint() [8]byte { |
| 126 | var fp [8]byte |
| 127 | copy(fp[:], pr.elemData[len(pr.elemData)-8:]) |
| 128 | return fp |
| 129 | } |
| 130 | |
| 131 | // AbsIdx returns the absolute index for the given (section, index) |
| 132 | // pair. |
| 133 | func (pr *PkgDecoder) AbsIdx(k RelocKind, idx Index) int { |
| 134 | absIdx := int(idx) |
| 135 | if k > 0 { |
| 136 | absIdx += int(pr.elemEndsEnds[k-1]) |
| 137 | } |
| 138 | if absIdx >= int(pr.elemEndsEnds[k]) { |
| 139 | errorf("%v:%v is out of bounds; %v", k, idx, pr.elemEndsEnds) |
| 140 | } |
| 141 | return absIdx |
| 142 | } |
| 143 | |
| 144 | // DataIdx returns the raw element bitstream for the given (section, |
| 145 | // index) pair. |
| 146 | func (pr *PkgDecoder) DataIdx(k RelocKind, idx Index) string { |
| 147 | absIdx := pr.AbsIdx(k, idx) |
| 148 | |
| 149 | var start uint32 |
| 150 | if absIdx > 0 { |
| 151 | start = pr.elemEnds[absIdx-1] |
| 152 | } |
| 153 | end := pr.elemEnds[absIdx] |
| 154 | |
| 155 | return pr.elemData[start:end] |
| 156 | } |
| 157 | |
| 158 | // StringIdx returns the string value for the given string index. |
| 159 | func (pr *PkgDecoder) StringIdx(idx Index) string { |
| 160 | return pr.DataIdx(RelocString, idx) |
| 161 | } |
| 162 | |
| 163 | // NewDecoder returns a Decoder for the given (section, index) pair, |
| 164 | // and decodes the given SyncMarker from the element bitstream. |
| 165 | func (pr *PkgDecoder) NewDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder { |
| 166 | r := pr.NewDecoderRaw(k, idx) |
| 167 | r.Sync(marker) |
| 168 | return r |
| 169 | } |
| 170 | |
| 171 | // TempDecoder returns a Decoder for the given (section, index) pair, |
| 172 | // and decodes the given SyncMarker from the element bitstream. |
| 173 | // If possible the Decoder should be RetireDecoder'd when it is no longer |
| 174 | // needed, this will avoid heap allocations. |
| 175 | func (pr *PkgDecoder) TempDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder { |
| 176 | r := pr.TempDecoderRaw(k, idx) |
| 177 | r.Sync(marker) |
| 178 | return r |
| 179 | } |
| 180 | |
| 181 | func (pr *PkgDecoder) RetireDecoder(d *Decoder) { |
| 182 | pr.scratchRelocEnt = d.Relocs |
| 183 | d.Relocs = nil |
| 184 | } |
| 185 | |
| 186 | // NewDecoderRaw returns a Decoder for the given (section, index) pair. |
| 187 | // |
| 188 | // Most callers should use NewDecoder instead. |
| 189 | func (pr *PkgDecoder) NewDecoderRaw(k RelocKind, idx Index) Decoder { |
| 190 | r := Decoder{ |
| 191 | common: pr, |
| 192 | k: k, |
| 193 | Idx: idx, |
| 194 | } |
| 195 | |
| 196 | // TODO(mdempsky) r.data.Reset(...) after #44505 is resolved. |
| 197 | r.Data = *strings.NewReader(pr.DataIdx(k, idx)) |
| 198 | |
| 199 | r.Sync(SyncRelocs) |
| 200 | r.Relocs = make([]RelocEnt, r.Len()) |
| 201 | for i := range r.Relocs { |
| 202 | r.Sync(SyncReloc) |
| 203 | r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())} |
| 204 | } |
| 205 | |
| 206 | return r |
| 207 | } |
| 208 | |
| 209 | func (pr *PkgDecoder) TempDecoderRaw(k RelocKind, idx Index) Decoder { |
| 210 | r := Decoder{ |
| 211 | common: pr, |
| 212 | k: k, |
| 213 | Idx: idx, |
| 214 | } |
| 215 | |
| 216 | r.Data.Reset(pr.DataIdx(k, idx)) |
| 217 | r.Sync(SyncRelocs) |
| 218 | l := r.Len() |
| 219 | if cap(pr.scratchRelocEnt) >= l { |
| 220 | r.Relocs = pr.scratchRelocEnt[:l] |
| 221 | pr.scratchRelocEnt = nil |
| 222 | } else { |
| 223 | r.Relocs = make([]RelocEnt, l) |
| 224 | } |
| 225 | for i := range r.Relocs { |
| 226 | r.Sync(SyncReloc) |
| 227 | r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())} |
| 228 | } |
| 229 | |
| 230 | return r |
| 231 | } |
| 232 | |
| 233 | // A Decoder provides methods for decoding an individual element's |
| 234 | // bitstream data. |
| 235 | type Decoder struct { |
| 236 | common *PkgDecoder |
| 237 | |
| 238 | Relocs []RelocEnt |
| 239 | Data strings.Reader |
| 240 | |
| 241 | k RelocKind |
| 242 | Idx Index |
| 243 | } |
| 244 | |
| 245 | func (r *Decoder) checkErr(err error) { |
| 246 | if err != nil { |
| 247 | errorf("unexpected decoding error: %w", err) |
| 248 | } |
| 249 | } |
| 250 | |
| 251 | func (r *Decoder) rawUvarint() uint64 { |
| 252 | x, err := readUvarint(&r.Data) |
| 253 | r.checkErr(err) |
| 254 | return x |
| 255 | } |
| 256 | |
| 257 | // readUvarint is a type-specialized copy of encoding/binary.ReadUvarint. |
| 258 | // This avoids the interface conversion and thus has better escape properties, |
| 259 | // which flows up the stack. |
| 260 | func readUvarint(r *strings.Reader) (uint64, error) { |
| 261 | var x uint64 |
| 262 | var s uint |
| 263 | for i := 0; i < binary.MaxVarintLen64; i++ { |
| 264 | b, err := r.ReadByte() |
| 265 | if err != nil { |
| 266 | if i > 0 && err == io.EOF { |
| 267 | err = io.ErrUnexpectedEOF |
| 268 | } |
| 269 | return x, err |
| 270 | } |
| 271 | if b < 0x80 { |
| 272 | if i == binary.MaxVarintLen64-1 && b > 1 { |
| 273 | return x, overflow |
| 274 | } |
| 275 | return x | uint64(b)<<s, nil |
| 276 | } |
| 277 | x |= uint64(b&0x7f) << s |
| 278 | s += 7 |
| 279 | } |
| 280 | return x, overflow |
| 281 | } |
| 282 | |
| 283 | var overflow = errors.New("pkgbits: readUvarint overflows a 64-bit integer") |
| 284 | |
| 285 | func (r *Decoder) rawVarint() int64 { |
| 286 | ux := r.rawUvarint() |
| 287 | |
| 288 | // Zig-zag decode. |
| 289 | x := int64(ux >> 1) |
| 290 | if ux&1 != 0 { |
| 291 | x = ^x |
| 292 | } |
| 293 | return x |
| 294 | } |
| 295 | |
| 296 | func (r *Decoder) rawReloc(k RelocKind, idx int) Index { |
| 297 | e := r.Relocs[idx] |
| 298 | assert(e.Kind == k) |
| 299 | return e.Idx |
| 300 | } |
| 301 | |
| 302 | // Sync decodes a sync marker from the element bitstream and asserts |
| 303 | // that it matches the expected marker. |
| 304 | // |
| 305 | // If r.common.sync is false, then Sync is a no-op. |
| 306 | func (r *Decoder) Sync(mWant SyncMarker) { |
| 307 | if !r.common.sync { |
| 308 | return |
| 309 | } |
| 310 | |
| 311 | pos, _ := r.Data.Seek(0, io.SeekCurrent) |
| 312 | mHave := SyncMarker(r.rawUvarint()) |
| 313 | writerPCs := make([]int, r.rawUvarint()) |
| 314 | for i := range writerPCs { |
| 315 | writerPCs[i] = int(r.rawUvarint()) |
| 316 | } |
| 317 | |
| 318 | if mHave == mWant { |
| 319 | return |
| 320 | } |
| 321 | |
| 322 | // There's some tension here between printing: |
| 323 | // |
| 324 | // (1) full file paths that tools can recognize (e.g., so emacs |
| 325 | // hyperlinks the "file:line" text for easy navigation), or |
| 326 | // |
| 327 | // (2) short file paths that are easier for humans to read (e.g., by |
| 328 | // omitting redundant or irrelevant details, so it's easier to |
| 329 | // focus on the useful bits that remain). |
| 330 | // |
| 331 | // The current formatting favors the former, as it seems more |
| 332 | // helpful in practice. But perhaps the formatting could be improved |
| 333 | // to better address both concerns. For example, use relative file |
| 334 | // paths if they would be shorter, or rewrite file paths to contain |
| 335 | // "$GOROOT" (like objabi.AbsFile does) if tools can be taught how |
| 336 | // to reliably expand that again. |
| 337 | |
| 338 | fmt.Printf("export data desync: package %q, section %v, index %v, offset %v\n", r.common.pkgPath, r.k, r.Idx, pos) |
| 339 | |
| 340 | fmt.Printf("\nfound %v, written at:\n", mHave) |
| 341 | if len(writerPCs) == 0 { |
| 342 | fmt.Printf("\t[stack trace unavailable; recompile package %q with -d=syncframes]\n", r.common.pkgPath) |
| 343 | } |
| 344 | for _, pc := range writerPCs { |
| 345 | fmt.Printf("\t%s\n", r.common.StringIdx(r.rawReloc(RelocString, pc))) |
| 346 | } |
| 347 | |
| 348 | fmt.Printf("\nexpected %v, reading at:\n", mWant) |
| 349 | var readerPCs [32]uintptr // TODO(mdempsky): Dynamically size? |
| 350 | n := runtime.Callers(2, readerPCs[:]) |
| 351 | for _, pc := range fmtFrames(readerPCs[:n]...) { |
| 352 | fmt.Printf("\t%s\n", pc) |
| 353 | } |
| 354 | |
| 355 | // We already printed a stack trace for the reader, so now we can |
| 356 | // simply exit. Printing a second one with panic or base.Fatalf |
| 357 | // would just be noise. |
| 358 | os.Exit(1) |
| 359 | } |
| 360 | |
| 361 | // Bool decodes and returns a bool value from the element bitstream. |
| 362 | func (r *Decoder) Bool() bool { |
| 363 | r.Sync(SyncBool) |
| 364 | x, err := r.Data.ReadByte() |
| 365 | r.checkErr(err) |
| 366 | assert(x < 2) |
| 367 | return x != 0 |
| 368 | } |
| 369 | |
| 370 | // Int64 decodes and returns an int64 value from the element bitstream. |
| 371 | func (r *Decoder) Int64() int64 { |
| 372 | r.Sync(SyncInt64) |
| 373 | return r.rawVarint() |
| 374 | } |
| 375 | |
| 376 | // Int64 decodes and returns a uint64 value from the element bitstream. |
| 377 | func (r *Decoder) Uint64() uint64 { |
| 378 | r.Sync(SyncUint64) |
| 379 | return r.rawUvarint() |
| 380 | } |
| 381 | |
| 382 | // Len decodes and returns a non-negative int value from the element bitstream. |
| 383 | func (r *Decoder) Len() int { x := r.Uint64(); v := int(x); assert(uint64(v) == x); return v } |
| 384 | |
| 385 | // Int decodes and returns an int value from the element bitstream. |
| 386 | func (r *Decoder) Int() int { x := r.Int64(); v := int(x); assert(int64(v) == x); return v } |
| 387 | |
| 388 | // Uint decodes and returns a uint value from the element bitstream. |
| 389 | func (r *Decoder) Uint() uint { x := r.Uint64(); v := uint(x); assert(uint64(v) == x); return v } |
| 390 | |
| 391 | // Code decodes a Code value from the element bitstream and returns |
| 392 | // its ordinal value. It's the caller's responsibility to convert the |
| 393 | // result to an appropriate Code type. |
| 394 | // |
| 395 | // TODO(mdempsky): Ideally this method would have signature "Code[T |
| 396 | // Code] T" instead, but we don't allow generic methods and the |
| 397 | // compiler can't depend on generics yet anyway. |
| 398 | func (r *Decoder) Code(mark SyncMarker) int { |
| 399 | r.Sync(mark) |
| 400 | return r.Len() |
| 401 | } |
| 402 | |
| 403 | // Reloc decodes a relocation of expected section k from the element |
| 404 | // bitstream and returns an index to the referenced element. |
| 405 | func (r *Decoder) Reloc(k RelocKind) Index { |
| 406 | r.Sync(SyncUseReloc) |
| 407 | return r.rawReloc(k, r.Len()) |
| 408 | } |
| 409 | |
| 410 | // String decodes and returns a string value from the element |
| 411 | // bitstream. |
| 412 | func (r *Decoder) String() string { |
| 413 | r.Sync(SyncString) |
| 414 | return r.common.StringIdx(r.Reloc(RelocString)) |
| 415 | } |
| 416 | |
| 417 | // Strings decodes and returns a variable-length slice of strings from |
| 418 | // the element bitstream. |
| 419 | func (r *Decoder) Strings() []string { |
| 420 | res := make([]string, r.Len()) |
| 421 | for i := range res { |
| 422 | res[i] = r.String() |
| 423 | } |
| 424 | return res |
| 425 | } |
| 426 | |
| 427 | // Value decodes and returns a constant.Value from the element |
| 428 | // bitstream. |
| 429 | func (r *Decoder) Value() constant.Value { |
| 430 | r.Sync(SyncValue) |
| 431 | isComplex := r.Bool() |
| 432 | val := r.scalar() |
| 433 | if isComplex { |
| 434 | val = constant.BinaryOp(val, token.ADD, constant.MakeImag(r.scalar())) |
| 435 | } |
| 436 | return val |
| 437 | } |
| 438 | |
| 439 | func (r *Decoder) scalar() constant.Value { |
| 440 | switch tag := CodeVal(r.Code(SyncVal)); tag { |
| 441 | default: |
| 442 | panic(fmt.Errorf("unexpected scalar tag: %v", tag)) |
| 443 | |
| 444 | case ValBool: |
| 445 | return constant.MakeBool(r.Bool()) |
| 446 | case ValString: |
| 447 | return constant.MakeString(r.String()) |
| 448 | case ValInt64: |
| 449 | return constant.MakeInt64(r.Int64()) |
| 450 | case ValBigInt: |
| 451 | return constant.Make(r.bigInt()) |
| 452 | case ValBigRat: |
| 453 | num := r.bigInt() |
| 454 | denom := r.bigInt() |
| 455 | return constant.Make(new(big.Rat).SetFrac(num, denom)) |
| 456 | case ValBigFloat: |
| 457 | return constant.Make(r.bigFloat()) |
| 458 | } |
| 459 | } |
| 460 | |
| 461 | func (r *Decoder) bigInt() *big.Int { |
| 462 | v := new(big.Int).SetBytes([]byte(r.String())) |
| 463 | if r.Bool() { |
| 464 | v.Neg(v) |
| 465 | } |
| 466 | return v |
| 467 | } |
| 468 | |
| 469 | func (r *Decoder) bigFloat() *big.Float { |
| 470 | v := new(big.Float).SetPrec(512) |
| 471 | assert(v.UnmarshalText([]byte(r.String())) == nil) |
| 472 | return v |
| 473 | } |
| 474 | |
| 475 | // @@@ Helpers |
| 476 | |
| 477 | // TODO(mdempsky): These should probably be removed. I think they're a |
| 478 | // smell that the export data format is not yet quite right. |
| 479 | |
| 480 | // PeekPkgPath returns the package path for the specified package |
| 481 | // index. |
| 482 | func (pr *PkgDecoder) PeekPkgPath(idx Index) string { |
| 483 | var path string |
| 484 | { |
| 485 | r := pr.TempDecoder(RelocPkg, idx, SyncPkgDef) |
| 486 | path = r.String() |
| 487 | pr.RetireDecoder(&r) |
| 488 | } |
| 489 | if path == "" { |
| 490 | path = pr.pkgPath |
| 491 | } |
| 492 | return path |
| 493 | } |
| 494 | |
| 495 | // PeekObj returns the package path, object name, and CodeObj for the |
| 496 | // specified object index. |
| 497 | func (pr *PkgDecoder) PeekObj(idx Index) (string, string, CodeObj) { |
| 498 | var ridx Index |
| 499 | var name string |
| 500 | var rcode int |
| 501 | { |
| 502 | r := pr.TempDecoder(RelocName, idx, SyncObject1) |
| 503 | r.Sync(SyncSym) |
| 504 | r.Sync(SyncPkg) |
| 505 | ridx = r.Reloc(RelocPkg) |
| 506 | name = r.String() |
| 507 | rcode = r.Code(SyncCodeObj) |
| 508 | pr.RetireDecoder(&r) |
| 509 | } |
| 510 | |
| 511 | path := pr.PeekPkgPath(ridx) |
| 512 | assert(name != "") |
| 513 | |
| 514 | tag := CodeObj(rcode) |
| 515 | |
| 516 | return path, name, tag |
| 517 | } |
| 518 |
Members