| 1 | // Copyright 2021 The Go Authors. All rights reserved. |
|---|---|
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package pkgbits |
| 6 | |
| 7 | import ( |
| 8 | "bytes" |
| 9 | "crypto/md5" |
| 10 | "encoding/binary" |
| 11 | "go/constant" |
| 12 | "io" |
| 13 | "math/big" |
| 14 | "runtime" |
| 15 | ) |
| 16 | |
| 17 | // currentVersion is the current version number. |
| 18 | // |
| 19 | // - v0: initial prototype |
| 20 | // |
| 21 | // - v1: adds the flags uint32 word |
| 22 | const currentVersion uint32 = 1 |
| 23 | |
| 24 | // A PkgEncoder provides methods for encoding a package's Unified IR |
| 25 | // export data. |
| 26 | type PkgEncoder struct { |
| 27 | // elems holds the bitstream for previously encoded elements. |
| 28 | elems [numRelocs][]string |
| 29 | |
| 30 | // stringsIdx maps previously encoded strings to their index within |
| 31 | // the RelocString section, to allow deduplication. That is, |
| 32 | // elems[RelocString][stringsIdx[s]] == s (if present). |
| 33 | stringsIdx map[string]Index |
| 34 | |
| 35 | // syncFrames is the number of frames to write at each sync |
| 36 | // marker. A negative value means sync markers are omitted. |
| 37 | syncFrames int |
| 38 | } |
| 39 | |
| 40 | // SyncMarkers reports whether pw uses sync markers. |
| 41 | func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 } |
| 42 | |
| 43 | // NewPkgEncoder returns an initialized PkgEncoder. |
| 44 | // |
| 45 | // syncFrames is the number of caller frames that should be serialized |
| 46 | // at Sync points. Serializing additional frames results in larger |
| 47 | // export data files, but can help diagnosing desync errors in |
| 48 | // higher-level Unified IR reader/writer code. If syncFrames is |
| 49 | // negative, then sync markers are omitted entirely. |
| 50 | func NewPkgEncoder(syncFrames int) PkgEncoder { |
| 51 | return PkgEncoder{ |
| 52 | stringsIdx: make(map[string]Index), |
| 53 | syncFrames: syncFrames, |
| 54 | } |
| 55 | } |
| 56 | |
| 57 | // DumpTo writes the package's encoded data to out0 and returns the |
| 58 | // package fingerprint. |
| 59 | func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) { |
| 60 | h := md5.New() |
| 61 | out := io.MultiWriter(out0, h) |
| 62 | |
| 63 | writeUint32 := func(x uint32) { |
| 64 | assert(binary.Write(out, binary.LittleEndian, x) == nil) |
| 65 | } |
| 66 | |
| 67 | writeUint32(currentVersion) |
| 68 | |
| 69 | var flags uint32 |
| 70 | if pw.SyncMarkers() { |
| 71 | flags |= flagSyncMarkers |
| 72 | } |
| 73 | writeUint32(flags) |
| 74 | |
| 75 | // Write elemEndsEnds. |
| 76 | var sum uint32 |
| 77 | for _, elems := range &pw.elems { |
| 78 | sum += uint32(len(elems)) |
| 79 | writeUint32(sum) |
| 80 | } |
| 81 | |
| 82 | // Write elemEnds. |
| 83 | sum = 0 |
| 84 | for _, elems := range &pw.elems { |
| 85 | for _, elem := range elems { |
| 86 | sum += uint32(len(elem)) |
| 87 | writeUint32(sum) |
| 88 | } |
| 89 | } |
| 90 | |
| 91 | // Write elemData. |
| 92 | for _, elems := range &pw.elems { |
| 93 | for _, elem := range elems { |
| 94 | _, err := io.WriteString(out, elem) |
| 95 | assert(err == nil) |
| 96 | } |
| 97 | } |
| 98 | |
| 99 | // Write fingerprint. |
| 100 | copy(fingerprint[:], h.Sum(nil)) |
| 101 | _, err := out0.Write(fingerprint[:]) |
| 102 | assert(err == nil) |
| 103 | |
| 104 | return |
| 105 | } |
| 106 | |
| 107 | // StringIdx adds a string value to the strings section, if not |
| 108 | // already present, and returns its index. |
| 109 | func (pw *PkgEncoder) StringIdx(s string) Index { |
| 110 | if idx, ok := pw.stringsIdx[s]; ok { |
| 111 | assert(pw.elems[RelocString][idx] == s) |
| 112 | return idx |
| 113 | } |
| 114 | |
| 115 | idx := Index(len(pw.elems[RelocString])) |
| 116 | pw.elems[RelocString] = append(pw.elems[RelocString], s) |
| 117 | pw.stringsIdx[s] = idx |
| 118 | return idx |
| 119 | } |
| 120 | |
| 121 | // NewEncoder returns an Encoder for a new element within the given |
| 122 | // section, and encodes the given SyncMarker as the start of the |
| 123 | // element bitstream. |
| 124 | func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder { |
| 125 | e := pw.NewEncoderRaw(k) |
| 126 | e.Sync(marker) |
| 127 | return e |
| 128 | } |
| 129 | |
| 130 | // NewEncoderRaw returns an Encoder for a new element within the given |
| 131 | // section. |
| 132 | // |
| 133 | // Most callers should use NewEncoder instead. |
| 134 | func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder { |
| 135 | idx := Index(len(pw.elems[k])) |
| 136 | pw.elems[k] = append(pw.elems[k], "") // placeholder |
| 137 | |
| 138 | return Encoder{ |
| 139 | p: pw, |
| 140 | k: k, |
| 141 | Idx: idx, |
| 142 | } |
| 143 | } |
| 144 | |
| 145 | // An Encoder provides methods for encoding an individual element's |
| 146 | // bitstream data. |
| 147 | type Encoder struct { |
| 148 | p *PkgEncoder |
| 149 | |
| 150 | Relocs []RelocEnt |
| 151 | RelocMap map[RelocEnt]uint32 |
| 152 | Data bytes.Buffer // accumulated element bitstream data |
| 153 | |
| 154 | encodingRelocHeader bool |
| 155 | |
| 156 | k RelocKind |
| 157 | Idx Index // index within relocation section |
| 158 | } |
| 159 | |
| 160 | // Flush finalizes the element's bitstream and returns its Index. |
| 161 | func (w *Encoder) Flush() Index { |
| 162 | var sb bytes.Buffer // TODO(mdempsky): strings.Builder after #44505 is resolved |
| 163 | |
| 164 | // Backup the data so we write the relocations at the front. |
| 165 | var tmp bytes.Buffer |
| 166 | io.Copy(&tmp, &w.Data) |
| 167 | |
| 168 | // TODO(mdempsky): Consider writing these out separately so they're |
| 169 | // easier to strip, along with function bodies, so that we can prune |
| 170 | // down to just the data that's relevant to go/types. |
| 171 | if w.encodingRelocHeader { |
| 172 | panic("encodingRelocHeader already true; recursive flush?") |
| 173 | } |
| 174 | w.encodingRelocHeader = true |
| 175 | w.Sync(SyncRelocs) |
| 176 | w.Len(len(w.Relocs)) |
| 177 | for _, rEnt := range w.Relocs { |
| 178 | w.Sync(SyncReloc) |
| 179 | w.Len(int(rEnt.Kind)) |
| 180 | w.Len(int(rEnt.Idx)) |
| 181 | } |
| 182 | |
| 183 | io.Copy(&sb, &w.Data) |
| 184 | io.Copy(&sb, &tmp) |
| 185 | w.p.elems[w.k][w.Idx] = sb.String() |
| 186 | |
| 187 | return w.Idx |
| 188 | } |
| 189 | |
| 190 | func (w *Encoder) checkErr(err error) { |
| 191 | if err != nil { |
| 192 | errorf("unexpected encoding error: %v", err) |
| 193 | } |
| 194 | } |
| 195 | |
| 196 | func (w *Encoder) rawUvarint(x uint64) { |
| 197 | var buf [binary.MaxVarintLen64]byte |
| 198 | n := binary.PutUvarint(buf[:], x) |
| 199 | _, err := w.Data.Write(buf[:n]) |
| 200 | w.checkErr(err) |
| 201 | } |
| 202 | |
| 203 | func (w *Encoder) rawVarint(x int64) { |
| 204 | // Zig-zag encode. |
| 205 | ux := uint64(x) << 1 |
| 206 | if x < 0 { |
| 207 | ux = ^ux |
| 208 | } |
| 209 | |
| 210 | w.rawUvarint(ux) |
| 211 | } |
| 212 | |
| 213 | func (w *Encoder) rawReloc(r RelocKind, idx Index) int { |
| 214 | e := RelocEnt{r, idx} |
| 215 | if w.RelocMap != nil { |
| 216 | if i, ok := w.RelocMap[e]; ok { |
| 217 | return int(i) |
| 218 | } |
| 219 | } else { |
| 220 | w.RelocMap = make(map[RelocEnt]uint32) |
| 221 | } |
| 222 | |
| 223 | i := len(w.Relocs) |
| 224 | w.RelocMap[e] = uint32(i) |
| 225 | w.Relocs = append(w.Relocs, e) |
| 226 | return i |
| 227 | } |
| 228 | |
| 229 | func (w *Encoder) Sync(m SyncMarker) { |
| 230 | if !w.p.SyncMarkers() { |
| 231 | return |
| 232 | } |
| 233 | |
| 234 | // Writing out stack frame string references requires working |
| 235 | // relocations, but writing out the relocations themselves involves |
| 236 | // sync markers. To prevent infinite recursion, we simply trim the |
| 237 | // stack frame for sync markers within the relocation header. |
| 238 | var frames []string |
| 239 | if !w.encodingRelocHeader && w.p.syncFrames > 0 { |
| 240 | pcs := make([]uintptr, w.p.syncFrames) |
| 241 | n := runtime.Callers(2, pcs) |
| 242 | frames = fmtFrames(pcs[:n]...) |
| 243 | } |
| 244 | |
| 245 | // TODO(mdempsky): Save space by writing out stack frames as a |
| 246 | // linked list so we can share common stack frames. |
| 247 | w.rawUvarint(uint64(m)) |
| 248 | w.rawUvarint(uint64(len(frames))) |
| 249 | for _, frame := range frames { |
| 250 | w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame)))) |
| 251 | } |
| 252 | } |
| 253 | |
| 254 | // Bool encodes and writes a bool value into the element bitstream, |
| 255 | // and then returns the bool value. |
| 256 | // |
| 257 | // For simple, 2-alternative encodings, the idiomatic way to call Bool |
| 258 | // is something like: |
| 259 | // |
| 260 | // if w.Bool(x != 0) { |
| 261 | // // alternative #1 |
| 262 | // } else { |
| 263 | // // alternative #2 |
| 264 | // } |
| 265 | // |
| 266 | // For multi-alternative encodings, use Code instead. |
| 267 | func (w *Encoder) Bool(b bool) bool { |
| 268 | w.Sync(SyncBool) |
| 269 | var x byte |
| 270 | if b { |
| 271 | x = 1 |
| 272 | } |
| 273 | err := w.Data.WriteByte(x) |
| 274 | w.checkErr(err) |
| 275 | return b |
| 276 | } |
| 277 | |
| 278 | // Int64 encodes and writes an int64 value into the element bitstream. |
| 279 | func (w *Encoder) Int64(x int64) { |
| 280 | w.Sync(SyncInt64) |
| 281 | w.rawVarint(x) |
| 282 | } |
| 283 | |
| 284 | // Uint64 encodes and writes a uint64 value into the element bitstream. |
| 285 | func (w *Encoder) Uint64(x uint64) { |
| 286 | w.Sync(SyncUint64) |
| 287 | w.rawUvarint(x) |
| 288 | } |
| 289 | |
| 290 | // Len encodes and writes a non-negative int value into the element bitstream. |
| 291 | func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) } |
| 292 | |
| 293 | // Int encodes and writes an int value into the element bitstream. |
| 294 | func (w *Encoder) Int(x int) { w.Int64(int64(x)) } |
| 295 | |
| 296 | // Len encodes and writes a uint value into the element bitstream. |
| 297 | func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) } |
| 298 | |
| 299 | // Reloc encodes and writes a relocation for the given (section, |
| 300 | // index) pair into the element bitstream. |
| 301 | // |
| 302 | // Note: Only the index is formally written into the element |
| 303 | // bitstream, so bitstream decoders must know from context which |
| 304 | // section an encoded relocation refers to. |
| 305 | func (w *Encoder) Reloc(r RelocKind, idx Index) { |
| 306 | w.Sync(SyncUseReloc) |
| 307 | w.Len(w.rawReloc(r, idx)) |
| 308 | } |
| 309 | |
| 310 | // Code encodes and writes a Code value into the element bitstream. |
| 311 | func (w *Encoder) Code(c Code) { |
| 312 | w.Sync(c.Marker()) |
| 313 | w.Len(c.Value()) |
| 314 | } |
| 315 | |
| 316 | // String encodes and writes a string value into the element |
| 317 | // bitstream. |
| 318 | // |
| 319 | // Internally, strings are deduplicated by adding them to the strings |
| 320 | // section (if not already present), and then writing a relocation |
| 321 | // into the element bitstream. |
| 322 | func (w *Encoder) String(s string) { |
| 323 | w.Sync(SyncString) |
| 324 | w.Reloc(RelocString, w.p.StringIdx(s)) |
| 325 | } |
| 326 | |
| 327 | // Strings encodes and writes a variable-length slice of strings into |
| 328 | // the element bitstream. |
| 329 | func (w *Encoder) Strings(ss []string) { |
| 330 | w.Len(len(ss)) |
| 331 | for _, s := range ss { |
| 332 | w.String(s) |
| 333 | } |
| 334 | } |
| 335 | |
| 336 | // Value encodes and writes a constant.Value into the element |
| 337 | // bitstream. |
| 338 | func (w *Encoder) Value(val constant.Value) { |
| 339 | w.Sync(SyncValue) |
| 340 | if w.Bool(val.Kind() == constant.Complex) { |
| 341 | w.scalar(constant.Real(val)) |
| 342 | w.scalar(constant.Imag(val)) |
| 343 | } else { |
| 344 | w.scalar(val) |
| 345 | } |
| 346 | } |
| 347 | |
| 348 | func (w *Encoder) scalar(val constant.Value) { |
| 349 | switch v := constant.Val(val).(type) { |
| 350 | default: |
| 351 | errorf("unhandled %v (%v)", val, val.Kind()) |
| 352 | case bool: |
| 353 | w.Code(ValBool) |
| 354 | w.Bool(v) |
| 355 | case string: |
| 356 | w.Code(ValString) |
| 357 | w.String(v) |
| 358 | case int64: |
| 359 | w.Code(ValInt64) |
| 360 | w.Int64(v) |
| 361 | case *big.Int: |
| 362 | w.Code(ValBigInt) |
| 363 | w.bigInt(v) |
| 364 | case *big.Rat: |
| 365 | w.Code(ValBigRat) |
| 366 | w.bigInt(v.Num()) |
| 367 | w.bigInt(v.Denom()) |
| 368 | case *big.Float: |
| 369 | w.Code(ValBigFloat) |
| 370 | w.bigFloat(v) |
| 371 | } |
| 372 | } |
| 373 | |
| 374 | func (w *Encoder) bigInt(v *big.Int) { |
| 375 | b := v.Bytes() |
| 376 | w.String(string(b)) // TODO: More efficient encoding. |
| 377 | w.Bool(v.Sign() < 0) |
| 378 | } |
| 379 | |
| 380 | func (w *Encoder) bigFloat(v *big.Float) { |
| 381 | b := v.Append(nil, 'p', -1) |
| 382 | w.String(string(b)) // TODO: More efficient encoding. |
| 383 | } |
| 384 |
Members