| 1 | // Copyright 2021 The Go Authors. All rights reserved. |
|---|---|
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package trie |
| 6 | |
| 7 | import ( |
| 8 | "math/bits" |
| 9 | ) |
| 10 | |
| 11 | // This file contains bit twiddling functions for Patricia tries. |
| 12 | // Consult this paper for details. |
| 13 | // C. Okasaki and A. Gill, “Fast mergeable integer maps,” in ACM SIGPLAN |
| 14 | // Workshop on ML, September 1998, pp. 77–86. |
| 15 | |
| 16 | // key is a key in a Map. |
| 17 | type key uint64 |
| 18 | |
| 19 | // bitpos is the position of a bit. A position is represented by having a 1 |
| 20 | // bit in that position. |
| 21 | // Examples: |
| 22 | // - 0b0010 is the position of the `1` bit in 2. |
| 23 | // It is the 3rd most specific bit position in big endian encoding |
| 24 | // (0b0 and 0b1 are more specific). |
| 25 | // - 0b0100 is the position of the bit that 1 and 5 disagree on. |
| 26 | // - 0b0 is a special value indicating that all bit agree. |
| 27 | type bitpos uint64 |
| 28 | |
| 29 | // prefixes represent a set of keys that all agree with the |
| 30 | // prefix up to a bitpos m. |
| 31 | // |
| 32 | // The value for a prefix is determined by the mask(k, m) function. |
| 33 | // (See mask for details on the values.) |
| 34 | // A `p` prefix for position `m` matches a key `k` iff mask(k, m) == p. |
| 35 | // A prefix always mask(p, m) == p. |
| 36 | // |
| 37 | // A key is its own prefix for the bit position 64, |
| 38 | // e.g. seeing a `prefix(key)` is not a problem. |
| 39 | // |
| 40 | // Prefixes should never be turned into keys. |
| 41 | type prefix uint64 |
| 42 | |
| 43 | // branchingBit returns the position of the first bit in `x` and `y` |
| 44 | // that are not equal. |
| 45 | func branchingBit(x, y prefix) bitpos { |
| 46 | p := x ^ y |
| 47 | if p == 0 { |
| 48 | return 0 |
| 49 | } |
| 50 | return bitpos(1) << uint(bits.Len64(uint64(p))-1) // uint conversion needed for go1.12 |
| 51 | } |
| 52 | |
| 53 | // zeroBit returns true if k has a 0 bit at position `b`. |
| 54 | func zeroBit(k prefix, b bitpos) bool { |
| 55 | return (uint64(k) & uint64(b)) == 0 |
| 56 | } |
| 57 | |
| 58 | // matchPrefix returns true if a prefix k matches a prefix p up to position `b`. |
| 59 | func matchPrefix(k prefix, p prefix, b bitpos) bool { |
| 60 | return mask(k, b) == p |
| 61 | } |
| 62 | |
| 63 | // mask returns a prefix of `k` with all bits after and including `b` zeroed out. |
| 64 | // |
| 65 | // In big endian encoding, this value is the [64-(m-1)] most significant bits of k |
| 66 | // followed by a `0` bit at bitpos m, followed m-1 `1` bits. |
| 67 | // Examples: |
| 68 | // |
| 69 | // prefix(0b1011) for a bitpos 0b0100 represents the keys: |
| 70 | // 0b1000, 0b1001, 0b1010, 0b1011, 0b1100, 0b1101, 0b1110, 0b1111 |
| 71 | // |
| 72 | // This mask function has the property that if matchPrefix(k, p, b), then |
| 73 | // k <= p if and only if zeroBit(k, m). This induces binary search tree tries. |
| 74 | // See Okasaki & Gill for more details about this choice of mask function. |
| 75 | // |
| 76 | // mask is idempotent for a given `b`, i.e. mask(mask(p, b), b) == mask(p,b). |
| 77 | func mask(k prefix, b bitpos) prefix { |
| 78 | return prefix((uint64(k) | (uint64(b) - 1)) & (^uint64(b))) |
| 79 | } |
| 80 | |
| 81 | // ord returns true if m comes before n in the bit ordering. |
| 82 | func ord(m, n bitpos) bool { |
| 83 | return m > n // big endian encoding |
| 84 | } |
| 85 | |
| 86 | // prefixesOverlap returns true if there is some key a prefix `p` for bitpos `m` |
| 87 | // can hold that can also be held by a prefix `q` for some bitpos `n`. |
| 88 | // |
| 89 | // This is equivalent to: |
| 90 | // |
| 91 | // m ==n && p == q, |
| 92 | // higher(m, n) && matchPrefix(q, p, m), or |
| 93 | // higher(n, m) && matchPrefix(p, q, n) |
| 94 | func prefixesOverlap(p prefix, m bitpos, q prefix, n bitpos) bool { |
| 95 | fbb := n |
| 96 | if ord(m, n) { |
| 97 | fbb = m |
| 98 | } |
| 99 | return mask(p, fbb) == mask(q, fbb) |
| 100 | // Lemma: |
| 101 | // mask(p, fbb) == mask(q, fbb) |
| 102 | // iff |
| 103 | // m > n && matchPrefix(q, p, m) or (note: big endian encoding) |
| 104 | // m < n && matchPrefix(p, q, n) or (note: big endian encoding) |
| 105 | // m ==n && p == q |
| 106 | // Quick-n-dirty proof: |
| 107 | // p == mask(p0, m) for some p0 by precondition. |
| 108 | // q == mask(q0, n) for some q0 by precondition. |
| 109 | // So mask(p, m) == p and mask(q, n) == q as mask(*, n') is idempotent. |
| 110 | // |
| 111 | // [=> proof] |
| 112 | // Suppose mask(p, fbb) == mask(q, fbb). |
| 113 | // if m ==n, p == mask(p, m) == mask(p, fbb) == mask(q, fbb) == mask(q, n) == q |
| 114 | // if m > n, fbb = firstBranchBit(m, n) = m (big endian). |
| 115 | // p == mask(p, m) == mask(p, fbb) == mask(q, fbb) == mask(q, m) |
| 116 | // so mask(q, m) == p or matchPrefix(q, p, m) |
| 117 | // if m < n, is symmetric to the above. |
| 118 | // |
| 119 | // [<= proof] |
| 120 | // case m ==n && p == q. Then mask(p, fbb) == mask(q, fbb) |
| 121 | // |
| 122 | // case m > n && matchPrefix(q, p, m). |
| 123 | // fbb == firstBranchBit(m, n) == m (by m>n). |
| 124 | // mask(q, fbb) == mask(q, m) == p == mask(p, m) == mask(p, fbb) |
| 125 | // |
| 126 | // case m < n && matchPrefix(p, q, n) is symmetric. |
| 127 | } |
| 128 |
Members