| 1 | //===--- arm_neon_incl.td - ARM NEON compiler interface ------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines data structures shared by arm_neon.td and arm_fp16.td. |
| 10 | // It contains base operation classes, operations, instructions, instruction |
| 11 | // modifiers, etc. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | // |
| 15 | // Each intrinsic is a subclass of the Inst class. An intrinsic can either |
| 16 | // generate a __builtin_* call or it can expand to a set of generic operations. |
| 17 | // |
| 18 | // The operations are subclasses of Operation providing a list of DAGs, the |
| 19 | // last of which is the return value. The available DAG nodes are documented |
| 20 | // below. |
| 21 | // |
| 22 | //===----------------------------------------------------------------------===// |
| 23 | |
| 24 | // The base Operation class. All operations must subclass this. Ops holds the operation's list of DAGs (empty by default; the last DAG is the return value) and Unavailable defaults to 0 (OP_UNAVAILABLE sets it to 1). |
| 25 | class Operation<list<dag> ops=[]> { |
| 26 | list<dag> Ops = ops; |
| 27 | bit Unavailable = 0; |
| 28 | } |
| 29 | // An operation that only contains a single DAG; the DAG is wrapped into a one-element list, so it is also the return value. |
| 30 | class Op<dag op> : Operation<[op]>; |
| 31 | // A shorter spelling of Operation - takes a list of DAGs and forwards it unchanged (unlike Operation, the list has no default). The last of these will |
| 32 | // be the return value. |
| 33 | class LOp<list<dag> ops> : Operation<ops>; |
| 34 | |
| 35 | // These defs (one "sv" def per Index in 0-63) and the MaskExpand class (the base of mask0 and mask1) are used internally to implement the SetTheory |
| 36 | // expansion and should be ignored. |
| 37 | foreach Index = 0-63 in |
| 38 | def sv##Index; |
| 39 | class MaskExpand; |
| 40 | |
| 41 | //===----------------------------------------------------------------------===// |
| 42 | // Available operations |
| 43 | //===----------------------------------------------------------------------===// |
| 44 | |
| 45 | // DAG arguments can either be operations (documented below) or variables. |
| 46 | // Variables are prefixed with '$'. There are variables for each input argument, |
| 47 | // with the name $pN, where N starts at zero. So the zero'th argument will be |
| 48 | // $p0, the first $p1 etc. |
| 49 | |
| 50 | // op - Binary or unary operator, depending on the number of arguments. The |
| 51 | // operator itself is just treated as a raw string and is not checked. |
| 52 | // example: (op "+", $p0, $p1) -> "__p0 + __p1" (binary: two arguments). |
| 53 | // (op "-", $p0) -> "-__p0" (unary: one argument). |
| 54 | def op; |
| 55 | // call - Invoke another intrinsic. The input types are type checked and |
| 56 | // disambiguated. If there is no intrinsic defined that takes |
| 57 | // the given types (or if there is a type ambiguity) an error is |
| 58 | // generated at tblgen time. The name of the intrinsic is the raw |
| 59 | // name as given to the Inst class, i.e. its Name field (not mangled). |
| 60 | // example: (call "vget_high", $p0) -> "vgetq_high_s16(__p0)" |
| 61 | // (assuming $p0 has type int16x8_t). |
| 62 | def call; |
| 63 | // cast - Perform a cast to a different type. This gets emitted as a static |
| 64 | // C-style cast. For a pure reinterpret cast (T x = *(T*)&y), use |
| 65 | // "bitcast". |
| 66 | // |
| 67 | // The syntax is (cast MOD* VAL). The last argument is the value to |
| 68 | // cast, preceded by a sequence of type modifiers. The target type |
| 69 | // starts off as the type of VAL, and is modified by MOD in sequence. |
| 70 | // The available modifiers are: |
| 71 | // - $X - Take the type of parameter/variable X. For example: |
| 72 | // (cast $p0, $p1) would cast $p1 to the type of $p0. |
| 73 | // - "R" - The type of the return type. |
| 74 | // - A typedef string - A NEON or stdint.h type that is then parsed. |
| 75 | // for example: (cast "uint32x4_t", $p0). |
| 76 | // - "U" - Make the type unsigned. |
| 77 | // - "S" - Make the type signed. |
| 78 | // - "H" - Halve the number of lanes in the type. |
| 79 | // - "D" - Double the number of lanes in the type. |
| 80 | // - "8" - Convert type to an equivalent vector of 8-bit signed |
| 81 | // integers. |
| 82 | // example: (cast "R", "U", $p0) -> "(uint32x4_t)__p0" (assuming the return |
| 83 | // value is of type "int32x4_t"). |
| 84 | // (cast $p0, "D", "8", $p1) -> "(int8x16_t)__p1" (assuming __p0 |
| 85 | // has type float64x1_t or any other vector type of 64 bits). |
| 86 | // (cast "int32_t", $p2) -> "(int32_t)__p2" |
| 87 | def cast; |
| 88 | // bitcast - Same as "cast" (including the MOD* VAL syntax), except a reinterpret-cast is produced: |
| 89 | // (bitcast "T", $p0) -> "*(T*)&__p0". |
| 90 | // The VAL argument is saved to a temporary so it can be used |
| 91 | // as an l-value (its address is taken in the emitted code). |
| 92 | def bitcast; |
| 93 | // dup - Take a scalar argument and create a vector by duplicating it into |
| 94 | // all lanes. The type of the vector is the base type of the intrinsic (use dup_typed to take the vector type from an argument instead). |
| 95 | // example: (dup $p1) -> "(uint32x2_t) {__p1, __p1}" (assuming the base type |
| 96 | // is uint32x2_t). |
| 97 | def dup; |
| 98 | // dup_typed - Take a vector and a scalar argument, and create a new vector of |
| 99 | // the same type as the vector argument by duplicating the scalar value into all lanes. |
| 100 | // example: (dup_typed $p1, $p2) -> "(float16x4_t) {__p2, __p2, __p2, __p2}" |
| 101 | // (assuming __p1 is float16x4_t, and __p2 is a compatible scalar). |
| 102 | def dup_typed; |
| 103 | // splat - Take a vector and a lane index, and return a vector of the same type |
| 104 | // in which every lane holds the element of the source vector at the lane index. |
| 105 | // example: (splat $p0, $p1) -> |
| 106 | // "__builtin_shufflevector(__p0, __p0, __p1, __p1, __p1, __p1)" |
| 107 | // (assuming __p0 has four elements). |
| 108 | def splat; |
| 109 | // save_temp - Create a temporary (local) variable. The variable takes a name |
| 110 | // based on the zero'th parameter and can be referenced |
| 111 | // using that name in subsequent DAGs in the same |
| 112 | // operation. The scope of a temp is the operation. If a variable |
| 113 | // with the given name already exists, an error will be given at |
| 114 | // tblgen time. |
| 115 | // example: [(save_temp $var, (call "foo", $p0)), |
| 116 | // (op "+", $var, $p1)] -> |
| 117 | // "int32x2_t __var = foo(__p0); return __var + __p1;" |
| 118 | def save_temp; |
| 119 | // name_replace - Return the name of the current intrinsic with the text given as the first |
| 120 | // argument replaced by the second argument. Raises an error if |
| 121 | // the first argument does not exist in the intrinsic name. |
| 122 | // example: (call (name_replace "_high_", "_"), $p0) (to call the non-high |
| 123 | // version of this intrinsic). |
| 124 | def name_replace; |
| 125 | // literal - Create a literal piece of code. The code is treated as a raw |
| 126 | // string, and must be given a type. The type is a stdint.h or |
| 127 | // NEON intrinsic type as given to (cast). |
| 128 | // example: (literal "int32_t", "0") (type first, then the raw code). |
| 129 | def literal; |
| 130 | // shuffle - Create a vector shuffle. The syntax is (shuffle ARG0, ARG1, MASK). |
| 131 | // The MASK argument is a set of elements (lane indices). The elements are generated |
| 132 | // from the two special defs "mask0" and "mask1". "mask0" expands to |
| 133 | // the lane indices in sequence for ARG0, and "mask1" expands to |
| 134 | // the lane indices in sequence for ARG1. They can be used as-is, e.g. |
| 135 | // |
| 136 | // (shuffle $p0, $p1, mask0) -> $p0 |
| 137 | // (shuffle $p0, $p1, mask1) -> $p1 |
| 138 | // |
| 139 | // or, more usefully, they can be manipulated using the SetTheory |
| 140 | // operators plus some extra operators defined in the NEON emitter. |
| 141 | // The operators are described below. |
| 142 | // example: (shuffle $p0, $p1, (add (highhalf mask0), (highhalf mask1))) -> |
| 143 | // A concatenation of the high halves of the input vectors. |
| 144 | def shuffle; |
| 145 | |
| 146 | // add, interleave, decimate: These set operators are vanilla SetTheory |
| 147 | // operators and take their normal definition (the shuffle example uses add to concatenate two half masks). |
| 148 | def add; |
| 149 | def interleave; |
| 150 | def decimate; |
| 151 | // rotl - Rotate set left by a number of elements. |
| 152 | // example: (rotl mask0, 3) -> [3, 4, 5, 6, 0, 1, 2] (assuming mask0 had 7 elements) |
| 153 | def rotl; |
| 154 | // rotr - Rotate set right by a number of elements. |
| 155 | // example: (rotr mask0, 3) -> [4, 5, 6, 0, 1, 2, 3] (assuming mask0 had 7 elements) |
| 156 | def rotr; |
| 157 | // highhalf - Take only the high half (the last half of the elements) of the input. |
| 158 | // example: (highhalf mask0) -> [4, 5, 6, 7] (assuming mask0 had 8 elements) |
| 159 | def highhalf; |
| 160 | // lowhalf - Take only the low half (the first half of the elements) of the input. |
| 161 | // example: (lowhalf mask0) -> [0, 1, 2, 3] (assuming mask0 had 8 elements) |
| 162 | def lowhalf; |
| 163 | // rev - Perform a variable-width reversal of the elements. The zero'th argument |
| 164 | // is a width in bits to reverse. The lanes this maps to are determined |
| 165 | // based on the element width of the underlying type. |
| 166 | // example: (rev 32, mask0) -> [3, 2, 1, 0, 7, 6, 5, 4] (if 8-bit elements) |
| 167 | // example: (rev 32, mask0) -> [1, 0, 3, 2] (if 16-bit elements) |
| 168 | def rev; |
| 169 | // mask0 - The initial sequence of lanes for shuffle ARG0 (a MaskExpand def). |
| 170 | def mask0 : MaskExpand; |
| 171 | // mask1 - The initial sequence of lanes for shuffle ARG1 (a MaskExpand def). |
| 172 | def mask1 : MaskExpand; |
| 173 | |
| | // OP_NONE - An Operation with the default (empty) list of DAGs. It is the |
| | // Operation passed to Inst by the builtin-based classes SInst, IInst and WInst. |
| 174 | def OP_NONE : Operation; |
| | // OP_UNAVAILABLE - An Operation with an empty list of DAGs whose Unavailable bit is set to 1. |
| 175 | def OP_UNAVAILABLE : Operation { |
| 176 | let Unavailable = 1; |
| 177 | } |
| 178 | |
| 179 | //===----------------------------------------------------------------------===// |
| 180 | // Instruction definitions |
| 181 | //===----------------------------------------------------------------------===// |
| 182 | |
| 183 | // Every intrinsic subclasses "Inst". An intrinsic has a name, a prototype and |
| 184 | // a sequence of typespecs. |
| 185 | // |
| 186 | // The name is the base name of the intrinsic, for example "vget_lane". This is |
| 187 | // then mangled by the tblgen backend to add type information ("vget_lane_s16"). |
| 188 | // |
| 189 | // A typespec is a sequence of uppercase characters (modifiers) followed by one |
| 190 | // lowercase character. A typespec encodes a particular "base type" of the |
| 191 | // intrinsic. |
| 192 | // |
| 193 | // An example typespec is "Qs" - quad-size short - int16x8_t. The available |
| 194 | // typespec codes are given below. |
| 195 | // |
| 196 | // The string given to an Inst class is a sequence of typespecs. The intrinsic |
| 197 | // is instantiated for every typespec in the sequence. For example "sdQsQd". |
| 198 | // |
| 199 | // The prototype is a string that defines the return type of the intrinsic |
| 200 | // and the type of each argument. The return type and every argument gets a |
| 201 | // "modifier" that can change in some way the "base type" of the intrinsic. |
| 202 | // |
| 203 | // The modifier 'd' means "default" and does not modify the base type in any |
| 204 | // way. The available modifiers are given below. |
| 205 | // |
| 206 | // Typespecs |
| 207 | // --------- |
| 208 | // c: char |
| 209 | // s: short |
| 210 | // i: int |
| 211 | // l: long |
| 212 | // k: 128-bit long |
| 213 | // f: float |
| 214 | // h: half-float |
| 215 | // d: double |
| 216 | // |
| 217 | // Typespec modifiers |
| 218 | // ------------------ |
| 219 | // S: scalar, only used for function mangling. |
| 220 | // U: unsigned |
| 221 | // Q: 128b |
| 222 | // H: 128b without mangling 'q' |
| 223 | // P: polynomial |
| 224 | // |
| 225 | // Prototype modifiers |
| 226 | // ------------------- |
| 227 | // prototype: return (arg, arg, ...) |
| 228 | // |
| 229 | // v: void |
| 230 | // t: best-fit integer (int/poly args) |
| 231 | // x: signed integer (int/float args) |
| 232 | // u: unsigned integer (int/float args) |
| 233 | // f: float (int args) |
| 234 | // F: double (int args) |
| 235 | // H: half (int args) |
| 236 | // 0: half (int args), ignore 'Q' size modifier. |
| 237 | // 1: half (int args), force 'Q' size modifier. |
| 238 | // d: default |
| 239 | // g: default, ignore 'Q' size modifier. |
| 240 | // j: default, force 'Q' size modifier. |
| 241 | // w: double width elements, same num elts |
| 242 | // n: double width elements, half num elts |
| 243 | // h: half width elements, double num elts |
| 244 | // q: half width elements, quad num elts |
| 245 | // e: half width elements, double num elts, unsigned |
| 246 | // m: half width elements, same num elts |
| 247 | // i: constant int |
| 248 | // l: constant uint64 |
| 249 | // s: scalar of element type |
| 250 | // z: scalar of half width element type, signed |
| 251 | // r: scalar of double width element type, signed |
| 252 | // a: scalar of element type (splat to vector type) |
| 253 | // b: scalar of unsigned integer/long type (int/float args) |
| 254 | // $: scalar of signed integer/long type (int/float args) |
| 255 | // y: scalar of float |
| 256 | // o: scalar of double |
| 257 | // k: default elt width, double num elts |
| 258 | // 2,3,4: array of default vectors |
| 259 | // B,C,D: array of default elts, force 'Q' size modifier. |
| 260 | // p: pointer type |
| 261 | // c: const pointer type |
| 262 | // 7: vector of 8-bit elements, ignore 'Q' size modifier |
| 263 | // 8: vector of 8-bit elements, same width as default type |
| 264 | // 9: vector of 8-bit elements, force 'Q' size modifier |
| 265 | |
| 266 | // Every intrinsic subclasses Inst. The template arguments are the base name (n), the prototype string (p), the typespec sequence (t) and the Operation (o); they are stored in Name, Prototype, Types and Operation respectively. Every other field defaults to 0 or to the empty string. |
| 267 | class Inst <string n, string p, string t, Operation o> { |
| 268 | string Name = n; |
| 269 | string Prototype = p; |
| 270 | string Types = t; |
| 271 | string ArchGuard = ""; |
| 272 | |
| 273 | Operation Operation = o; |
| 274 | bit CartesianProductOfTypes = 0; |
| 275 | bit BigEndianSafe = 0; |
| 276 | bit isShift = 0; |
| 277 | bit isScalarShift = 0; |
| 278 | bit isScalarNarrowShift = 0; |
| 279 | bit isVCVT_N = 0; |
| 280 | // For immediate checks: the immediate will be assumed to specify the lane of |
| 281 | // a Q register. Only used for intrinsics which end up calling polymorphic |
| 282 | // builtins. |
| 283 | bit isLaneQ = 0; |
| 284 | |
| 285 | // Certain intrinsics have different names than their representative |
| 286 | // instructions. This field allows us to handle this correctly when we |
| 287 | // are generating tests. |
| 288 | string InstName = ""; |
| 289 | |
| 290 | // Certain intrinsics, even though they are not a WOpInst or LOpInst, |
| 291 | // generate a WOpInst/LOpInst instruction (see below for definition |
| 292 | // of a WOpInst/LOpInst). For testing purposes we need to know |
| 293 | // this. Ex: vset_lane which outputs vmov instructions. |
| 294 | bit isHiddenWInst = 0; |
| 295 | bit isHiddenLInst = 0; |
| 296 | } |
| 297 | |
| 298 | // The following instruction classes are implemented via builtins. |
| 299 | // These declarations are used to generate Builtins.def; each one passes OP_NONE to Inst as its Operation: |
| 300 | // |
| 301 | // SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8", "p8") |
| 302 | // IInst: Instruction with generic integer suffix (e.g., "i8") |
| 303 | // WInst: Instruction with only bit size suffix (e.g., "8") |
| 304 | class SInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {} |
| 305 | class IInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {} |
| 306 | class WInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {} |
| 307 | |
| 308 | // The following instruction classes are implemented via operators |
| 309 | // instead of builtins. As such these declarations are only used for |
| 310 | // the purpose of generating tests. Each class forwards its Operation argument (o) to Inst unchanged. |
| 311 | // |
| 312 | // SOpInst: Instruction with signed/unsigned suffix (e.g., "s8", |
| 313 | // "u8", "p8"). |
| 314 | // IOpInst: Instruction with generic integer suffix (e.g., "i8"). |
| 315 | // WOpInst: Instruction with bit size only suffix (e.g., "8"). |
| 316 | // LOpInst: Logical instruction with no bit size suffix. |
| 317 | // NoTestOpInst: Intrinsic that has no corresponding instruction. |
| 318 | class SOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {} |
| 319 | class IOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {} |
| 320 | class WOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {} |
| 321 | class LOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {} |
| 322 | class NoTestOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {} |
| 323 | |