| 1 | // REQUIRES: nvptx-registered-target |
| 2 | // RUN: %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_60 \ |
| 3 | // RUN: -fcuda-is-device -S -emit-llvm -o - -x cuda %s \ |
| 4 | // RUN: | FileCheck -check-prefix=CHECK -check-prefix=LP32 %s |
| 5 | // RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_60 \ |
| 6 | // RUN: -fcuda-is-device -S -emit-llvm -o - -x cuda %s \ |
| 7 | // RUN: | FileCheck -check-prefix=CHECK -check-prefix=LP64 %s |
| 8 | // RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_61 \ |
| 9 | // RUN: -fcuda-is-device -S -emit-llvm -o - -x cuda %s \ |
| 10 | // RUN: | FileCheck -check-prefix=CHECK -check-prefix=LP64 %s |
| 11 | // RUN: %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_53 \ |
| 12 | // RUN: -DERROR_CHECK -fcuda-is-device -S -o /dev/null -x cuda -verify %s |
| 13 | |
// This file includes no headers, so the CUDA qualifier spellings used below
// are defined here, each expanding to the corresponding Clang attribute.
#define __device__ __attribute__((device))
#define __global__ __attribute__((global))
#define __shared__ __attribute__((shared))
#define __constant__ __attribute__((constant))
| 18 | |
// Calls the four __nvvm_read_ptx_sreg_tid_* builtins in x, y, z, w order and
// returns the sum of the values read. The call order matters: FileCheck
// matches the intrinsic calls below in that same order.
__device__ int read_tid() {
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.w()

  // One builtin per statement keeps the emission order well defined.
  int total = __nvvm_read_ptx_sreg_tid_x();
  total += __nvvm_read_ptx_sreg_tid_y();
  total += __nvvm_read_ptx_sreg_tid_z();
  total += __nvvm_read_ptx_sreg_tid_w();

  return total;
}
| 34 | |
// Calls the four __nvvm_read_ptx_sreg_ntid_* builtins in x, y, z, w order and
// returns the sum of the values read. The call order matters: FileCheck
// matches the intrinsic calls below in that same order.
__device__ int read_ntid() {
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.w()

  // One builtin per statement keeps the emission order well defined.
  int total = __nvvm_read_ptx_sreg_ntid_x();
  total += __nvvm_read_ptx_sreg_ntid_y();
  total += __nvvm_read_ptx_sreg_ntid_z();
  total += __nvvm_read_ptx_sreg_ntid_w();

  return total;
}
| 50 | |
// Calls the four __nvvm_read_ptx_sreg_ctaid_* builtins in x, y, z, w order
// and returns the sum of the values read. The call order matters: FileCheck
// matches the intrinsic calls below in that same order.
__device__ int read_ctaid() {
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()

  // One builtin per statement keeps the emission order well defined.
  int total = __nvvm_read_ptx_sreg_ctaid_x();
  total += __nvvm_read_ptx_sreg_ctaid_y();
  total += __nvvm_read_ptx_sreg_ctaid_z();
  total += __nvvm_read_ptx_sreg_ctaid_w();

  return total;
}
| 66 | |
// Calls the four __nvvm_read_ptx_sreg_nctaid_* builtins in x, y, z, w order
// and returns the sum of the values read. The call order matters: FileCheck
// matches the intrinsic calls below in that same order.
__device__ int read_nctaid() {
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()

  // One builtin per statement keeps the emission order well defined.
  int total = __nvvm_read_ptx_sreg_nctaid_x();
  total += __nvvm_read_ptx_sreg_nctaid_y();
  total += __nvvm_read_ptx_sreg_nctaid_z();
  total += __nvvm_read_ptx_sreg_nctaid_w();

  return total;
}
| 82 | |
// Calls the laneid, warpid, nwarpid, smid, nsmid and gridid special-register
// builtins, in that order, and returns the sum of the values read. FileCheck
// matches the corresponding intrinsic calls below in the same order.
__device__ int read_ids() {
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.laneid()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.warpid()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.smid()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nsmid()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.gridid()

  // One builtin per statement keeps the emission order well defined.
  int total = __nvvm_read_ptx_sreg_laneid();
  total += __nvvm_read_ptx_sreg_warpid();
  total += __nvvm_read_ptx_sreg_nwarpid();
  total += __nvvm_read_ptx_sreg_smid();
  total += __nvvm_read_ptx_sreg_nsmid();
  total += __nvvm_read_ptx_sreg_gridid();

  return total;
}
| 102 | |
// Calls the five lanemask special-register builtins in eq, le, lt, ge, gt
// order and returns the sum of the values read. FileCheck matches the
// corresponding intrinsic calls below in the same order.
__device__ int read_lanemasks() {
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()

  // One builtin per statement keeps the emission order well defined.
  int total = __nvvm_read_ptx_sreg_lanemask_eq();
  total += __nvvm_read_ptx_sreg_lanemask_le();
  total += __nvvm_read_ptx_sreg_lanemask_lt();
  total += __nvvm_read_ptx_sreg_lanemask_ge();
  total += __nvvm_read_ptx_sreg_lanemask_gt();

  return total;
}
| 120 | |
// Calls the clock builtin and then the clock64 builtin, and returns their
// sum as a long long. The check lines below match an i32 call followed by an
// i64 call, in that order.
__device__ long long read_clocks() {
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clock()
  // CHECK: call i64 @llvm.nvvm.read.ptx.sreg.clock64()

  // The narrow value is read first, into an int, and widened for the sum.
  const int narrow_ticks = __nvvm_read_ptx_sreg_clock();
  long long ticks = narrow_ticks;
  ticks += __nvvm_read_ptx_sreg_clock64();

  return ticks;
}
| 131 | |
// Calls the pm0..pm3 special-register builtins in ascending order and returns
// the sum of the values read. FileCheck matches the corresponding intrinsic
// calls below in the same order.
__device__ int read_pms() {
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm0()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm1()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm2()
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm3()

  // One builtin per statement keeps the emission order well defined.
  int total = __nvvm_read_ptx_sreg_pm0();
  total += __nvvm_read_ptx_sreg_pm1();
  total += __nvvm_read_ptx_sreg_pm2();
  total += __nvvm_read_ptx_sreg_pm3();

  return total;
}
| 147 | |
// Calls __nvvm_bar_sync with a constant argument of 0; the check line below
// matches the resulting llvm.nvvm.bar.sync call with that same i32 operand.
__device__ void sync() {
  // CHECK: call void @llvm.nvvm.bar.sync(i32 0)
  __nvvm_bar_sync(0);
}
| 155 | |
| 156 | |
// NVVM intrinsics

// The idea is not to test all intrinsics, just that Clang is recognizing the
// builtins defined in BuiltinsNVPTX.def.
//
// Covers a sample of float and double math builtins (fmax, fmin, sqrt.rn,
// rcp.rn, and add.rn for float only), the three membar builtins, and
// __syncthreads, which is matched as llvm.nvvm.barrier0.
//
// Each math result is stored in a local so that the builtin's value is
// consumed; the locals are otherwise unused.
//
// The label below scopes the following check lines to this function, in line
// with the other nvvm_* tests in this file that carry a label.
// CHECK-LABEL: nvvm_math
__device__ void nvvm_math(float f1, float f2, double d1, double d2) {
  // Single-precision builtins.
  // CHECK: call float @llvm.nvvm.fmax.f
  float t1 = __nvvm_fmax_f(f1, f2);
  // CHECK: call float @llvm.nvvm.fmin.f
  float t2 = __nvvm_fmin_f(f1, f2);
  // CHECK: call float @llvm.nvvm.sqrt.rn.f
  float t3 = __nvvm_sqrt_rn_f(f1);
  // CHECK: call float @llvm.nvvm.rcp.rn.f
  float t4 = __nvvm_rcp_rn_f(f2);
  // CHECK: call float @llvm.nvvm.add.rn.f
  float t5 = __nvvm_add_rn_f(f1, f2);

  // Double-precision builtins.
  // CHECK: call double @llvm.nvvm.fmax.d
  double td1 = __nvvm_fmax_d(d1, d2);
  // CHECK: call double @llvm.nvvm.fmin.d
  double td2 = __nvvm_fmin_d(d1, d2);
  // CHECK: call double @llvm.nvvm.sqrt.rn.d
  double td3 = __nvvm_sqrt_rn_d(d1);
  // CHECK: call double @llvm.nvvm.rcp.rn.d
  double td4 = __nvvm_rcp_rn_d(d2);

  // Memory barriers and the block barrier.
  // CHECK: call void @llvm.nvvm.membar.cta()
  __nvvm_membar_cta();
  // CHECK: call void @llvm.nvvm.membar.gl()
  __nvvm_membar_gl();
  // CHECK: call void @llvm.nvvm.membar.sys()
  __nvvm_membar_sys();
  // CHECK: call void @llvm.nvvm.barrier0()
  __syncthreads();
}
| 191 | |
// File-scope variables declared with the device and shared attributes, one
// pair each for int, long and long long. nvvm_atom takes the address of dl and
// sll as operands for the 'long' and 'long long' atomic builtins; the other
// four are not referenced in the visible part of this file.
__device__ int di;
__shared__ int si;
__device__ long dl;
__shared__ long sl;
__device__ long long dll;
__shared__ long long sll;
| 198 | |
// Atomic builtins.
//
// First half: the unscoped __nvvm_atom_*_gen_* builtins. Integer add, sub,
// and, or, xor, xchg, max and min are matched as atomicrmw instructions, cas
// as cmpxchg followed by an extractvalue, and float add / unsigned inc / dec
// as llvm.nvvm.atomic.load.* intrinsic calls.
//
// Second half (guarded by the #if): the scoped _cta_ and _sys_ variants, which
// are matched as llvm.nvvm.atomic.*.gen.* intrinsic calls. Operations on
// 'long' are matched with separate 32-bit and 64-bit check prefixes because
// the matched integer width differs between the nvptx and nvptx64 runs.
//
// Ordering is significant throughout: FileCheck matches in source order, and
// each diagnostic directive in the scoped half refers to the line directly
// after it (@+1), so nothing may be inserted between a directive and its call.
// CHECK-LABEL: nvvm_atom
__device__ void nvvm_atom(float *fp, float f, double *dfp, double df, int *ip,
                          int i, unsigned int *uip, unsigned ui, long *lp,
                          long l, long long *llp, long long ll) {
  // Unsigned views of the signed operands, shared by the unsigned builtins.
  unsigned int *ip_u = (unsigned int *)ip;
  unsigned long *dl_u = (unsigned long *)&dl;
  unsigned long long *sll_u = (unsigned long long *)&sll;

  // --- add ---
  // CHECK: atomicrmw add
  __nvvm_atom_add_gen_i(ip, i);
  // CHECK: atomicrmw add
  __nvvm_atom_add_gen_l(&dl, l);
  // CHECK: atomicrmw add
  __nvvm_atom_add_gen_ll(&sll, ll);

  // --- sub ---
  // CHECK: atomicrmw sub
  __nvvm_atom_sub_gen_i(ip, i);
  // CHECK: atomicrmw sub
  __nvvm_atom_sub_gen_l(&dl, l);
  // CHECK: atomicrmw sub
  __nvvm_atom_sub_gen_ll(&sll, ll);

  // --- and ---
  // CHECK: atomicrmw and
  __nvvm_atom_and_gen_i(ip, i);
  // CHECK: atomicrmw and
  __nvvm_atom_and_gen_l(&dl, l);
  // CHECK: atomicrmw and
  __nvvm_atom_and_gen_ll(&sll, ll);

  // --- or ---
  // CHECK: atomicrmw or
  __nvvm_atom_or_gen_i(ip, i);
  // CHECK: atomicrmw or
  __nvvm_atom_or_gen_l(&dl, l);
  // CHECK: atomicrmw or
  __nvvm_atom_or_gen_ll(&sll, ll);

  // --- xor ---
  // CHECK: atomicrmw xor
  __nvvm_atom_xor_gen_i(ip, i);
  // CHECK: atomicrmw xor
  __nvvm_atom_xor_gen_l(&dl, l);
  // CHECK: atomicrmw xor
  __nvvm_atom_xor_gen_ll(&sll, ll);

  // --- xchg ---
  // CHECK: atomicrmw xchg
  __nvvm_atom_xchg_gen_i(ip, i);
  // CHECK: atomicrmw xchg
  __nvvm_atom_xchg_gen_l(&dl, l);
  // CHECK: atomicrmw xchg
  __nvvm_atom_xchg_gen_ll(&sll, ll);

  // --- max (signed and unsigned) ---
  // CHECK: atomicrmw max i32*
  __nvvm_atom_max_gen_i(ip, i);
  // CHECK: atomicrmw umax i32*
  __nvvm_atom_max_gen_ui(ip_u, i);
  // CHECK: atomicrmw max
  __nvvm_atom_max_gen_l(&dl, l);
  // CHECK: atomicrmw umax
  __nvvm_atom_max_gen_ul(dl_u, l);
  // CHECK: atomicrmw max i64*
  __nvvm_atom_max_gen_ll(&sll, ll);
  // CHECK: atomicrmw umax i64*
  __nvvm_atom_max_gen_ull(sll_u, ll);

  // --- min (signed and unsigned) ---
  // CHECK: atomicrmw min i32*
  __nvvm_atom_min_gen_i(ip, i);
  // CHECK: atomicrmw umin i32*
  __nvvm_atom_min_gen_ui(ip_u, i);
  // CHECK: atomicrmw min
  __nvvm_atom_min_gen_l(&dl, l);
  // CHECK: atomicrmw umin
  __nvvm_atom_min_gen_ul(dl_u, l);
  // CHECK: atomicrmw min i64*
  __nvvm_atom_min_gen_ll(&sll, ll);
  // CHECK: atomicrmw umin i64*
  __nvvm_atom_min_gen_ull(sll_u, ll);

  // --- compare-and-swap ---
  // CHECK: cmpxchg
  // CHECK-NEXT: extractvalue { i32, i1 } {{%[0-9]+}}, 0
  __nvvm_atom_cas_gen_i(ip, 0, i);
  // CHECK: cmpxchg
  // CHECK-NEXT: extractvalue { {{i32|i64}}, i1 } {{%[0-9]+}}, 0
  __nvvm_atom_cas_gen_l(&dl, 0, l);
  // CHECK: cmpxchg
  // CHECK-NEXT: extractvalue { i64, i1 } {{%[0-9]+}}, 0
  __nvvm_atom_cas_gen_ll(&sll, 0, ll);

  // --- operations matched as intrinsic calls ---
  // CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32
  __nvvm_atom_add_gen_f(fp, f);

  // CHECK: call i32 @llvm.nvvm.atomic.load.inc.32.p0i32
  __nvvm_atom_inc_gen_ui(uip, ui);

  // CHECK: call i32 @llvm.nvvm.atomic.load.dec.32.p0i32
  __nvvm_atom_dec_gen_ui(uip, ui);

  // Atomics with scope (only supported on sm_60+).
  // The region is also compiled when ERROR_CHECK is defined, so that the
  // sm_53 verify run sees one "needs target feature" error per scoped builtin.

#if ERROR_CHECK || __CUDA_ARCH__ >= 600

  // Unsigned views of the pointer parameters used only by the scoped tests.
  unsigned long *lp_u = (unsigned long *)lp;
  unsigned long long *llp_u = (unsigned long long *)llp;

  // --- scoped add (integer) ---
  // CHECK: call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_add_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_add_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_add_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_add_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_add_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_add_gen_ll(&sll, ll);
  // CHECK: call i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_add_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_add_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_add_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_add_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_add_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_add_gen_ll(&sll, ll);

  // --- scoped add (floating point) ---
  // CHECK: call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0f32
  // expected-error@+1 {{'__nvvm_atom_cta_add_gen_f' needs target feature sm_60}}
  __nvvm_atom_cta_add_gen_f(fp, f);
  // CHECK: call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0f64
  // expected-error@+1 {{'__nvvm_atom_cta_add_gen_d' needs target feature sm_60}}
  __nvvm_atom_cta_add_gen_d(dfp, df);
  // CHECK: call float @llvm.nvvm.atomic.add.gen.f.sys.f32.p0f32
  // expected-error@+1 {{'__nvvm_atom_sys_add_gen_f' needs target feature sm_60}}
  __nvvm_atom_sys_add_gen_f(fp, f);
  // CHECK: call double @llvm.nvvm.atomic.add.gen.f.sys.f64.p0f64
  // expected-error@+1 {{'__nvvm_atom_sys_add_gen_d' needs target feature sm_60}}
  __nvvm_atom_sys_add_gen_d(dfp, df);

  // --- scoped xchg ---
  // CHECK: call i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_xchg_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_xchg_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_xchg_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_xchg_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_xchg_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_xchg_gen_ll(&sll, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_xchg_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_xchg_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_xchg_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_xchg_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_xchg_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_xchg_gen_ll(&sll, ll);

  // --- scoped max ---
  // CHECK: call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_max_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_max_gen_i(ip, i);
  // CHECK: call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_max_gen_ui' needs target feature sm_60}}
  __nvvm_atom_cta_max_gen_ui(ip_u, i);
  // LP32: call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_max_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_max_gen_l(&dl, l);
  // LP32: call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_max_gen_ul' needs target feature sm_60}}
  __nvvm_atom_cta_max_gen_ul(lp_u, l);
  // CHECK: call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_max_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_max_gen_ll(&sll, ll);
  // CHECK: call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_max_gen_ull' needs target feature sm_60}}
  __nvvm_atom_cta_max_gen_ull(llp_u, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_max_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_max_gen_i(ip, i);
  // CHECK: call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_max_gen_ui' needs target feature sm_60}}
  __nvvm_atom_sys_max_gen_ui(ip_u, i);
  // LP32: call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_max_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_max_gen_l(&dl, l);
  // LP32: call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_max_gen_ul' needs target feature sm_60}}
  __nvvm_atom_sys_max_gen_ul(lp_u, l);
  // CHECK: call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_max_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_max_gen_ll(&sll, ll);
  // CHECK: call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_max_gen_ull' needs target feature sm_60}}
  __nvvm_atom_sys_max_gen_ull(llp_u, ll);

  // --- scoped min ---
  // CHECK: call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_min_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_min_gen_i(ip, i);
  // CHECK: call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_min_gen_ui' needs target feature sm_60}}
  __nvvm_atom_cta_min_gen_ui(ip_u, i);
  // LP32: call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_min_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_min_gen_l(&dl, l);
  // LP32: call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_min_gen_ul' needs target feature sm_60}}
  __nvvm_atom_cta_min_gen_ul(lp_u, l);
  // CHECK: call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_min_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_min_gen_ll(&sll, ll);
  // CHECK: call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_min_gen_ull' needs target feature sm_60}}
  __nvvm_atom_cta_min_gen_ull(llp_u, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_min_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_min_gen_i(ip, i);
  // CHECK: call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_min_gen_ui' needs target feature sm_60}}
  __nvvm_atom_sys_min_gen_ui(ip_u, i);
  // LP32: call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_min_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_min_gen_l(&dl, l);
  // LP32: call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_min_gen_ul' needs target feature sm_60}}
  __nvvm_atom_sys_min_gen_ul(lp_u, l);
  // CHECK: call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_min_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_min_gen_ll(&sll, ll);
  // CHECK: call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_min_gen_ull' needs target feature sm_60}}
  __nvvm_atom_sys_min_gen_ull(llp_u, ll);

  // --- scoped inc / dec ---
  // CHECK: call i32 @llvm.nvvm.atomic.inc.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_inc_gen_ui' needs target feature sm_60}}
  __nvvm_atom_cta_inc_gen_ui(ip_u, i);
  // CHECK: call i32 @llvm.nvvm.atomic.inc.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_inc_gen_ui' needs target feature sm_60}}
  __nvvm_atom_sys_inc_gen_ui(ip_u, i);

  // CHECK: call i32 @llvm.nvvm.atomic.dec.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_dec_gen_ui' needs target feature sm_60}}
  __nvvm_atom_cta_dec_gen_ui(ip_u, i);
  // CHECK: call i32 @llvm.nvvm.atomic.dec.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_dec_gen_ui' needs target feature sm_60}}
  __nvvm_atom_sys_dec_gen_ui(ip_u, i);

  // --- scoped and ---
  // CHECK: call i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_and_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_and_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_and_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_and_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_and_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_and_gen_ll(&sll, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_and_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_and_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_and_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_and_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_and_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_and_gen_ll(&sll, ll);

  // --- scoped or ---
  // CHECK: call i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_or_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_or_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_or_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_or_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_or_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_or_gen_ll(&sll, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_or_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_or_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_or_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_or_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_or_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_or_gen_ll(&sll, ll);

  // --- scoped xor ---
  // CHECK: call i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_xor_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_xor_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_xor_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_xor_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_xor_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_xor_gen_ll(&sll, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_xor_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_xor_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_xor_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_xor_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_xor_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_xor_gen_ll(&sll, ll);

  // --- scoped compare-and-swap ---
  // CHECK: call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_cas_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_cas_gen_i(ip, i, 0);
  // LP32: call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_cas_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_cas_gen_l(&dl, l, 0);
  // CHECK: call i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_cas_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_cas_gen_ll(&sll, ll, 0);

  // CHECK: call i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_cas_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_cas_gen_i(ip, i, 0);
  // LP32: call i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_cas_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_cas_gen_l(&dl, l, 0);
  // CHECK: call i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_cas_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_cas_gen_ll(&sll, ll, 0);
#endif

  // CHECK: ret
}
| 542 | |
// __nvvm_ldg_* builtins, scalar and vector. Each call is matched as a call to
// an llvm.nvvm.ldg.global.{i,f}.* intrinsic whose trailing i32 operand is the
// alignment written in the check line. Signed and unsigned integer variants
// are matched by the same pattern, so each integer pattern appears twice.
// CHECK-LABEL: nvvm_ldg
__device__ void nvvm_ldg(const void *p) {
  // ext_vector_type typedefs used as pointee types for the vector builtins.
  typedef char char2 __attribute__((ext_vector_type(2)));
  typedef unsigned char uchar2 __attribute__((ext_vector_type(2)));
  typedef char char4 __attribute__((ext_vector_type(4)));
  typedef unsigned char uchar4 __attribute__((ext_vector_type(4)));
  typedef short short2 __attribute__((ext_vector_type(2)));
  typedef unsigned short ushort2 __attribute__((ext_vector_type(2)));
  typedef short short4 __attribute__((ext_vector_type(4)));
  typedef unsigned short ushort4 __attribute__((ext_vector_type(4)));
  typedef int int2 __attribute__((ext_vector_type(2)));
  typedef unsigned int uint2 __attribute__((ext_vector_type(2)));
  typedef int int4 __attribute__((ext_vector_type(4)));
  typedef unsigned int uint4 __attribute__((ext_vector_type(4)));
  typedef long long longlong2 __attribute__((ext_vector_type(2)));
  typedef unsigned long long ulonglong2 __attribute__((ext_vector_type(2)));
  typedef float float2 __attribute__((ext_vector_type(2)));
  typedef float float4 __attribute__((ext_vector_type(4)));
  typedef double double2 __attribute__((ext_vector_type(2)));

  // --- scalar loads ---
  // CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0i8(i8* {{%[0-9]+}}, i32 1)
  // CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0i8(i8* {{%[0-9]+}}, i32 1)
  __nvvm_ldg_c((const char *)p);
  __nvvm_ldg_uc((const unsigned char *)p);

  // CHECK: call i16 @llvm.nvvm.ldg.global.i.i16.p0i16(i16* {{%[0-9]+}}, i32 2)
  // CHECK: call i16 @llvm.nvvm.ldg.global.i.i16.p0i16(i16* {{%[0-9]+}}, i32 2)
  __nvvm_ldg_s((const short *)p);
  __nvvm_ldg_us((const unsigned short *)p);

  // CHECK: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4)
  // CHECK: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4)
  __nvvm_ldg_i((const int *)p);
  __nvvm_ldg_ui((const unsigned int *)p);

  // 'long' is matched separately for the 32-bit and 64-bit runs.
  // LP32: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4)
  // LP32: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4)
  // LP64: call i64 @llvm.nvvm.ldg.global.i.i64.p0i64(i64* {{%[0-9]+}}, i32 8)
  // LP64: call i64 @llvm.nvvm.ldg.global.i.i64.p0i64(i64* {{%[0-9]+}}, i32 8)
  __nvvm_ldg_l((const long *)p);
  __nvvm_ldg_ul((const unsigned long *)p);

  // CHECK: call float @llvm.nvvm.ldg.global.f.f32.p0f32(float* {{%[0-9]+}}, i32 4)
  __nvvm_ldg_f((const float *)p);
  // CHECK: call double @llvm.nvvm.ldg.global.f.f64.p0f64(double* {{%[0-9]+}}, i32 8)
  __nvvm_ldg_d((const double *)p);

  // --- vector loads ---
  // In practice, the pointers passed to __ldg will be aligned as appropriate
  // for the CUDA <type>N vector types (e.g. short4), which are not the same as
  // the LLVM vector types. However, each LLVM vector type has an alignment
  // less than or equal to its corresponding CUDA type, so this is OK.
  //
  // PTX Interoperability section 2.2: "For a vector with an even number of
  // elements, its alignment is set to number of elements times the alignment of
  // its member: n*alignof(t)."

  // CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0v2i8(<2 x i8>* {{%[0-9]+}}, i32 2)
  // CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0v2i8(<2 x i8>* {{%[0-9]+}}, i32 2)
  __nvvm_ldg_c2((const char2 *)p);
  __nvvm_ldg_uc2((const uchar2 *)p);

  // CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0v4i8(<4 x i8>* {{%[0-9]+}}, i32 4)
  // CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0v4i8(<4 x i8>* {{%[0-9]+}}, i32 4)
  __nvvm_ldg_c4((const char4 *)p);
  __nvvm_ldg_uc4((const uchar4 *)p);

  // CHECK: call <2 x i16> @llvm.nvvm.ldg.global.i.v2i16.p0v2i16(<2 x i16>* {{%[0-9]+}}, i32 4)
  // CHECK: call <2 x i16> @llvm.nvvm.ldg.global.i.v2i16.p0v2i16(<2 x i16>* {{%[0-9]+}}, i32 4)
  __nvvm_ldg_s2((const short2 *)p);
  __nvvm_ldg_us2((const ushort2 *)p);

  // CHECK: call <4 x i16> @llvm.nvvm.ldg.global.i.v4i16.p0v4i16(<4 x i16>* {{%[0-9]+}}, i32 8)
  // CHECK: call <4 x i16> @llvm.nvvm.ldg.global.i.v4i16.p0v4i16(<4 x i16>* {{%[0-9]+}}, i32 8)
  __nvvm_ldg_s4((const short4 *)p);
  __nvvm_ldg_us4((const ushort4 *)p);

  // CHECK: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0v2i32(<2 x i32>* {{%[0-9]+}}, i32 8)
  // CHECK: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0v2i32(<2 x i32>* {{%[0-9]+}}, i32 8)
  __nvvm_ldg_i2((const int2 *)p);
  __nvvm_ldg_ui2((const uint2 *)p);

  // CHECK: call <4 x i32> @llvm.nvvm.ldg.global.i.v4i32.p0v4i32(<4 x i32>* {{%[0-9]+}}, i32 16)
  // CHECK: call <4 x i32> @llvm.nvvm.ldg.global.i.v4i32.p0v4i32(<4 x i32>* {{%[0-9]+}}, i32 16)
  __nvvm_ldg_i4((const int4 *)p);
  __nvvm_ldg_ui4((const uint4 *)p);

  // CHECK: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0v2i64(<2 x i64>* {{%[0-9]+}}, i32 16)
  // CHECK: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0v2i64(<2 x i64>* {{%[0-9]+}}, i32 16)
  __nvvm_ldg_ll2((const longlong2 *)p);
  __nvvm_ldg_ull2((const ulonglong2 *)p);

  // CHECK: call <2 x float> @llvm.nvvm.ldg.global.f.v2f32.p0v2f32(<2 x float>* {{%[0-9]+}}, i32 8)
  __nvvm_ldg_f2((const float2 *)p);

  // CHECK: call <4 x float> @llvm.nvvm.ldg.global.f.v4f32.p0v4f32(<4 x float>* {{%[0-9]+}}, i32 16)
  __nvvm_ldg_f4((const float4 *)p);

  // CHECK: call <2 x double> @llvm.nvvm.ldg.global.f.v2f64.p0v2f64(<2 x double>* {{%[0-9]+}}, i32 16)
  __nvvm_ldg_d2((const double2 *)p);
}
| 642 | |
// __nvvm_shfl_* builtins: the down, up, bfly and idx modes, each in an i32 and
// an f32 flavour. Every call is matched as a call to the llvm.nvvm.shfl.*
// intrinsic of the same mode and type; only the type of the first operand is
// pinned by the check lines.
// CHECK-LABEL: nvvm_shfl
__device__ void nvvm_shfl(int i, float f, int a, int b) {
  // down
  // CHECK: call i32 @llvm.nvvm.shfl.down.i32(i32
  __nvvm_shfl_down_i32(i, a, b);
  // CHECK: call float @llvm.nvvm.shfl.down.f32(float
  __nvvm_shfl_down_f32(f, a, b);

  // up
  // CHECK: call i32 @llvm.nvvm.shfl.up.i32(i32
  __nvvm_shfl_up_i32(i, a, b);
  // CHECK: call float @llvm.nvvm.shfl.up.f32(float
  __nvvm_shfl_up_f32(f, a, b);

  // bfly
  // CHECK: call i32 @llvm.nvvm.shfl.bfly.i32(i32
  __nvvm_shfl_bfly_i32(i, a, b);
  // CHECK: call float @llvm.nvvm.shfl.bfly.f32(float
  __nvvm_shfl_bfly_f32(f, a, b);

  // idx
  // CHECK: call i32 @llvm.nvvm.shfl.idx.i32(i32
  __nvvm_shfl_idx_i32(i, a, b);
  // CHECK: call float @llvm.nvvm.shfl.idx.f32(float
  __nvvm_shfl_idx_f32(f, a, b);

  // CHECK: ret void
}
| 663 | |
// __nvvm_vote_* builtins: all, any, uni and ballot. Each call is matched as a
// call to the llvm.nvvm.vote.* intrinsic of the same name taking an i1 first
// operand; all/any/uni are matched returning i1 and ballot returning i32.
//
// The label below scopes the following check lines to this function, in line
// with the other nvvm_* tests in this file that carry a label.
// CHECK-LABEL: nvvm_vote
__device__ void nvvm_vote(int pred) {
  // CHECK: call i1 @llvm.nvvm.vote.all(i1
  __nvvm_vote_all(pred);
  // CHECK: call i1 @llvm.nvvm.vote.any(i1
  __nvvm_vote_any(pred);
  // CHECK: call i1 @llvm.nvvm.vote.uni(i1
  __nvvm_vote_uni(pred);
  // CHECK: call i32 @llvm.nvvm.vote.ballot(i1
  __nvvm_vote_ballot(pred);
  // CHECK: ret void
}
| 675 | |