builtins-nvptx.c source code [clang_source_code/test/CodeGen/builtins-nvptx.c]

// REQUIRES: nvptx-registered-target
// RUN: %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_60 \
// RUN:            -fcuda-is-device -S -emit-llvm -o - -x cuda %s \
// RUN:   | FileCheck -check-prefix=CHECK -check-prefix=LP32 %s
// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_60 \
// RUN:            -fcuda-is-device -S -emit-llvm -o - -x cuda %s \
// RUN:   | FileCheck -check-prefix=CHECK -check-prefix=LP64 %s
// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_61 \
// RUN:            -fcuda-is-device -S -emit-llvm -o - -x cuda %s \
// RUN:   | FileCheck -check-prefix=CHECK -check-prefix=LP64 %s
// RUN: %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_53 \
// RUN:   -DERROR_CHECK -fcuda-is-device -S -o /dev/null -x cuda -verify %s
13
// Stand-ins for the CUDA qualifiers, spelled as the underlying Clang
// attributes. No header is included by this file, so the test defines the
// qualifiers it uses itself.
#define __device__ __attribute__((device))
#define __global__ __attribute__((global))
#define __shared__ __attribute__((shared))
#define __constant__ __attribute__((constant))
18
// Reads the x, y, z and w components of the tid special register through the
// __nvvm_read_ptx_sreg_tid_* builtins. Each read should show up in the IR as a
// call to the matching llvm.nvvm.read.ptx.sreg.tid.* intrinsic. The four values
// are summed and returned so that every read is used.
__device__ int read_tid() {
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
  int tid_x = __nvvm_read_ptx_sreg_tid_x();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
  int tid_y = __nvvm_read_ptx_sreg_tid_y();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
  int tid_z = __nvvm_read_ptx_sreg_tid_z();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.w()
  int tid_w = __nvvm_read_ptx_sreg_tid_w();

  return tid_x + tid_y + tid_z + tid_w;
}
34
// Reads the x, y, z and w components of the ntid special register through the
// __nvvm_read_ptx_sreg_ntid_* builtins. Each read should show up in the IR as a
// call to the matching llvm.nvvm.read.ptx.sreg.ntid.* intrinsic. The four
// values are summed and returned so that every read is used.
__device__ int read_ntid() {
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
  int ntid_x = __nvvm_read_ptx_sreg_ntid_x();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
  int ntid_y = __nvvm_read_ptx_sreg_ntid_y();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
  int ntid_z = __nvvm_read_ptx_sreg_ntid_z();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
  int ntid_w = __nvvm_read_ptx_sreg_ntid_w();

  return ntid_x + ntid_y + ntid_z + ntid_w;
}
50
// Reads the x, y, z and w components of the ctaid special register through the
// __nvvm_read_ptx_sreg_ctaid_* builtins. Each read should show up in the IR as
// a call to the matching llvm.nvvm.read.ptx.sreg.ctaid.* intrinsic. The four
// values are summed and returned so that every read is used.
__device__ int read_ctaid() {
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
  int ctaid_x = __nvvm_read_ptx_sreg_ctaid_x();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
  int ctaid_y = __nvvm_read_ptx_sreg_ctaid_y();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
  int ctaid_z = __nvvm_read_ptx_sreg_ctaid_z();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
  int ctaid_w = __nvvm_read_ptx_sreg_ctaid_w();

  return ctaid_x + ctaid_y + ctaid_z + ctaid_w;
}
66
// Reads the x, y, z and w components of the nctaid special register through
// the __nvvm_read_ptx_sreg_nctaid_* builtins. Each read should show up in the
// IR as a call to the matching llvm.nvvm.read.ptx.sreg.nctaid.* intrinsic. The
// four values are summed and returned so that every read is used.
__device__ int read_nctaid() {
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
  int nctaid_x = __nvvm_read_ptx_sreg_nctaid_x();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
  int nctaid_y = __nvvm_read_ptx_sreg_nctaid_y();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
  int nctaid_z = __nvvm_read_ptx_sreg_nctaid_z();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
  int nctaid_w = __nvvm_read_ptx_sreg_nctaid_w();

  return nctaid_x + nctaid_y + nctaid_z + nctaid_w;
}
82
// Reads the laneid, warpid, nwarpid, smid, nsmid and gridid special registers
// through their __nvvm_read_ptx_sreg_* builtins. Each read should show up in
// the IR as a call to the matching llvm.nvvm.read.ptx.sreg.* intrinsic. The
// values are summed and returned so that every read is used.
__device__ int read_ids() {
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.laneid()
  int lane = __nvvm_read_ptx_sreg_laneid();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.warpid()
  int warp = __nvvm_read_ptx_sreg_warpid();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
  int warp_count = __nvvm_read_ptx_sreg_nwarpid();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.smid()
  int sm = __nvvm_read_ptx_sreg_smid();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nsmid()
  int sm_count = __nvvm_read_ptx_sreg_nsmid();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.gridid()
  int grid = __nvvm_read_ptx_sreg_gridid();

  return lane + warp + warp_count + sm + sm_count + grid;
}
102
// Reads the five lanemask special registers (eq, le, lt, ge, gt) through the
// __nvvm_read_ptx_sreg_lanemask_* builtins. Each read should show up in the IR
// as a call to the matching llvm.nvvm.read.ptx.sreg.lanemask.* intrinsic. The
// values are summed and returned so that every read is used.
__device__ int read_lanemasks() {
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
  int mask_eq = __nvvm_read_ptx_sreg_lanemask_eq();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
  int mask_le = __nvvm_read_ptx_sreg_lanemask_le();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
  int mask_lt = __nvvm_read_ptx_sreg_lanemask_lt();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
  int mask_ge = __nvvm_read_ptx_sreg_lanemask_ge();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
  int mask_gt = __nvvm_read_ptx_sreg_lanemask_gt();

  return mask_eq + mask_le + mask_lt + mask_ge + mask_gt;
}
120
// Reads the clock and clock64 special registers. Per the directives below, the
// first read is an i32 intrinsic call and the second an i64 one. The 32-bit
// value is held in an int and widened when added to the 64-bit value.
__device__ long long read_clocks() {
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clock()
  int ticks32 = __nvvm_read_ptx_sreg_clock();
  // CHECK: call i64 @llvm.nvvm.read.ptx.sreg.clock64()
  long long ticks64 = __nvvm_read_ptx_sreg_clock64();

  return ticks32 + ticks64;
}
131
// Reads the pm0..pm3 special registers through the __nvvm_read_ptx_sreg_pm*
// builtins. Each read should show up in the IR as a call to the matching
// llvm.nvvm.read.ptx.sreg.pm* intrinsic. The values are summed and returned so
// that every read is used.
__device__ int read_pms() {
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm0()
  int pm0 = __nvvm_read_ptx_sreg_pm0();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm1()
  int pm1 = __nvvm_read_ptx_sreg_pm1();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm2()
  int pm2 = __nvvm_read_ptx_sreg_pm2();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm3()
  int pm3 = __nvvm_read_ptx_sreg_pm3();

  return pm0 + pm1 + pm2 + pm3;
}
147
// Calls __nvvm_bar_sync with a literal 0; the IR should contain a call to
// llvm.nvvm.bar.sync carrying the same i32 0 argument.
__device__ void sync() {
  // CHECK: call void @llvm.nvvm.bar.sync(i32 0)
  __nvvm_bar_sync(0);
}
155
156
// NVVM intrinsics
//
// This is a spot check rather than an exhaustive list: it only confirms that
// Clang recognizes builtins defined in BuiltinsNVPTX.def and maps them to the
// corresponding llvm.nvvm.* intrinsics.
//
// f1/f2 feed the single-precision builtins and d1/d2 the double-precision
// ones. The results are stored in locals and otherwise unused.
__device__ void nvvm_math(float f1, float f2, double d1, double d2) {
  // Single-precision math.
  // CHECK: call float @llvm.nvvm.fmax.f
  float max_f = __nvvm_fmax_f(f1, f2);
  // CHECK: call float @llvm.nvvm.fmin.f
  float min_f = __nvvm_fmin_f(f1, f2);
  // CHECK: call float @llvm.nvvm.sqrt.rn.f
  float sqrt_f = __nvvm_sqrt_rn_f(f1);
  // CHECK: call float @llvm.nvvm.rcp.rn.f
  float rcp_f = __nvvm_rcp_rn_f(f2);
  // CHECK: call float @llvm.nvvm.add.rn.f
  float sum_f = __nvvm_add_rn_f(f1, f2);

  // Double-precision math.
  // CHECK: call double @llvm.nvvm.fmax.d
  double max_d = __nvvm_fmax_d(d1, d2);
  // CHECK: call double @llvm.nvvm.fmin.d
  double min_d = __nvvm_fmin_d(d1, d2);
  // CHECK: call double @llvm.nvvm.sqrt.rn.d
  double sqrt_d = __nvvm_sqrt_rn_d(d1);
  // CHECK: call double @llvm.nvvm.rcp.rn.d
  double rcp_d = __nvvm_rcp_rn_d(d2);

  // Memory barriers at cta, gl and sys level, then the block barrier.
  // CHECK: call void @llvm.nvvm.membar.cta()
  __nvvm_membar_cta();
  // CHECK: call void @llvm.nvvm.membar.gl()
  __nvvm_membar_gl();
  // CHECK: call void @llvm.nvvm.membar.sys()
  __nvvm_membar_sys();
  // CHECK: call void @llvm.nvvm.barrier0()
  __syncthreads();
}
191
// Globals available to the atomic tests. nvvm_atom below takes the address of
// dl and sll; the other declarations are not referenced in this part of the
// file.
__device__ int di;
__shared__ int si;
__device__ long dl;
__shared__ long sl;
__device__ long long dll;
__shared__ long long sll;

// Check for atomic intrinsics
//
// Parameters:
//   fp, dfp, ip, uip, lp, llp - pointers used as the address operand of the
//       atomic builtins. They have to be pointers: every builtin takes the
//       address as its first argument, and lp/llp are additionally cast to
//       pointer-to-unsigned types below.
//   f, df, i, ui, l, ll       - the value operands.
//
// The first half exercises the unscoped generic-address-space builtins, which
// should lower to atomicrmw/cmpxchg instructions or llvm.nvvm.atomic.load.*
// calls. The second half exercises the _cta_/_sys_ scoped builtins. That half
// is compiled only when ERROR_CHECK is defined (the sm_53 -verify run) or when
// __CUDA_ARCH__ >= 600. In the -verify run each scoped call must produce the
// "needs target feature sm_60" diagnostic named on the line directly above it,
// so no line may be inserted between a diagnostic directive and its call.
//
// Builtins operating on `long` are matched with the 32-bit or 64-bit prefix
// because the width of long differs between the nvptx and nvptx64 triples.
// CHECK-LABEL: nvvm_atom
__device__ void nvvm_atom(float *fp, float f, double *dfp, double df, int *ip,
                          int i, unsigned int *uip, unsigned ui, long *lp,
                          long l, long long *llp, long long ll) {
  // CHECK: atomicrmw add
  __nvvm_atom_add_gen_i(ip, i);
  // CHECK: atomicrmw add
  __nvvm_atom_add_gen_l(&dl, l);
  // CHECK: atomicrmw add
  __nvvm_atom_add_gen_ll(&sll, ll);

  // CHECK: atomicrmw sub
  __nvvm_atom_sub_gen_i(ip, i);
  // CHECK: atomicrmw sub
  __nvvm_atom_sub_gen_l(&dl, l);
  // CHECK: atomicrmw sub
  __nvvm_atom_sub_gen_ll(&sll, ll);

  // CHECK: atomicrmw and
  __nvvm_atom_and_gen_i(ip, i);
  // CHECK: atomicrmw and
  __nvvm_atom_and_gen_l(&dl, l);
  // CHECK: atomicrmw and
  __nvvm_atom_and_gen_ll(&sll, ll);

  // CHECK: atomicrmw or
  __nvvm_atom_or_gen_i(ip, i);
  // CHECK: atomicrmw or
  __nvvm_atom_or_gen_l(&dl, l);
  // CHECK: atomicrmw or
  __nvvm_atom_or_gen_ll(&sll, ll);

  // CHECK: atomicrmw xor
  __nvvm_atom_xor_gen_i(ip, i);
  // CHECK: atomicrmw xor
  __nvvm_atom_xor_gen_l(&dl, l);
  // CHECK: atomicrmw xor
  __nvvm_atom_xor_gen_ll(&sll, ll);

  // CHECK: atomicrmw xchg
  __nvvm_atom_xchg_gen_i(ip, i);
  // CHECK: atomicrmw xchg
  __nvvm_atom_xchg_gen_l(&dl, l);
  // CHECK: atomicrmw xchg
  __nvvm_atom_xchg_gen_ll(&sll, ll);

  // CHECK: atomicrmw max i32*
  __nvvm_atom_max_gen_i(ip, i);
  // CHECK: atomicrmw umax i32*
  __nvvm_atom_max_gen_ui((unsigned int *)ip, i);
  // CHECK: atomicrmw max
  __nvvm_atom_max_gen_l(&dl, l);
  // CHECK: atomicrmw umax
  __nvvm_atom_max_gen_ul((unsigned long *)&dl, l);
  // CHECK: atomicrmw max i64*
  __nvvm_atom_max_gen_ll(&sll, ll);
  // CHECK: atomicrmw umax i64*
  __nvvm_atom_max_gen_ull((unsigned long long *)&sll, ll);

  // CHECK: atomicrmw min i32*
  __nvvm_atom_min_gen_i(ip, i);
  // CHECK: atomicrmw umin i32*
  __nvvm_atom_min_gen_ui((unsigned int *)ip, i);
  // CHECK: atomicrmw min
  __nvvm_atom_min_gen_l(&dl, l);
  // CHECK: atomicrmw umin
  __nvvm_atom_min_gen_ul((unsigned long *)&dl, l);
  // CHECK: atomicrmw min i64*
  __nvvm_atom_min_gen_ll(&sll, ll);
  // CHECK: atomicrmw umin i64*
  __nvvm_atom_min_gen_ull((unsigned long long *)&sll, ll);

  // CHECK: cmpxchg
  // CHECK-NEXT: extractvalue { i32, i1 } {{%[0-9]+}}, 0
  __nvvm_atom_cas_gen_i(ip, 0, i);
  // CHECK: cmpxchg
  // CHECK-NEXT: extractvalue { {{i32|i64}}, i1 } {{%[0-9]+}}, 0
  __nvvm_atom_cas_gen_l(&dl, 0, l);
  // CHECK: cmpxchg
  // CHECK-NEXT: extractvalue { i64, i1 } {{%[0-9]+}}, 0
  __nvvm_atom_cas_gen_ll(&sll, 0, ll);

  // CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32
  __nvvm_atom_add_gen_f(fp, f);

  // CHECK: call i32 @llvm.nvvm.atomic.load.inc.32.p0i32
  __nvvm_atom_inc_gen_ui(uip, ui);

  // CHECK: call i32 @llvm.nvvm.atomic.load.dec.32.p0i32
  __nvvm_atom_dec_gen_ui(uip, ui);


  //////////////////////////////////////////////////////////////////
  // Atomics with scope (only supported on sm_60+).

#if ERROR_CHECK || __CUDA_ARCH__ >= 600

  // CHECK: call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_add_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_add_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_add_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_add_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_add_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_add_gen_ll(&sll, ll);
  // CHECK: call i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_add_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_add_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_add_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_add_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_add_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_add_gen_ll(&sll, ll);

  // CHECK: call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0f32
  // expected-error@+1 {{'__nvvm_atom_cta_add_gen_f' needs target feature sm_60}}
  __nvvm_atom_cta_add_gen_f(fp, f);
  // CHECK: call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0f64
  // expected-error@+1 {{'__nvvm_atom_cta_add_gen_d' needs target feature sm_60}}
  __nvvm_atom_cta_add_gen_d(dfp, df);
  // CHECK: call float @llvm.nvvm.atomic.add.gen.f.sys.f32.p0f32
  // expected-error@+1 {{'__nvvm_atom_sys_add_gen_f' needs target feature sm_60}}
  __nvvm_atom_sys_add_gen_f(fp, f);
  // CHECK: call double @llvm.nvvm.atomic.add.gen.f.sys.f64.p0f64
  // expected-error@+1 {{'__nvvm_atom_sys_add_gen_d' needs target feature sm_60}}
  __nvvm_atom_sys_add_gen_d(dfp, df);

  // CHECK: call i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_xchg_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_xchg_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_xchg_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_xchg_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_xchg_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_xchg_gen_ll(&sll, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_xchg_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_xchg_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_xchg_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_xchg_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_xchg_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_xchg_gen_ll(&sll, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_max_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_max_gen_i(ip, i);
  // CHECK: call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_max_gen_ui' needs target feature sm_60}}
  __nvvm_atom_cta_max_gen_ui((unsigned int *)ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_max_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_max_gen_l(&dl, l);
  // LP32: call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_max_gen_ul' needs target feature sm_60}}
  __nvvm_atom_cta_max_gen_ul((unsigned long *)lp, l);
  // CHECK: call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_max_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_max_gen_ll(&sll, ll);
  // CHECK: call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_max_gen_ull' needs target feature sm_60}}
  __nvvm_atom_cta_max_gen_ull((unsigned long long *)llp, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_max_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_max_gen_i(ip, i);
  // CHECK: call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_max_gen_ui' needs target feature sm_60}}
  __nvvm_atom_sys_max_gen_ui((unsigned int *)ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_max_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_max_gen_l(&dl, l);
  // LP32: call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_max_gen_ul' needs target feature sm_60}}
  __nvvm_atom_sys_max_gen_ul((unsigned long *)lp, l);
  // CHECK: call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_max_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_max_gen_ll(&sll, ll);
  // CHECK: call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_max_gen_ull' needs target feature sm_60}}
  __nvvm_atom_sys_max_gen_ull((unsigned long long *)llp, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_min_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_min_gen_i(ip, i);
  // CHECK: call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_min_gen_ui' needs target feature sm_60}}
  __nvvm_atom_cta_min_gen_ui((unsigned int *)ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_min_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_min_gen_l(&dl, l);
  // LP32: call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_min_gen_ul' needs target feature sm_60}}
  __nvvm_atom_cta_min_gen_ul((unsigned long *)lp, l);
  // CHECK: call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_min_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_min_gen_ll(&sll, ll);
  // CHECK: call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_min_gen_ull' needs target feature sm_60}}
  __nvvm_atom_cta_min_gen_ull((unsigned long long *)llp, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_min_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_min_gen_i(ip, i);
  // CHECK: call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_min_gen_ui' needs target feature sm_60}}
  __nvvm_atom_sys_min_gen_ui((unsigned int *)ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_min_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_min_gen_l(&dl, l);
  // LP32: call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_min_gen_ul' needs target feature sm_60}}
  __nvvm_atom_sys_min_gen_ul((unsigned long *)lp, l);
  // CHECK: call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_min_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_min_gen_ll(&sll, ll);
  // CHECK: call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_min_gen_ull' needs target feature sm_60}}
  __nvvm_atom_sys_min_gen_ull((unsigned long long *)llp, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.inc.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_inc_gen_ui' needs target feature sm_60}}
  __nvvm_atom_cta_inc_gen_ui((unsigned int *)ip, i);
  // CHECK: call i32 @llvm.nvvm.atomic.inc.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_inc_gen_ui' needs target feature sm_60}}
  __nvvm_atom_sys_inc_gen_ui((unsigned int *)ip, i);

  // CHECK: call i32 @llvm.nvvm.atomic.dec.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_dec_gen_ui' needs target feature sm_60}}
  __nvvm_atom_cta_dec_gen_ui((unsigned int *)ip, i);
  // CHECK: call i32 @llvm.nvvm.atomic.dec.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_dec_gen_ui' needs target feature sm_60}}
  __nvvm_atom_sys_dec_gen_ui((unsigned int *)ip, i);

  // CHECK: call i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_and_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_and_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_and_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_and_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_and_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_and_gen_ll(&sll, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_and_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_and_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_and_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_and_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_and_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_and_gen_ll(&sll, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_or_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_or_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_or_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_or_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_or_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_or_gen_ll(&sll, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_or_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_or_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_or_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_or_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_or_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_or_gen_ll(&sll, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_xor_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_xor_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_xor_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_xor_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_xor_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_xor_gen_ll(&sll, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_xor_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_xor_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_xor_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_xor_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_xor_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_xor_gen_ll(&sll, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_cas_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_cas_gen_i(ip, i, 0);
  // LP32: call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_cas_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_cas_gen_l(&dl, l, 0);
  // CHECK: call i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_cas_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_cas_gen_ll(&sll, ll, 0);

  // CHECK: call i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_cas_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_cas_gen_i(ip, i, 0);
  // LP32: call i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_cas_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_cas_gen_l(&dl, l, 0);
  // CHECK: call i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_cas_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_cas_gen_ll(&sll, ll, 0);
#endif

  // CHECK: ret
}
542
// Exercises the __nvvm_ldg_* builtins on scalar and vector element types. The
// single untyped pointer p is cast to each pointee type in turn. Every call
// should become an llvm.nvvm.ldg.global.{i,f}.* intrinsic call whose trailing
// i32 argument is the value listed in the directive beside it. Signed and
// unsigned variants of one width are matched by identical patterns.
// CHECK-LABEL: nvvm_ldg
__device__ void nvvm_ldg(const void *p) {
  // Local vector types standing in for the CUDA <type>N types.
  typedef char char2 __attribute__((ext_vector_type(2)));
  typedef unsigned char uchar2 __attribute__((ext_vector_type(2)));
  typedef char char4 __attribute__((ext_vector_type(4)));
  typedef unsigned char uchar4 __attribute__((ext_vector_type(4)));
  typedef short short2 __attribute__((ext_vector_type(2)));
  typedef unsigned short ushort2 __attribute__((ext_vector_type(2)));
  typedef short short4 __attribute__((ext_vector_type(4)));
  typedef unsigned short ushort4 __attribute__((ext_vector_type(4)));
  typedef int int2 __attribute__((ext_vector_type(2)));
  typedef unsigned int uint2 __attribute__((ext_vector_type(2)));
  typedef int int4 __attribute__((ext_vector_type(4)));
  typedef unsigned int uint4 __attribute__((ext_vector_type(4)));
  typedef long long longlong2 __attribute__((ext_vector_type(2)));
  typedef unsigned long long ulonglong2 __attribute__((ext_vector_type(2)));
  typedef float float2 __attribute__((ext_vector_type(2)));
  typedef float float4 __attribute__((ext_vector_type(4)));
  typedef double double2 __attribute__((ext_vector_type(2)));

  // Scalar integer loads.
  // CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0i8(i8* {{%[0-9]+}}, i32 1)
  __nvvm_ldg_c((const char *)p);
  // CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0i8(i8* {{%[0-9]+}}, i32 1)
  __nvvm_ldg_uc((const unsigned char *)p);

  // CHECK: call i16 @llvm.nvvm.ldg.global.i.i16.p0i16(i16* {{%[0-9]+}}, i32 2)
  __nvvm_ldg_s((const short *)p);
  // CHECK: call i16 @llvm.nvvm.ldg.global.i.i16.p0i16(i16* {{%[0-9]+}}, i32 2)
  __nvvm_ldg_us((const unsigned short *)p);

  // CHECK: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4)
  __nvvm_ldg_i((const int *)p);
  // CHECK: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4)
  __nvvm_ldg_ui((const unsigned int *)p);

  // `long` is matched per target: i32 under the 32-bit prefix and i64 under
  // the 64-bit prefix.
  // LP32: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4)
  // LP64: call i64 @llvm.nvvm.ldg.global.i.i64.p0i64(i64* {{%[0-9]+}}, i32 8)
  __nvvm_ldg_l((const long *)p);
  // LP32: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4)
  // LP64: call i64 @llvm.nvvm.ldg.global.i.i64.p0i64(i64* {{%[0-9]+}}, i32 8)
  __nvvm_ldg_ul((const unsigned long *)p);

  // Scalar floating-point loads.
  // CHECK: call float @llvm.nvvm.ldg.global.f.f32.p0f32(float* {{%[0-9]+}}, i32 4)
  __nvvm_ldg_f((const float *)p);
  // CHECK: call double @llvm.nvvm.ldg.global.f.f64.p0f64(double* {{%[0-9]+}}, i32 8)
  __nvvm_ldg_d((const double *)p);

  // In practice, the pointers we pass to __ldg will be aligned as appropriate
  // for the CUDA <type>N vector types (e.g. short4), which are not the same as
  // the LLVM vector types. However, each LLVM vector type has an alignment
  // less than or equal to its corresponding CUDA type, so we're OK.
  //
  // PTX Interoperability section 2.2: "For a vector with an even number of
  // elements, its alignment is set to number of elements times the alignment of
  // its member: n*alignof(t)."

  // Vector integer loads.
  // CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0v2i8(<2 x i8>* {{%[0-9]+}}, i32 2)
  __nvvm_ldg_c2((const char2 *)p);
  // CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0v2i8(<2 x i8>* {{%[0-9]+}}, i32 2)
  __nvvm_ldg_uc2((const uchar2 *)p);

  // CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0v4i8(<4 x i8>* {{%[0-9]+}}, i32 4)
  __nvvm_ldg_c4((const char4 *)p);
  // CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0v4i8(<4 x i8>* {{%[0-9]+}}, i32 4)
  __nvvm_ldg_uc4((const uchar4 *)p);

  // CHECK: call <2 x i16> @llvm.nvvm.ldg.global.i.v2i16.p0v2i16(<2 x i16>* {{%[0-9]+}}, i32 4)
  __nvvm_ldg_s2((const short2 *)p);
  // CHECK: call <2 x i16> @llvm.nvvm.ldg.global.i.v2i16.p0v2i16(<2 x i16>* {{%[0-9]+}}, i32 4)
  __nvvm_ldg_us2((const ushort2 *)p);

  // CHECK: call <4 x i16> @llvm.nvvm.ldg.global.i.v4i16.p0v4i16(<4 x i16>* {{%[0-9]+}}, i32 8)
  __nvvm_ldg_s4((const short4 *)p);
  // CHECK: call <4 x i16> @llvm.nvvm.ldg.global.i.v4i16.p0v4i16(<4 x i16>* {{%[0-9]+}}, i32 8)
  __nvvm_ldg_us4((const ushort4 *)p);

  // CHECK: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0v2i32(<2 x i32>* {{%[0-9]+}}, i32 8)
  __nvvm_ldg_i2((const int2 *)p);
  // CHECK: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0v2i32(<2 x i32>* {{%[0-9]+}}, i32 8)
  __nvvm_ldg_ui2((const uint2 *)p);

  // CHECK: call <4 x i32> @llvm.nvvm.ldg.global.i.v4i32.p0v4i32(<4 x i32>* {{%[0-9]+}}, i32 16)
  __nvvm_ldg_i4((const int4 *)p);
  // CHECK: call <4 x i32> @llvm.nvvm.ldg.global.i.v4i32.p0v4i32(<4 x i32>* {{%[0-9]+}}, i32 16)
  __nvvm_ldg_ui4((const uint4 *)p);

  // CHECK: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0v2i64(<2 x i64>* {{%[0-9]+}}, i32 16)
  __nvvm_ldg_ll2((const longlong2 *)p);
  // CHECK: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0v2i64(<2 x i64>* {{%[0-9]+}}, i32 16)
  __nvvm_ldg_ull2((const ulonglong2 *)p);

  // Vector floating-point loads.
  // CHECK: call <2 x float> @llvm.nvvm.ldg.global.f.v2f32.p0v2f32(<2 x float>* {{%[0-9]+}}, i32 8)
  __nvvm_ldg_f2((const float2 *)p);

  // CHECK: call <4 x float> @llvm.nvvm.ldg.global.f.v4f32.p0v4f32(<4 x float>* {{%[0-9]+}}, i32 16)
  __nvvm_ldg_f4((const float4 *)p);

  // CHECK: call <2 x double> @llvm.nvvm.ldg.global.f.v2f64.p0v2f64(<2 x double>* {{%[0-9]+}}, i32 16)
  __nvvm_ldg_d2((const double2 *)p);
}
642
// Exercises the down, up, bfly and idx shuffle builtins in their i32 and f32
// forms. i and f supply the value being shuffled; a and b are forwarded
// unchanged as the second and third arguments of every call. Each call should
// lower to the llvm.nvvm.shfl.* intrinsic of the same mode and type.
// CHECK-LABEL: nvvm_shfl
__device__ void nvvm_shfl(int i, float f, int a, int b) {
  // Mode: down.
  // CHECK: call i32 @llvm.nvvm.shfl.down.i32(i32
  __nvvm_shfl_down_i32(i, a, b);
  // CHECK: call float @llvm.nvvm.shfl.down.f32(float
  __nvvm_shfl_down_f32(f, a, b);

  // Mode: up.
  // CHECK: call i32 @llvm.nvvm.shfl.up.i32(i32
  __nvvm_shfl_up_i32(i, a, b);
  // CHECK: call float @llvm.nvvm.shfl.up.f32(float
  __nvvm_shfl_up_f32(f, a, b);

  // Mode: bfly.
  // CHECK: call i32 @llvm.nvvm.shfl.bfly.i32(i32
  __nvvm_shfl_bfly_i32(i, a, b);
  // CHECK: call float @llvm.nvvm.shfl.bfly.f32(float
  __nvvm_shfl_bfly_f32(f, a, b);

  // Mode: idx.
  // CHECK: call i32 @llvm.nvvm.shfl.idx.i32(i32
  __nvvm_shfl_idx_i32(i, a, b);
  // CHECK: call float @llvm.nvvm.shfl.idx.f32(float
  __nvvm_shfl_idx_f32(f, a, b);

  // CHECK: ret void
}
663
// Exercises the all, any, uni and ballot vote builtins with the same int
// predicate. Per the directives, the matching intrinsics take an i1 argument;
// the first three return i1 and ballot returns i32.
__device__ void nvvm_vote(int pred) {
  // CHECK: call i1 @llvm.nvvm.vote.all(i1
  __nvvm_vote_all(pred);

  // CHECK: call i1 @llvm.nvvm.vote.any(i1
  __nvvm_vote_any(pred);

  // CHECK: call i1 @llvm.nvvm.vote.uni(i1
  __nvvm_vote_uni(pred);

  // CHECK: call i32 @llvm.nvvm.vote.ballot(i1
  __nvvm_vote_ballot(pred);

  // CHECK: ret void
}
675

Clang Project