aarch64-neon-vget.c source code [clang_source_code/test/CodeGen/aarch64-neon-vget.c]

1	// RUN: %clang_cc1 -triple arm64-apple-darwin -target-feature +neon \
2	// RUN: -fallow-half-arguments-and-returns -disable-O0-optnone -emit-llvm -o - %s \
3	// RUN: | opt -S -mem2reg | FileCheck %s
4
5	#include <arm_neon.h>
6
7	// CHECK-LABEL: define i8 @test_vget_lane_u8(<8 x i8> %a) #0 {
8	// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
9	// CHECK: ret i8 [[VGET_LANE]]
10	uint8_t test_vget_lane_u8(uint8x8_t a) { // Codegen test for vget_lane_u8: read one lane of a uint8x8_t.
11	return vget_lane_u8(a, 7); // lane 7 is the last of the 8 lanes
12	}
13
14	// CHECK-LABEL: define i16 @test_vget_lane_u16(<4 x i16> %a) #0 {
15	// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
16	// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
17	// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
18	// CHECK: ret i16 [[VGET_LANE]]
19	uint16_t test_vget_lane_u16(uint16x4_t a) { // Codegen test for vget_lane_u16: read one lane of a uint16x4_t.
20	return vget_lane_u16(a, 3); // lane 3 is the last of the 4 lanes
21	}
22
23	// CHECK-LABEL: define i32 @test_vget_lane_u32(<2 x i32> %a) #0 {
24	// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
25	// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
26	// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
27	// CHECK: ret i32 [[VGET_LANE]]
28	uint32_t test_vget_lane_u32(uint32x2_t a) { // Codegen test for vget_lane_u32: read one lane of a uint32x2_t.
29	return vget_lane_u32(a, 1); // lane 1 is the last of the 2 lanes
30	}
31
32	// CHECK-LABEL: define i8 @test_vget_lane_s8(<8 x i8> %a) #0 {
33	// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
34	// CHECK: ret i8 [[VGET_LANE]]
35	int8_t test_vget_lane_s8(int8x8_t a) { // Codegen test for vget_lane_s8: read one lane of an int8x8_t.
36	return vget_lane_s8(a, 7); // lane 7 is the last of the 8 lanes
37	}
38
39	// CHECK-LABEL: define i16 @test_vget_lane_s16(<4 x i16> %a) #0 {
40	// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
41	// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
42	// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
43	// CHECK: ret i16 [[VGET_LANE]]
44	int16_t test_vget_lane_s16(int16x4_t a) { // Codegen test for vget_lane_s16: read one lane of an int16x4_t.
45	return vget_lane_s16(a, 3); // lane 3 is the last of the 4 lanes
46	}
47
48	// CHECK-LABEL: define i32 @test_vget_lane_s32(<2 x i32> %a) #0 {
49	// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
50	// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
51	// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
52	// CHECK: ret i32 [[VGET_LANE]]
53	int32_t test_vget_lane_s32(int32x2_t a) { // Codegen test for vget_lane_s32: read one lane of an int32x2_t.
54	return vget_lane_s32(a, 1); // lane 1 is the last of the 2 lanes
55	}
56
57	// CHECK-LABEL: define i8 @test_vget_lane_p8(<8 x i8> %a) #0 {
58	// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
59	// CHECK: ret i8 [[VGET_LANE]]
60	poly8_t test_vget_lane_p8(poly8x8_t a) { // Codegen test for vget_lane_p8: read one lane of a poly8x8_t.
61	return vget_lane_p8(a, 7); // lane 7 is the last of the 8 lanes
62	}
63
64	// CHECK-LABEL: define i16 @test_vget_lane_p16(<4 x i16> %a) #0 {
65	// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
66	// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
67	// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
68	// CHECK: ret i16 [[VGET_LANE]]
69	poly16_t test_vget_lane_p16(poly16x4_t a) { // Codegen test for vget_lane_p16: read one lane of a poly16x4_t.
70	return vget_lane_p16(a, 3); // lane 3 is the last of the 4 lanes
71	}
72
73	// CHECK-LABEL: define float @test_vget_lane_f32(<2 x float> %a) #0 {
74	// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
75	// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
76	// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
77	// CHECK: ret float [[VGET_LANE]]
78	float32_t test_vget_lane_f32(float32x2_t a) { // Codegen test for vget_lane_f32: read one lane of a float32x2_t.
79	return vget_lane_f32(a, 1); // lane 1 is the last of the 2 lanes
80	}
81
82	// CHECK-LABEL: define float @test_vget_lane_f16(<4 x half> %a) #0 {
83	// CHECK: [[__REINT_242:%.*]] = alloca <4 x half>, align 8
84	// CHECK: [[__REINT1_242:%.*]] = alloca i16, align 2
85	// CHECK: store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8
86	// CHECK: [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>*
87	// CHECK: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
88	// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
89	// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
90	// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP3]], i32 1
91	// CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2
92	// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half*
93	// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
94	// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float
95	// CHECK: ret float [[CONV]]
96	float32_t test_vget_lane_f16(float16x4_t a) { // Codegen test for vget_lane_f16; the half lane is returned widened to float32_t.
97	return vget_lane_f16(a, 1); // lane 1 of the 4 lanes (not the last one)
98	}
99
100	// CHECK-LABEL: define i8 @test_vgetq_lane_u8(<16 x i8> %a) #1 {
101	// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
102	// CHECK: ret i8 [[VGETQ_LANE]]
103	uint8_t test_vgetq_lane_u8(uint8x16_t a) { // Codegen test for vgetq_lane_u8: read one lane of a uint8x16_t.
104	return vgetq_lane_u8(a, 15); // lane 15 is the last of the 16 lanes
105	}
106
107	// CHECK-LABEL: define i16 @test_vgetq_lane_u16(<8 x i16> %a) #1 {
108	// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
109	// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
110	// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
111	// CHECK: ret i16 [[VGETQ_LANE]]
112	uint16_t test_vgetq_lane_u16(uint16x8_t a) { // Codegen test for vgetq_lane_u16: read one lane of a uint16x8_t.
113	return vgetq_lane_u16(a, 7); // lane 7 is the last of the 8 lanes
114	}
115
116	// CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #1 {
117	// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
118	// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
119	// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
120	// CHECK: ret i32 [[VGETQ_LANE]]
121	uint32_t test_vgetq_lane_u32(uint32x4_t a) { // Codegen test for vgetq_lane_u32: read one lane of a uint32x4_t.
122	return vgetq_lane_u32(a, 3); // lane 3 is the last of the 4 lanes
123	}
124
125	// CHECK-LABEL: define i8 @test_vgetq_lane_s8(<16 x i8> %a) #1 {
126	// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
127	// CHECK: ret i8 [[VGETQ_LANE]]
128	int8_t test_vgetq_lane_s8(int8x16_t a) { // Codegen test for vgetq_lane_s8: read one lane of an int8x16_t.
129	return vgetq_lane_s8(a, 15); // lane 15 is the last of the 16 lanes
130	}
131
132	// CHECK-LABEL: define i16 @test_vgetq_lane_s16(<8 x i16> %a) #1 {
133	// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
134	// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
135	// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
136	// CHECK: ret i16 [[VGETQ_LANE]]
137	int16_t test_vgetq_lane_s16(int16x8_t a) { // Codegen test for vgetq_lane_s16: read one lane of an int16x8_t.
138	return vgetq_lane_s16(a, 7); // lane 7 is the last of the 8 lanes
139	}
140
141	// CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #1 {
142	// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
143	// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
144	// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
145	// CHECK: ret i32 [[VGETQ_LANE]]
146	int32_t test_vgetq_lane_s32(int32x4_t a) { // Codegen test for vgetq_lane_s32: read one lane of an int32x4_t.
147	return vgetq_lane_s32(a, 3); // lane 3 is the last of the 4 lanes
148	}
149
150	// CHECK-LABEL: define i8 @test_vgetq_lane_p8(<16 x i8> %a) #1 {
151	// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
152	// CHECK: ret i8 [[VGETQ_LANE]]
153	poly8_t test_vgetq_lane_p8(poly8x16_t a) { // Codegen test for vgetq_lane_p8: read one lane of a poly8x16_t.
154	return vgetq_lane_p8(a, 15); // lane 15 is the last of the 16 lanes
155	}
156
157	// CHECK-LABEL: define i16 @test_vgetq_lane_p16(<8 x i16> %a) #1 {
158	// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
159	// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
160	// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
161	// CHECK: ret i16 [[VGETQ_LANE]]
162	poly16_t test_vgetq_lane_p16(poly16x8_t a) { // Codegen test for vgetq_lane_p16: read one lane of a poly16x8_t.
163	return vgetq_lane_p16(a, 7); // lane 7 is the last of the 8 lanes
164	}
165
166	// CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #1 {
167	// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
168	// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
169	// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
170	// CHECK: ret float [[VGETQ_LANE]]
171	float32_t test_vgetq_lane_f32(float32x4_t a) { // Codegen test for vgetq_lane_f32: read one lane of a float32x4_t.
172	return vgetq_lane_f32(a, 3); // lane 3 is the last of the 4 lanes
173	}
174
175	// CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #1 {
176	// CHECK: [[__REINT_244:%.*]] = alloca <8 x half>, align 16
177	// CHECK: [[__REINT1_244:%.*]] = alloca i16, align 2
178	// CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
179	// CHECK: [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>*
180	// CHECK: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
181	// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
182	// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
183	// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
184	// CHECK: store i16 [[VGETQ_LANE]], i16* [[__REINT1_244]], align 2
185	// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half*
186	// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
187	// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float
188	// CHECK: ret float [[CONV]]
189	float32_t test_vgetq_lane_f16(float16x8_t a) { // Codegen test for vgetq_lane_f16; the half lane is returned widened to float32_t.
190	return vgetq_lane_f16(a, 3); // lane 3 of the 8 lanes (not the last one)
191	}
192
193	// CHECK-LABEL: define i64 @test_vget_lane_s64(<1 x i64> %a) #0 {
194	// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
195	// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
196	// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
197	// CHECK: ret i64 [[VGET_LANE]]
198	int64_t test_vget_lane_s64(int64x1_t a) { // Codegen test for vget_lane_s64: read the lane of an int64x1_t.
199	return vget_lane_s64(a, 0); // lane 0 is the only lane of a 1-element vector
200	}
201
202	// CHECK-LABEL: define i64 @test_vget_lane_u64(<1 x i64> %a) #0 {
203	// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
204	// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
205	// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
206	// CHECK: ret i64 [[VGET_LANE]]
207	uint64_t test_vget_lane_u64(uint64x1_t a) { // Codegen test for vget_lane_u64: read the lane of a uint64x1_t.
208	return vget_lane_u64(a, 0); // lane 0 is the only lane of a 1-element vector
209	}
210
211	// CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #1 {
212	// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
213	// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
214	// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
215	// CHECK: ret i64 [[VGETQ_LANE]]
216	int64_t test_vgetq_lane_s64(int64x2_t a) { // Codegen test for vgetq_lane_s64: read one lane of an int64x2_t.
217	return vgetq_lane_s64(a, 1); // lane 1 is the last of the 2 lanes
218	}
219
220	// CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #1 {
221	// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
222	// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
223	// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
224	// CHECK: ret i64 [[VGETQ_LANE]]
225	uint64_t test_vgetq_lane_u64(uint64x2_t a) { // Codegen test for vgetq_lane_u64: read one lane of a uint64x2_t.
226	return vgetq_lane_u64(a, 1); // lane 1 is the last of the 2 lanes
227	}
228
229
230	// CHECK-LABEL: define <8 x i8> @test_vset_lane_u8(i8 %a, <8 x i8> %b) #0 {
231	// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
232	// CHECK: ret <8 x i8> [[VSET_LANE]]
233	uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) { // Codegen test for vset_lane_u8: write scalar a into one lane of b.
234	return vset_lane_u8(a, b, 7); // lane 7 is the last of the 8 lanes
235	}
236
237	// CHECK-LABEL: define <4 x i16> @test_vset_lane_u16(i16 %a, <4 x i16> %b) #0 {
238	// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
239	// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
240	// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
241	// CHECK: ret <4 x i16> [[VSET_LANE]]
242	uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) { // Codegen test for vset_lane_u16: write scalar a into one lane of b.
243	return vset_lane_u16(a, b, 3); // lane 3 is the last of the 4 lanes
244	}
245
246	// CHECK-LABEL: define <2 x i32> @test_vset_lane_u32(i32 %a, <2 x i32> %b) #0 {
247	// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
248	// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
249	// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1
250	// CHECK: ret <2 x i32> [[VSET_LANE]]
251	uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) { // Codegen test for vset_lane_u32: write scalar a into one lane of b.
252	return vset_lane_u32(a, b, 1); // lane 1 is the last of the 2 lanes
253	}
254
255	// CHECK-LABEL: define <8 x i8> @test_vset_lane_s8(i8 %a, <8 x i8> %b) #0 {
256	// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
257	// CHECK: ret <8 x i8> [[VSET_LANE]]
258	int8x8_t test_vset_lane_s8(int8_t a, int8x8_t b) { // Codegen test for vset_lane_s8: write scalar a into one lane of b.
259	return vset_lane_s8(a, b, 7); // lane 7 is the last of the 8 lanes
260	}
261
262	// CHECK-LABEL: define <4 x i16> @test_vset_lane_s16(i16 %a, <4 x i16> %b) #0 {
263	// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
264	// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
265	// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
266	// CHECK: ret <4 x i16> [[VSET_LANE]]
267	int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) { // Codegen test for vset_lane_s16: write scalar a into one lane of b.
268	return vset_lane_s16(a, b, 3); // lane 3 is the last of the 4 lanes
269	}
270
271	// CHECK-LABEL: define <2 x i32> @test_vset_lane_s32(i32 %a, <2 x i32> %b) #0 {
272	// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
273	// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
274	// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1
275	// CHECK: ret <2 x i32> [[VSET_LANE]]
276	int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) { // Codegen test for vset_lane_s32: write scalar a into one lane of b.
277	return vset_lane_s32(a, b, 1); // lane 1 is the last of the 2 lanes
278	}
279
280	// CHECK-LABEL: define <8 x i8> @test_vset_lane_p8(i8 %a, <8 x i8> %b) #0 {
281	// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
282	// CHECK: ret <8 x i8> [[VSET_LANE]]
283	poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t b) { // Codegen test for vset_lane_p8: write scalar a into one lane of b.
284	return vset_lane_p8(a, b, 7); // lane 7 is the last of the 8 lanes
285	}
286
287	// CHECK-LABEL: define <4 x i16> @test_vset_lane_p16(i16 %a, <4 x i16> %b) #0 {
288	// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
289	// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
290	// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
291	// CHECK: ret <4 x i16> [[VSET_LANE]]
292	poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) { // Codegen test for vset_lane_p16: write scalar a into one lane of b.
293	return vset_lane_p16(a, b, 3); // lane 3 is the last of the 4 lanes
294	}
295
296	// CHECK-LABEL: define <2 x float> @test_vset_lane_f32(float %a, <2 x float> %b) #0 {
297	// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8>
298	// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
299	// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x float> [[TMP1]], float %a, i32 1
300	// CHECK: ret <2 x float> [[VSET_LANE]]
301	float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) { // Codegen test for vset_lane_f32: write scalar a into one lane of b.
302	return vset_lane_f32(a, b, 1); // lane 1 is the last of the 2 lanes
303	}
304
305	// CHECK-LABEL: define <4 x half> @test_vset_lane_f16(half* %a, <4 x half> %b) #0 {
306	// CHECK: [[__REINT_246:%.*]] = alloca half, align 2
307	// CHECK: [[__REINT1_246:%.*]] = alloca <4 x half>, align 8
308	// CHECK: [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8
309	// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
310	// CHECK: store half [[TMP0]], half* [[__REINT_246]], align 2
311	// CHECK: store <4 x half> %b, <4 x half>* [[__REINT1_246]], align 8
312	// CHECK: [[TMP1:%.*]] = bitcast half* [[__REINT_246]] to i16*
313	// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
314	// CHECK: [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x i16>*
315	// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8
316	// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
317	// CHECK: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
318	// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP6]], i16 [[TMP2]], i32 3
319	// CHECK: store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8
320	// CHECK: [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>*
321	// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8
322	// CHECK: ret <4 x half> [[TMP8]]
323	float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) { // Codegen test for vset_lane_f16; the half scalar is passed by pointer.
324	return vset_lane_f16(*a, b, 3); // dereference a, then write it to lane 3, the last of the 4 lanes
325	}
326
327	// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_u8(i8 %a, <16 x i8> %b) #1 {
328	// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
329	// CHECK: ret <16 x i8> [[VSET_LANE]]
330	uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) { // Codegen test for vsetq_lane_u8: write scalar a into one lane of b.
331	return vsetq_lane_u8(a, b, 15); // lane 15 is the last of the 16 lanes
332	}
333
334	// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 %a, <8 x i16> %b) #1 {
335	// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
336	// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
337	// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
338	// CHECK: ret <8 x i16> [[VSET_LANE]]
339	uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) { // Codegen test for vsetq_lane_u16: write scalar a into one lane of b.
340	return vsetq_lane_u16(a, b, 7); // lane 7 is the last of the 8 lanes
341	}
342
343	// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32> %b) #1 {
344	// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
345	// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
346	// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
347	// CHECK: ret <4 x i32> [[VSET_LANE]]
348	uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) { // Codegen test for vsetq_lane_u32: write scalar a into one lane of b.
349	return vsetq_lane_u32(a, b, 3); // lane 3 is the last of the 4 lanes
350	}
351
352	// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_s8(i8 %a, <16 x i8> %b) #1 {
353	// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
354	// CHECK: ret <16 x i8> [[VSET_LANE]]
355	int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) { // Codegen test for vsetq_lane_s8: write scalar a into one lane of b.
356	return vsetq_lane_s8(a, b, 15); // lane 15 is the last of the 16 lanes
357	}
358
359	// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 %a, <8 x i16> %b) #1 {
360	// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
361	// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
362	// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
363	// CHECK: ret <8 x i16> [[VSET_LANE]]
364	int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) { // Codegen test for vsetq_lane_s16: write scalar a into one lane of b.
365	return vsetq_lane_s16(a, b, 7); // lane 7 is the last of the 8 lanes
366	}
367
368	// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #1 {
369	// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
370	// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
371	// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
372	// CHECK: ret <4 x i32> [[VSET_LANE]]
373	int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) { // Codegen test for vsetq_lane_s32: write scalar a into one lane of b.
374	return vsetq_lane_s32(a, b, 3); // lane 3 is the last of the 4 lanes
375	}
376
377	// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_p8(i8 %a, <16 x i8> %b) #1 {
378	// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
379	// CHECK: ret <16 x i8> [[VSET_LANE]]
380	poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) { // Codegen test for vsetq_lane_p8: write scalar a into one lane of b.
381	return vsetq_lane_p8(a, b, 15); // lane 15 is the last of the 16 lanes
382	}
383
384	// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 %a, <8 x i16> %b) #1 {
385	// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
386	// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
387	// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
388	// CHECK: ret <8 x i16> [[VSET_LANE]]
389	poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) { // Codegen test for vsetq_lane_p16: write scalar a into one lane of b.
390	return vsetq_lane_p16(a, b, 7); // lane 7 is the last of the 8 lanes
391	}
392
393	// CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #1 {
394	// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
395	// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
396	// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 3
397	// CHECK: ret <4 x float> [[VSET_LANE]]
398	float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) { // Codegen test for vsetq_lane_f32: write scalar a into one lane of b.
399	return vsetq_lane_f32(a, b, 3); // lane 3 is the last of the 4 lanes
400	}
401
402	// CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(half* %a, <8 x half> %b) #1 {
403	// CHECK: [[__REINT_248:%.*]] = alloca half, align 2
404	// CHECK: [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
405	// CHECK: [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
406	// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
407	// CHECK: store half [[TMP0]], half* [[__REINT_248]], align 2
408	// CHECK: store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 16
409	// CHECK: [[TMP1:%.*]] = bitcast half* [[__REINT_248]] to i16*
410	// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
411	// CHECK: [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>*
412	// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16
413	// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
414	// CHECK: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
415	// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[TMP2]], i32 7
416	// CHECK: store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16
417	// CHECK: [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>*
418	// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16
419	// CHECK: ret <8 x half> [[TMP8]]
420	float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) { // Codegen test for vsetq_lane_f16; the half scalar is passed by pointer.
421	return vsetq_lane_f16(*a, b, 7); // dereference a, then write it to lane 7, the last of the 8 lanes
422	}
423
424	// CHECK-LABEL: define <1 x i64> @test_vset_lane_s64(i64 %a, <1 x i64> %b) #0 {
425	// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
426	// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
427	// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
428	// CHECK: ret <1 x i64> [[VSET_LANE]]
429	int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) { // Codegen test for vset_lane_s64: write scalar a into the lane of b.
430	return vset_lane_s64(a, b, 0); // lane 0 is the only lane of a 1-element vector
431	}
432
433	// CHECK-LABEL: define <1 x i64> @test_vset_lane_u64(i64 %a, <1 x i64> %b) #0 {
434	// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
435	// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
436	// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
437	// CHECK: ret <1 x i64> [[VSET_LANE]]
438	uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) { // Codegen test for vset_lane_u64: write scalar a into the lane of b.
439	return vset_lane_u64(a, b, 0); // lane 0 is the only lane of a 1-element vector
440	}
441
442	// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64> %b) #1 {
443	// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
444	// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
445	// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
446	// CHECK: ret <2 x i64> [[VSET_LANE]]
447	int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) { // Codegen test for vsetq_lane_s64: write scalar a into one lane of b.
448	return vsetq_lane_s64(a, b, 1); // lane 1 is the last of the 2 lanes
449	}
450
451	// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64> %b) #1 {
452	// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
453	// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
454	// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
455	// CHECK: ret <2 x i64> [[VSET_LANE]]
456	uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) { // Codegen test for vsetq_lane_u64: write scalar a into one lane of b.
457	return vsetq_lane_u64(a, b, 1); // lane 1 is the last of the 2 lanes
458	}
459
460	// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
461	// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
462

Clang Project