| 1 | // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | \ |
| 2 | // RUN: FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=NO-AVX512 |
| 3 | // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | \ |
| 4 | // RUN: FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=NO-AVX512 |
| 5 | // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f | \ |
| 6 | // RUN: FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX512 |
| 7 | #include <stdarg.h> |
| 8 | |
| 9 | // CHECK-LABEL: define signext i8 @f0() |
| 10 | char f0(void) { |
| 11 | return 0; |
| 12 | } |
| 13 | |
| 14 | // CHECK-LABEL: define signext i16 @f1() |
| 15 | short f1(void) { |
| 16 | return 0; |
| 17 | } |
| 18 | |
| 19 | // CHECK-LABEL: define i32 @f2() |
| 20 | int f2(void) { |
| 21 | return 0; |
| 22 | } |
| 23 | |
| 24 | // CHECK-LABEL: define float @f3() |
| 25 | float f3(void) { |
| 26 | return 0; |
| 27 | } |
| 28 | |
| 29 | // CHECK-LABEL: define double @f4() |
| 30 | double f4(void) { |
| 31 | return 0; |
| 32 | } |
| 33 | |
| 34 | // CHECK-LABEL: define x86_fp80 @f5() |
| 35 | long double f5(void) { |
| 36 | return 0; |
| 37 | } |
| 38 | |
| 39 | // CHECK-LABEL: define void @f6(i8 signext %a0, i16 signext %a1, i32 %a2, i64 %a3, i8* %a4) |
| 40 | void f6(char a0, short a1, int a2, long long a3, void *a4) { |
| 41 | } |
| 42 | |
| 43 | // CHECK-LABEL: define void @f7(i32 %a0) |
| 44 | typedef enum { A, B, C } e7; |
| 45 | void f7(e7 a0) { |
| 46 | } |
| 47 | |
| 48 | // Test merging/passing of upper eightbyte with X87 class. |
| 49 | // |
| 50 | // CHECK-LABEL: define void @f8_1(%union.u8* noalias sret %agg.result) |
| 51 | // CHECK-LABEL: define void @f8_2(%union.u8* byval align 16 %a0) |
| 52 | union u8 { |
| 53 | long double a; |
| 54 | int b; |
| 55 | }; |
| 56 | union u8 f8_1() { while (1) {} } |
| 57 | void f8_2(union u8 a0) {} |
| 58 | |
| 59 | // CHECK-LABEL: define i64 @f9() |
| 60 | struct s9 { int a; int b; int : 0; } f9(void) { while (1) {} } |
| 61 | |
| 62 | // CHECK-LABEL: define void @f10(i64 %a0.coerce) |
| 63 | struct s10 { int a; int b; int : 0; }; |
| 64 | void f10(struct s10 a0) {} |
| 65 | |
| 66 | // CHECK-LABEL: define void @f11(%union.anon* noalias sret %agg.result) |
| 67 | union { long double a; float b; } f11() { while (1) {} } |
| 68 | |
| 69 | // CHECK-LABEL: define i32 @f12_0() |
| 70 | // CHECK-LABEL: define void @f12_1(i32 %a0.coerce) |
| 71 | struct s12 { int a __attribute__((aligned(16))); }; |
| 72 | struct s12 f12_0(void) { while (1) {} } |
| 73 | void f12_1(struct s12 a0) {} |
| 74 | |
| 75 | // Check that sret parameter is accounted for when checking available integer |
| 76 | // registers. |
| 77 | // CHECK: define void @f13(%struct.s13_0* noalias sret %agg.result, i32 %a, i32 %b, i32 %c, i32 %d, {{.*}}* byval align 8 %e, i32 %f) |
| 78 | |
| 79 | struct s13_0 { long long f0[3]; }; |
| 80 | struct s13_1 { long long f0[2]; }; |
| 81 | struct s13_0 f13(int a, int b, int c, int d, |
| 82 | struct s13_1 e, int f) { while (1) {} } |
| 83 | |
| 84 | // CHECK: define void @f14({{.*}}, i8 signext %X) |
| 85 | void f14(int a, int b, int c, int d, int e, int f, char X) {} |
| 86 | |
| 87 | // CHECK: define void @f15({{.*}}, i8* %X) |
| 88 | void f15(int a, int b, int c, int d, int e, int f, void *X) {} |
| 89 | |
| 90 | // CHECK: define void @f16({{.*}}, float %X) |
| 91 | void f16(float a, float b, float c, float d, float e, float f, float g, float h, |
| 92 | float X) {} |
| 93 | |
| 94 | // CHECK: define void @f17({{.*}}, x86_fp80 %X) |
| 95 | void f17(float a, float b, float c, float d, float e, float f, float g, float h, |
| 96 | long double X) {} |
| 97 | |
| 98 | // Check for valid coercion. The struct should be passed/returned as i32, not |
| 99 | // as i64 for better code quality. |
| 100 | // rdar://8135035 |
| 101 | // CHECK-LABEL: define void @f18(i32 %a, i32 %f18_arg1.coerce) |
| 102 | struct f18_s0 { int f0; }; |
| 103 | void f18(int a, struct f18_s0 f18_arg1) { while (1) {} } |
| 104 | |
| 105 | // Check byval alignment. |
| 106 | |
| 107 | // CHECK-LABEL: define void @f19(%struct.s19* byval align 16 %x) |
| 108 | struct s19 { |
| 109 | long double a; |
| 110 | }; |
| 111 | void f19(struct s19 x) {} |
| 112 | |
| 113 | // CHECK-LABEL: define void @f20(%struct.s20* byval align 32 %x) |
| 114 | struct __attribute__((aligned(32))) s20 { |
| 115 | int x; |
| 116 | int y; |
| 117 | }; |
| 118 | void f20(struct s20 x) {} |
| 119 | |
| 120 | struct StringRef { |
| 121 | long x; |
| 122 | const char *Ptr; |
| 123 | }; |
| 124 | |
| 125 | // rdar://7375902 |
| 126 | // CHECK-LABEL: define i8* @f21(i64 %S.coerce0, i8* %S.coerce1) |
| 127 | const char *f21(struct StringRef S) { return S.x+S.Ptr; } |
| 128 | |
| 129 | // PR7567 |
| 130 | typedef __attribute__ ((aligned(16))) struct f22s { unsigned long long x[2]; } L; |
| 131 | void f22(L x, L y) { } |
| 132 | // CHECK: @f22 |
| 133 | // CHECK: %x = alloca{{.*}}, align 16 |
| 134 | // CHECK: %y = alloca{{.*}}, align 16 |
| 135 | |
| 136 | |
| 137 | |
| 138 | // PR7714 |
| 139 | struct f23S { |
| 140 | short f0; |
| 141 | unsigned f1; |
| 142 | int f2; |
| 143 | }; |
| 144 | |
| 145 | |
| 146 | void f23(int A, struct f23S B) { |
| 147 | // CHECK-LABEL: define void @f23(i32 %A, i64 %B.coerce0, i32 %B.coerce1) |
| 148 | } |
| 149 | |
| 150 | struct f24s { long a; int b; }; |
| 151 | |
| 152 | struct f23S f24(struct f23S *X, struct f24s *P2) { |
| 153 | return *X; |
| 154 | |
| 155 | // CHECK: define { i64, i32 } @f24(%struct.f23S* %X, %struct.f24s* %P2) |
| 156 | } |
| 157 | |
| 158 | // rdar://8248065 |
| 159 | typedef float v4f32 __attribute__((__vector_size__(16))); |
| 160 | v4f32 f25(v4f32 X) { |
| 161 | // CHECK-LABEL: define <4 x float> @f25(<4 x float> %X) |
| 162 | // CHECK-NOT: alloca |
| 163 | // CHECK: alloca <4 x float> |
| 164 | // CHECK-NOT: alloca |
| 165 | // CHECK: store <4 x float> %X, <4 x float>* |
| 166 | // CHECK-NOT: store |
| 167 | // CHECK: ret <4 x float> |
| 168 | return X+X; |
| 169 | } |
| 170 | |
| 171 | struct foo26 { |
| 172 | int *X; |
| 173 | float *Y; |
| 174 | }; |
| 175 | |
| 176 | struct foo26 f26(struct foo26 *P) { |
| 177 | // CHECK: define { i32*, float* } @f26(%struct.foo26* %P) |
| 178 | return *P; |
| 179 | } |
| 180 | |
| 181 | |
| 182 | struct v4f32wrapper { |
| 183 | v4f32 v; |
| 184 | }; |
| 185 | |
| 186 | struct v4f32wrapper f27(struct v4f32wrapper X) { |
| 187 | // CHECK-LABEL: define <4 x float> @f27(<4 x float> %X.coerce) |
| 188 | return X; |
| 189 | } |
| 190 | |
| 191 | // PR22563 - We should unwrap simple structs and arrays to pass |
| 192 | // and return them in the appropriate vector registers if possible. |
| 193 | |
| 194 | typedef float v8f32 __attribute__((__vector_size__(32))); |
| 195 | struct v8f32wrapper { |
| 196 | v8f32 v; |
| 197 | }; |
| 198 | |
| 199 | struct v8f32wrapper f27a(struct v8f32wrapper X) { |
| 200 | // AVX-LABEL: define <8 x float> @f27a(<8 x float> %X.coerce) |
| 201 | return X; |
| 202 | } |
| 203 | |
| 204 | struct v8f32wrapper_wrapper { |
| 205 | v8f32 v[1]; |
| 206 | }; |
| 207 | |
| 208 | struct v8f32wrapper_wrapper f27b(struct v8f32wrapper_wrapper X) { |
| 209 | // AVX-LABEL: define <8 x float> @f27b(<8 x float> %X.coerce) |
| 210 | return X; |
| 211 | } |
| 212 | |
| 213 | // rdar://5711709 |
| 214 | struct f28c { |
| 215 | double x; |
| 216 | int y; |
| 217 | }; |
| 218 | void f28(struct f28c C) { |
| 219 | // CHECK-LABEL: define void @f28(double %C.coerce0, i32 %C.coerce1) |
| 220 | } |
| 221 | |
| 222 | struct f29a { |
| 223 | struct c { |
| 224 | double x; |
| 225 | int y; |
| 226 | } x[1]; |
| 227 | }; |
| 228 | |
| 229 | void f29a(struct f29a A) { |
| 230 | // CHECK-LABEL: define void @f29a(double %A.coerce0, i32 %A.coerce1) |
| 231 | } |
| 232 | |
| 233 | // rdar://8249586 |
| 234 | struct S0 { char f0[8]; char f2; char f3; char f4; }; |
| 235 | void f30(struct S0 p_4) { |
| 236 | // CHECK-LABEL: define void @f30(i64 %p_4.coerce0, i24 %p_4.coerce1) |
| 237 | } |
| 238 | |
| 239 | // Pass the third element as a float when followed by tail padding. |
| 240 | // rdar://8251384 |
| 241 | struct f31foo { float a, b, c; }; |
| 242 | float f31(struct f31foo X) { |
| 243 | // CHECK-LABEL: define float @f31(<2 x float> %X.coerce0, float %X.coerce1) |
| 244 | return X.c; |
| 245 | } |
| 246 | |
| 247 | _Complex float f32(_Complex float A, _Complex float B) { |
| 248 | // rdar://6379669 |
| 249 | // CHECK-LABEL: define <2 x float> @f32(<2 x float> %A.coerce, <2 x float> %B.coerce) |
| 250 | return A+B; |
| 251 | } |
| 252 | |
| 253 | |
| 254 | // rdar://8357396 |
| 255 | struct f33s { long x; float c,d; }; |
| 256 | |
| 257 | void f33(va_list X) { |
| 258 | va_arg(X, struct f33s); |
| 259 | } |
| 260 | |
| 261 | typedef unsigned long long v1i64 __attribute__((__vector_size__(8))); |
| 262 | |
| 263 | // rdar://8359248 |
| 264 | // CHECK-LABEL: define double @f34(double %arg.coerce) |
| 265 | v1i64 f34(v1i64 arg) { return arg; } |
| 266 | |
| 267 | |
| 268 | // rdar://8358475 |
| 269 | // CHECK-LABEL: define double @f35(double %arg.coerce) |
| 270 | typedef unsigned long v1i64_2 __attribute__((__vector_size__(8))); |
| 271 | v1i64_2 f35(v1i64_2 arg) { return arg+arg; } |
| 272 | |
| 273 | // rdar://9122143 |
| 274 | // CHECK: declare void @func(%struct._str* byval align 16) |
| 275 | typedef struct _str { |
| 276 | union { |
| 277 | long double a; |
| 278 | long c; |
| 279 | }; |
| 280 | } str; |
| 281 | |
| 282 | void func(str s); |
| 283 | str ss; |
| 284 | void f9122143() |
| 285 | { |
| 286 | func(ss); |
| 287 | } |
| 288 | |
| 289 | // CHECK-LABEL: define double @f36(double %arg.coerce) |
| 290 | typedef unsigned v2i32 __attribute((__vector_size__(8))); |
| 291 | v2i32 f36(v2i32 arg) { return arg; } |
| 292 | |
| 293 | // AVX: declare void @f38(<8 x float>) |
| 294 | // AVX: declare void @f37(<8 x float>) |
| 295 | // SSE: declare void @f38(%struct.s256* byval align 32) |
| 296 | // SSE: declare void @f37(<8 x float>* byval align 32) |
| 297 | typedef float __m256 __attribute__ ((__vector_size__ (32))); |
| 298 | typedef struct { |
| 299 | __m256 m; |
| 300 | } s256; |
| 301 | |
| 302 | s256 x38; |
| 303 | __m256 x37; |
| 304 | |
| 305 | void f38(s256 x); |
| 306 | void f37(__m256 x); |
| 307 | void f39() { f38(x38); f37(x37); } |
| 308 | |
| 309 | // The two next tests make sure that the struct below is passed |
| 310 | // in the same way regardless of avx being used |
| 311 | |
| 312 | // CHECK: declare void @func40(%struct.t128* byval align 16) |
| 313 | typedef float __m128 __attribute__ ((__vector_size__ (16))); |
| 314 | typedef struct t128 { |
| 315 | __m128 m; |
| 316 | __m128 n; |
| 317 | } two128; |
| 318 | |
| 319 | extern void func40(two128 s); |
| 320 | void func41(two128 s) { |
| 321 | func40(s); |
| 322 | } |
| 323 | |
| 324 | // CHECK: declare void @func42(%struct.t128_2* byval align 16) |
| 325 | typedef struct xxx { |
| 326 | __m128 array[2]; |
| 327 | } Atwo128; |
| 328 | typedef struct t128_2 { |
| 329 | Atwo128 x; |
| 330 | } SA; |
| 331 | |
| 332 | extern void func42(SA s); |
| 333 | void func43(SA s) { |
| 334 | func42(s); |
| 335 | } |
| 336 | |
| 337 | // CHECK-LABEL: define i32 @f44 |
| 338 | // CHECK: ptrtoint |
| 339 | // CHECK-NEXT: add i64 %{{[0-9]+}}, 31 |
| 340 | // CHECK-NEXT: and i64 %{{[0-9]+}}, -32 |
| 341 | // CHECK-NEXT: inttoptr |
| 342 | typedef int T44 __attribute((vector_size(32))); |
| 343 | struct s44 { T44 x; int y; }; |
| 344 | int f44(int i, ...) { |
| 345 | __builtin_va_list ap; |
| 346 | __builtin_va_start(ap, i); |
| 347 | struct s44 s = __builtin_va_arg(ap, struct s44); |
| 348 | __builtin_va_end(ap); |
| 349 | return s.y; |
| 350 | } |
| 351 | |
| 352 | // Text that vec3 returns the correct LLVM IR type. |
| 353 | // AVX-LABEL: define i32 @foo(<3 x i64> %X) |
| 354 | typedef long long3 __attribute((ext_vector_type(3))); |
| 355 | int foo(long3 X) |
| 356 | { |
| 357 | return 0; |
| 358 | } |
| 359 | |
| 360 | // Make sure we don't use a varargs convention for a function without a |
| 361 | // prototype where AVX types are involved. |
| 362 | // AVX: @test45 |
| 363 | // AVX: call i32 bitcast (i32 (...)* @f45 to i32 (<8 x float>)*) |
| 364 | int f45(); |
| 365 | __m256 x45; |
| 366 | void test45() { f45(x45); } |
| 367 | |
| 368 | // Make sure we use byval to pass 64-bit vectors in memory; the LLVM call |
| 369 | // lowering can't handle this case correctly because it runs after legalization. |
| 370 | // CHECK: @test46 |
| 371 | // CHECK: call void @f46({{.*}}<2 x float>* byval align 8 {{.*}}, <2 x float>* byval align 8 {{.*}}) |
| 372 | typedef float v46 __attribute((vector_size(8))); |
| 373 | void f46(v46,v46,v46,v46,v46,v46,v46,v46,v46,v46); |
| 374 | void test46() { v46 x = {1,2}; f46(x,x,x,x,x,x,x,x,x,x); } |
| 375 | |
| 376 | // Check that we pass the struct below without using byval, which helps out |
| 377 | // codegen. |
| 378 | // |
| 379 | // CHECK: @test47 |
| 380 | // CHECK: call void @f47(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) |
| 381 | struct s47 { unsigned a; }; |
| 382 | void f47(int,int,int,int,int,int,struct s47); |
| 383 | void test47(int a, struct s47 b) { f47(a, a, a, a, a, a, b); } |
| 384 | |
| 385 | // rdar://12723368 |
| 386 | // In the following example, there are holes in T4 at the 3rd byte and the 4th |
| 387 | // byte, however, T2 does not have those holes. T4 is chosen to be the |
| 388 | // representing type for union T1, but we can't use load or store of T4 since |
| 389 | // it will skip the 3rd byte and the 4th byte. |
| 390 | // In general, Since we don't accurately represent the data fields of a union, |
| 391 | // do not use load or store of the representing llvm type for the union. |
| 392 | typedef _Complex int T2; |
| 393 | typedef _Complex char T5; |
| 394 | typedef _Complex int T7; |
| 395 | typedef struct T4 { T5 field0; T7 field1; } T4; |
| 396 | typedef union T1 { T2 field0; T4 field1; } T1; |
| 397 | extern T1 T1_retval; |
| 398 | T1 test48(void) { |
| 399 | // CHECK: @test48 |
| 400 | // CHECK: memcpy |
| 401 | // CHECK: memcpy |
| 402 | return T1_retval; |
| 403 | } |
| 404 | |
| 405 | void test49_helper(double, ...); |
| 406 | void test49(double d, double e) { |
| 407 | test49_helper(d, e); |
| 408 | } |
| 409 | // CHECK-LABEL: define void @test49( |
| 410 | // CHECK: [[T0:%.*]] = load double, double* |
| 411 | // CHECK-NEXT: [[T1:%.*]] = load double, double* |
| 412 | // CHECK-NEXT: call void (double, ...) @test49_helper(double [[T0]], double [[T1]]) |
| 413 | |
| 414 | void test50_helper(); |
| 415 | void test50(double d, double e) { |
| 416 | test50_helper(d, e); |
| 417 | } |
| 418 | // CHECK-LABEL: define void @test50( |
| 419 | // CHECK: [[T0:%.*]] = load double, double* |
| 420 | // CHECK-NEXT: [[T1:%.*]] = load double, double* |
| 421 | // CHECK-NEXT: call void (double, double, ...) bitcast (void (...)* @test50_helper to void (double, double, ...)*)(double [[T0]], double [[T1]]) |
| 422 | |
| 423 | struct test51_s { __uint128_t intval; }; |
| 424 | void test51(struct test51_s *s, __builtin_va_list argList) { |
| 425 | *s = __builtin_va_arg(argList, struct test51_s); |
| 426 | } |
| 427 | |
| 428 | // CHECK-LABEL: define void @test51 |
| 429 | // CHECK: [[TMP_ADDR:%.*]] = alloca [[STRUCT_TEST51:%.*]], align 16 |
| 430 | // CHECK: br i1 |
| 431 | // CHECK: [[REG_SAVE_AREA_PTR:%.*]] = getelementptr inbounds {{.*}}, i32 0, i32 3 |
| 432 | // CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load i8*, i8** [[REG_SAVE_AREA_PTR]] |
| 433 | // CHECK-NEXT: [[VALUE_ADDR:%.*]] = getelementptr i8, i8* [[REG_SAVE_AREA]], i32 {{.*}} |
| 434 | // CHECK-NEXT: [[CASTED_VALUE_ADDR:%.*]] = bitcast i8* [[VALUE_ADDR]] to [[STRUCT_TEST51]] |
| 435 | // CHECK-NEXT: [[CASTED_TMP_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[TMP_ADDR]] to i8* |
| 436 | // CHECK-NEXT: [[RECASTED_VALUE_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[CASTED_VALUE_ADDR]] to i8* |
| 437 | // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[CASTED_TMP_ADDR]], i8* align 8 [[RECASTED_VALUE_ADDR]], i64 16, i1 false) |
| 438 | // CHECK-NEXT: add i32 {{.*}}, 16 |
| 439 | // CHECK-NEXT: store i32 {{.*}}, i32* {{.*}} |
| 440 | // CHECK-NEXT: br label |
| 441 | |
| 442 | void test52_helper(int, ...); |
| 443 | __m256 x52; |
| 444 | void test52() { |
| 445 | test52_helper(0, x52, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i); |
| 446 | } |
| 447 | // AVX: @test52_helper(i32 0, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}}) |
| 448 | |
| 449 | void test53(__m256 *m, __builtin_va_list argList) { |
| 450 | *m = __builtin_va_arg(argList, __m256); |
| 451 | } |
| 452 | // AVX-LABEL: define void @test53 |
| 453 | // AVX-NOT: br i1 |
| 454 | // AVX: ret void |
| 455 | |
| 456 | void test54_helper(__m256, ...); |
| 457 | __m256 x54; |
| 458 | void test54() { |
| 459 | test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i); |
| 460 | test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i); |
| 461 | } |
| 462 | // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}}) |
| 463 | // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[^)]+}}) |
| 464 | |
| 465 | typedef float __m512 __attribute__ ((__vector_size__ (64))); |
| 466 | typedef struct { |
| 467 | __m512 m; |
| 468 | } s512; |
| 469 | |
| 470 | s512 x55; |
| 471 | __m512 x56; |
| 472 | |
| 473 | // On AVX512, aggregates which contain a __m512 type are classified as SSE/SSEUP |
| 474 | // as per https://github.com/hjl-tools/x86-psABI/commit/30f9c9 3.2.3p2 Rule 1 |
| 475 | // |
| 476 | // AVX512: declare void @f55(<16 x float>) |
| 477 | // NO-AVX512: declare void @f55(%struct.s512* byval align 64) |
| 478 | void f55(s512 x); |
| 479 | |
| 480 | // __m512 has type SSE/SSEUP on AVX512. |
| 481 | // |
| 482 | // AVX512: declare void @f56(<16 x float>) |
| 483 | // NO-AVX512: declare void @f56(<16 x float>* byval align 64) |
| 484 | void f56(__m512 x); |
| 485 | void f57() { f55(x55); f56(x56); } |
| 486 | |
| 487 | // Like for __m128 on AVX, check that the struct below is passed |
| 488 | // in the same way regardless of AVX512 being used. |
| 489 | // |
| 490 | // CHECK: declare void @f58(%struct.t256* byval align 32) |
| 491 | typedef struct t256 { |
| 492 | __m256 m; |
| 493 | __m256 n; |
| 494 | } two256; |
| 495 | |
| 496 | extern void f58(two256 s); |
| 497 | void f59(two256 s) { |
| 498 | f58(s); |
| 499 | } |
| 500 | |
| 501 | // CHECK: declare void @f60(%struct.sat256* byval align 32) |
| 502 | typedef struct at256 { |
| 503 | __m256 array[2]; |
| 504 | } Atwo256; |
| 505 | typedef struct sat256 { |
| 506 | Atwo256 x; |
| 507 | } SAtwo256; |
| 508 | |
| 509 | extern void f60(SAtwo256 s); |
| 510 | void f61(SAtwo256 s) { |
| 511 | f60(s); |
| 512 | } |
| 513 | |
| 514 | // AVX512: @f62_helper(i32 0, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}}) |
| 515 | void f62_helper(int, ...); |
| 516 | __m512 x62; |
| 517 | void f62() { |
| 518 | f62_helper(0, x62, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i); |
| 519 | } |
| 520 | |
| 521 | // Like for __m256 on AVX, we always pass __m512 in memory, and don't |
| 522 | // need to use the register save area. |
| 523 | // |
| 524 | // AVX512-LABEL: define void @f63 |
| 525 | // AVX512-NOT: br i1 |
| 526 | // AVX512: ret void |
| 527 | void f63(__m512 *m, __builtin_va_list argList) { |
| 528 | *m = __builtin_va_arg(argList, __m512); |
| 529 | } |
| 530 | |
| 531 | // AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}}) |
| 532 | // AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[^)]+}}) |
| 533 | void f64_helper(__m512, ...); |
| 534 | __m512 x64; |
| 535 | void f64() { |
| 536 | f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i); |
| 537 | f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i); |
| 538 | } |
| 539 | |
| 540 | struct t65 { |
| 541 | __m256 m; |
| 542 | int : 0; |
| 543 | }; |
| 544 | // SSE-LABEL: @f65(%struct.t65* byval align 32 %{{[^,)]+}}) |
| 545 | // AVX: @f65(<8 x float> %{{[^,)]+}}) |
| 546 | void f65(struct t65 a0) { |
| 547 | } |
| 548 | |