| 1 | // RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86 |
| 2 | // RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX |
| 3 | // RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512 |
| 4 | // RUN: %clang_cc1 -fopenmp -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86 |
| 5 | // RUN: %clang_cc1 -fopenmp -triple i386-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX |
| 6 | // RUN: %clang_cc1 -fopenmp -triple i386-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512 |
| 7 | // RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC |
| 8 | // RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -target-abi elfv1-qpx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC-QPX |
| 9 | |
| 10 | // RUN: %clang_cc1 -fopenmp-simd -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86 |
| 11 | // RUN: %clang_cc1 -fopenmp-simd -triple x86_64-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX |
| 12 | // RUN: %clang_cc1 -fopenmp-simd -triple x86_64-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512 |
| 13 | // RUN: %clang_cc1 -fopenmp-simd -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86 |
| 14 | // RUN: %clang_cc1 -fopenmp-simd -triple i386-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX |
| 15 | // RUN: %clang_cc1 -fopenmp-simd -triple i386-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512 |
| 16 | // RUN: %clang_cc1 -fopenmp-simd -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC |
| 17 | // RUN: %clang_cc1 -fopenmp-simd -triple powerpc64-unknown-unknown -target-abi elfv1-qpx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC-QPX |
| 18 | |
// h1: three `omp simd` loops with the same body and the same aligned()
// clauses, differing only in their safelen/simdlen clauses. For every loop the
// FileCheck lines expect one ptrtoint / and / icmp eq 0 / llvm.assume sequence
// per aligned pointer (c, then a, then b), followed by a check on whether the
// float store carries !llvm.access.group metadata.
| 19 | void h1(float *c, float *a, double b[], int size) |
| 20 | { |
| 21 | // CHECK-LABEL: define void @h1 |
| 22 | int t = 0; |
// Loop 1: safelen(16), no simdlen. `c` is given an explicit alignment of 32,
// so its expected mask is 31 under every prefix; `a` and `b` are listed
// without a value, so their expected masks are spelled out per target prefix.
| 23 | #pragma omp simd safelen(16) linear(t) aligned(c:32) aligned(a,b) |
| 24 | // CHECK: [[C_PTRINT:%.+]] = ptrtoint |
| 25 | // CHECK-NEXT: [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31 |
| 26 | // CHECK-NEXT: [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0 |
| 27 | // CHECK-NEXT: call void @llvm.assume(i1 [[C_MASKCOND]]) |
| 28 | // CHECK: [[A_PTRINT:%.+]] = ptrtoint |
| 29 | |
| 30 | // X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 |
| 31 | // X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31 |
| 32 | // X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63 |
| 33 | // PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 |
| 34 | // PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 |
| 35 | |
| 36 | // CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0 |
| 37 | // CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]]) |
| 38 | // CHECK: [[B_PTRINT:%.+]] = ptrtoint |
| 39 | |
| 40 | // X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 |
| 41 | // X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 |
| 42 | // X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63 |
| 43 | // PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 |
| 44 | // PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 |
| 45 | |
| 46 | // CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0 |
| 47 | // CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]]) |
| 48 | for (int i = 0; i < size; ++i) { |
| 49 | c[i] = a[i] * a[i] + b[i] * b[t]; |
| 50 | ++t; |
| 51 | } |
| 52 | // Do not emit llvm.access.group metadata due to usage of safelen clause. |
| 53 | // CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.access.group {{![0-9]+}} |
// Loop 2: safelen(16) together with simdlen(8). The same alignment
// assumptions are expected, and safelen is still present, so the store is
// again expected to have no access-group metadata.
| 54 | #pragma omp simd safelen(16) linear(t) aligned(c:32) aligned(a,b) simdlen(8) |
| 55 | // CHECK: [[C_PTRINT:%.+]] = ptrtoint |
| 56 | // CHECK-NEXT: [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31 |
| 57 | // CHECK-NEXT: [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0 |
| 58 | // CHECK-NEXT: call void @llvm.assume(i1 [[C_MASKCOND]]) |
| 59 | // CHECK: [[A_PTRINT:%.+]] = ptrtoint |
| 60 | |
| 61 | // X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 |
| 62 | // X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31 |
| 63 | // X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63 |
| 64 | // PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 |
| 65 | // PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 |
| 66 | |
| 67 | // CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0 |
| 68 | // CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]]) |
| 69 | // CHECK: [[B_PTRINT:%.+]] = ptrtoint |
| 70 | |
| 71 | // X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 |
| 72 | // X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 |
| 73 | // X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63 |
| 74 | // PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 |
| 75 | // PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 |
| 76 | |
| 77 | // CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0 |
| 78 | // CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]]) |
| 79 | for (int i = 0; i < size; ++i) { |
| 80 | c[i] = a[i] * a[i] + b[i] * b[t]; |
| 81 | ++t; |
| 82 | } |
| 83 | // Do not emit llvm.access.group metadata due to usage of safelen clause. |
| 84 | // CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.access.group {{![0-9]+}} |
// Loop 3: simdlen(8) with no safelen clause. The alignment assumptions are
// unchanged, but here the float store IS expected to carry
// !llvm.access.group metadata (captured as ACCESS_GROUP_7).
| 85 | #pragma omp simd linear(t) aligned(c:32) aligned(a,b) simdlen(8) |
| 86 | // CHECK: [[C_PTRINT:%.+]] = ptrtoint |
| 87 | // CHECK-NEXT: [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31 |
| 88 | // CHECK-NEXT: [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0 |
| 89 | // CHECK-NEXT: call void @llvm.assume(i1 [[C_MASKCOND]]) |
| 90 | // CHECK: [[A_PTRINT:%.+]] = ptrtoint |
| 91 | |
| 92 | // X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 |
| 93 | // X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31 |
| 94 | // X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63 |
| 95 | // PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 |
| 96 | // PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 |
| 97 | |
| 98 | // CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0 |
| 99 | // CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]]) |
| 100 | // CHECK: [[B_PTRINT:%.+]] = ptrtoint |
| 101 | |
| 102 | // X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 |
| 103 | // X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 |
| 104 | // X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63 |
| 105 | // PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 |
| 106 | // PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 |
| 107 | |
| 108 | // CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0 |
| 109 | // CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]]) |
| 110 | for (int i = 0; i < size; ++i) { |
| 111 | c[i] = a[i] * a[i] + b[i] * b[t]; |
| 112 | ++t; |
| 113 | // CHECK: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.access.group ![[ACCESS_GROUP_7:[0-9]+]] |
| 114 | } |
| 115 | } |
| 116 | |
// h2: a single `omp simd linear(t)` loop with no safelen, simdlen or aligned
// clause. The float store in the body is expected to carry
// !llvm.access.group metadata (captured as ACCESS_GROUP_10), and a
// `br label` instruction is expected to carry !llvm.loop metadata (captured
// as LOOP_H2_HEADER) so both can be matched against metadata nodes later.
| 117 | void h2(float *c, float *a, float *b, int size) |
| 118 | { |
| 119 | // CHECK-LABEL: define void @h2 |
| 120 | int t = 0; |
| 121 | #pragma omp simd linear(t) |
| 122 | for (int i = 0; i < size; ++i) { |
| 123 | c[i] = a[i] * a[i] + b[i] * b[t]; |
| 124 | ++t; |
| 125 | // CHECK: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.access.group ![[ACCESS_GROUP_10:[0-9]+]] |
| 126 | } |
| 127 | // CHECK: br label %{{.+}}, !llvm.loop [[LOOP_H2_HEADER:![0-9]+]] |
| 128 | } |
| 129 | |
// h3: a clause-less `omp simd` applied to the outer loop of a two-level loop
// nest; the only store is in the inner loop. A float store is expected to
// carry !llvm.access.group metadata (captured as ACCESS_GROUP_13), and a
// `br label` instruction is expected to carry !llvm.loop metadata (captured
// as LOOP_H3_HEADER) so both can be matched against metadata nodes later.
| 130 | void h3(float *c, float *a, float *b, int size) |
| 131 | { |
| 132 | // CHECK-LABEL: define void @h3 |
| 133 | #pragma omp simd |
| 134 | for (int i = 0; i < size; ++i) { |
| 135 | for (int j = 0; j < size; ++j) { |
| 136 | c[j*i] = a[i] * b[j]; |
| 137 | } |
| 138 | // CHECK: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.access.group ![[ACCESS_GROUP_13:[0-9]+]] |
| 139 | } |
| 140 | // CHECK: br label %{{.+}}, !llvm.loop [[LOOP_H3_HEADER:![0-9]+]] |
| 141 | } |
| 142 | |
| 143 | // Metadata for h1 (one loop node per simd loop, in source order): |
| 144 | // CHECK: [[LOOP_H1_HEADER:![0-9]+]] = distinct !{[[LOOP_H1_HEADER]], [[LOOP_WIDTH_16:![0-9]+]], [[LOOP_VEC_ENABLE:![0-9]+]]} |
| 145 | // CHECK: [[LOOP_WIDTH_16]] = !{!"llvm.loop.vectorize.width", i32 16} |
| 146 | // CHECK: [[LOOP_VEC_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true} |
| 147 | // CHECK: [[LOOP_H1_HEADER:![0-9]+]] = distinct !{[[LOOP_H1_HEADER]], [[LOOP_WIDTH_8:![0-9]+]], [[LOOP_VEC_ENABLE]]} |
| 148 | // CHECK: [[LOOP_WIDTH_8]] = !{!"llvm.loop.vectorize.width", i32 8} |
| 149 | // CHECK: ![[ACCESS_GROUP_7]] = distinct !{} |
| 150 | // CHECK: [[LOOP_H1_HEADER:![0-9]+]] = distinct !{[[LOOP_H1_HEADER]], [[LOOP_WIDTH_8]], [[LOOP_VEC_ENABLE]], ![[PARALLEL_ACCESSES_9:[0-9]+]]} |
| 151 | // CHECK: ![[PARALLEL_ACCESSES_9]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP_7]]} |
| 152 | // |
| 153 | // Metadata for h2: |
| 154 | // CHECK: ![[ACCESS_GROUP_10]] = distinct !{} |
| 155 | // CHECK: [[LOOP_H2_HEADER]] = distinct !{[[LOOP_H2_HEADER]], [[LOOP_VEC_ENABLE]], ![[PARALLEL_ACCESSES_12:[0-9]+]]} |
| 156 | // CHECK: ![[PARALLEL_ACCESSES_12]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP_10]]} |
| 157 | // |
| 158 | // Metadata for h3: |
| 159 | // CHECK: ![[ACCESS_GROUP_13]] = distinct !{} |
| 160 | // CHECK: [[LOOP_H3_HEADER]] = distinct !{[[LOOP_H3_HEADER]], [[LOOP_VEC_ENABLE]], ![[PARALLEL_ACCESSES_15:[0-9]+]]} |
| 161 | // CHECK: ![[PARALLEL_ACCESSES_15]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP_13]]} |
| 162 | // |
| 163 | |